Files
PythonProject/3.html
lingxiao865 f5c4158fc1 first commit
2025-09-18 17:50:03 +08:00

1136 lines
47 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>SenseVoice + VAD 实时识别</title>
<style>
/* Page shell: full-height diagonal gradient background with white text. */
body {
font-family: Arial, sans-serif;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
margin: 0;
color: white;
}
/* Centred, translucent card that wraps the whole UI; blurs whatever is behind it. */
.container {
max-width: 1000px;
margin: 0 auto;
background: rgba(255, 255, 255, 0.1);
padding: 20px;
border-radius: 15px;
backdrop-filter: blur(10px);
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
}
/* Page title. */
h1 {
text-align: center;
margin-bottom: 20px;
font-size: 2.5em;
text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3);
}
/* Tab styles */
/* Tab strip stacked above the panel area. */
.tabs {
display: flex;
flex-direction: column;
width: 100%;
}
.tab-buttons {
display: flex;
border-bottom: 2px solid rgba(255, 255, 255, 0.2);
margin-bottom: 20px;
}
/* A single tab selector. The default focus outline is intentionally NOT
   removed here; keyboard users get an explicit ring via :focus-visible below. */
.tab-button {
padding: 15px 30px;
background: rgba(255, 255, 255, 0.1);
border: none;
cursor: pointer;
transition: all 0.3s ease;
font-size: 16px;
font-weight: bold;
color: rgba(255, 255, 255, 0.7);
border-radius: 10px 10px 0 0;
}
.tab-button:hover {
background: rgba(255, 255, 255, 0.2);
}
/* Visible keyboard focus indicator (replaces the previous blanket `outline: none`). */
.tab-button:focus-visible {
outline: 2px solid #FFD700;
outline-offset: 2px;
}
.tab-button.active {
background: rgba(255, 255, 255, 0.3);
color: white;
}
/* Panels are hidden unless they carry the `active` class (toggled by openTab). */
.tab-content {
display: none;
padding: 20px;
background: rgba(255, 255, 255, 0.05);
border-radius: 0 10px 10px 10px;
}
.tab-content.active {
display: block;
animation: fadeIn 0.5s;
}
/* Fade used when a panel becomes active. */
@keyframes fadeIn {
from {
opacity: 0;
}
to {
opacity: 1;
}
}
/* Row of action buttons inside each tab; wraps on narrow screens. */
.controls {
display: flex;
justify-content: center;
gap: 20px;
margin-bottom: 20px;
flex-wrap: wrap;
}
/* Base look shared by every <button> on the page (pill shape, drop shadow). */
button {
padding: 15px 30px;
font-size: 18px;
border: none;
border-radius: 50px;
cursor: pointer;
transition: all 0.3s ease;
font-weight: bold;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
}
/* Per-button gradient colours, keyed by element id. */
#startBtn {
background: linear-gradient(45deg, #4CAF50, #8BC34A);
color: white;
}
#stopBtn {
background: linear-gradient(45deg, #f44336, #e91e63);
color: white;
}
#manualBtn {
background: linear-gradient(45deg, #2196F3, #03A9F4);
color: white;
}
#sendManualBtn {
background: linear-gradient(45deg, #FF9800, #FFC107);
color: white;
}
#speakerBtn {
background: linear-gradient(45deg, #9C27B0, #E91E63);
color: white;
}
#stopSpeakerBtn {
background: linear-gradient(45deg, #f44336, #e91e63);
color: white;
}
/* Lift effect on hover, skipped for disabled buttons. */
button:hover:not(:disabled) {
transform: translateY(-3px);
box-shadow: 0 6px 20px rgba(0, 0, 0, 0.3);
}
button:disabled {
opacity: 0.6;
cursor: not-allowed;
}
/* Box that holds a tab's status line (and, on the VAD tab, the visualisation). */
.status-container {
text-align: center;
margin: 20px 0;
padding: 15px;
background: rgba(255, 255, 255, 0.15);
border-radius: 10px;
font-size: 16px;
}
/* The three per-tab status lines. */
#status,
#manualStatus,
#speakerStatus {
font-weight: bold;
margin-bottom: 10px;
}
.result-container {
margin: 20px 0;
}
/* Scrollable transcript area; pre-wrap keeps the newlines found in the text. */
.result-box {
min-height: 120px;
padding: 20px;
background: rgba(0, 0, 0, 0.2);
border-radius: 10px;
font-size: 18px;
line-height: 1.6;
white-space: pre-wrap;
overflow-y: auto;
max-height: 200px;
margin-bottom: 15px;
}
/* Usage-notes box below the tabs. */
.info {
margin-top: 20px;
padding: 15px;
background: rgba(255, 255, 255, 0.1);
border-radius: 10px;
font-size: 14px;
}
/* Host for the animated bars. */
.visualization {
height: 80px;
background: rgba(0, 0, 0, 0.1);
border-radius: 10px;
margin: 15px 0;
display: flex;
align-items: center;
justify-content: center;
}
/* One bar; the animation is purely decorative (a fixed CSS keyframe loop). */
.bar {
width: 8px;
height: 30px;
background: linear-gradient(to top, #4CAF50, #81C784);
margin: 0 2px;
border-radius: 2px;
animation: pulse 1s infinite;
}
@keyframes pulse {
0%,
100% {
height: 30px;
}
50% {
height: 60px;
}
}
/* Heading shown at the top of each tab panel. */
.section-title {
font-size: 1.5em;
margin-bottom: 15px;
text-align: center;
color: #FFD700;
}
/* Blinking red dot prepended to status text. */
.recording-indicator {
display: inline-block;
width: 12px;
height: 12px;
background-color: #f44336;
border-radius: 50%;
margin-right: 10px;
animation: blink 1s infinite;
}
@keyframes blink {
0%,
100% {
opacity: 1;
}
50% {
opacity: 0.3;
}
}
</style>
</head>
<body>
<div class="container">
<h1>🎙️ SenseVoiceSmall 语音识别</h1>
<div class="tabs">
<!-- Tab buttons: each one calls openTab() with the id of the panel to show -->
<div class="tab-buttons">
<button class="tab-button active" onclick="openTab(event, 'realtime')">VAD识别</button>
<button class="tab-button" onclick="openTab(event, 'manual')">手动识别</button>
<button class="tab-button" onclick="openTab(event, 'speaker')">扬声器识别</button>
</div>
<!-- Real-time (VAD) recognition tab; active by default -->
<div id="realtime" class="tab-content active">
<div class="section-title">VAD语音识别</div>
<div class="controls">
<button id="startBtn">开始监听</button>
<button id="stopBtn" disabled>停止监听</button>
</div>
<div class="status-container">
<div id="status">状态:初始化中...</div>
<div class="visualization" id="visualization">
<!-- Audio visualisation bars are inserted here by createVisualization() -->
</div>
</div>
<div class="result-container">
<h2>实时识别结果</h2>
<div id="result" class="result-box">识别结果将显示在这里...</div>
</div>
</div>
<!-- Manual recognition tab -->
<div id="manual" class="tab-content">
<div class="section-title">手动语音识别</div>
<div class="controls">
<button id="manualBtn">开始录音</button>
<button id="sendManualBtn" disabled>发送识别</button>
</div>
<div class="status-container">
<div id="manualStatus">状态:等待录音...</div>
</div>
<div class="result-container">
<h2>手动识别结果</h2>
<div id="manualResult" class="result-box">识别结果将显示在这里...</div>
</div>
</div>
<!-- Speaker (system audio) recognition tab -->
<div id="speaker" class="tab-content">
<div class="section-title">扬声器识别</div>
<div class="controls">
<button id="speakerBtn">开始录制扬声器</button>
<button id="stopSpeakerBtn" disabled>停止录制</button>
</div>
<div class="status-container">
<div id="speakerStatus">状态:等待录制...</div>
</div>
<div class="result-container">
<h2>扬声器识别结果</h2>
<div id="speakerResult" class="result-box">识别结果将显示在这里...</div>
</div>
</div>
</div>
<div class="info">
<h3>使用说明</h3>
<ul>
<li><strong>实时识别</strong>:自动检测语音并发送到后端进行识别</li>
<li><strong>手动识别</strong>:点击【开始录音】开始录音,再次点击该按钮结束录音,然后点击【发送识别】发送识别</li>
<li><strong>扬声器识别</strong>:录制系统扬声器输出的声音并识别</li>
<li>所有识别结果将实时显示在对应标签页中</li>
</ul>
</div>
</div>
<script src="https://unpkg.com/onnxruntime-web@1.22.0/dist/ort.js"></script>
<script src="https://unpkg.com/@ricky0123/vad-web@0.0.27/dist/bundle.min.js"></script>
<!--<script src="https://unpkg.com/onnxruntime-web@1.14.0/dist/ort.js"></script>-->
<!--<script src="https://unpkg.com/@ricky0123/vad-web@0.0.26/dist/bundle.min.js"></script>-->
<script>
// Tab switching
/**
 * Show the tab panel whose id is `tabName` and mark the clicked button active.
 *
 * @param {Event} evt - Click event; `evt.currentTarget` is the tab button.
 * @param {string} tabName - Element id of the panel to display.
 */
function openTab(evt, tabName) {
  // Clears the "active" marker from every element carrying the given class.
  const deactivateAll = (className) => {
    Array.from(document.getElementsByClassName(className)).forEach((element) => {
      element.classList.remove("active");
    });
  };

  deactivateAll("tab-content"); // hide every panel
  deactivateAll("tab-button");  // un-highlight every tab button

  // Reveal the requested panel and highlight the button that was clicked.
  document.getElementById(tabName).classList.add("active");
  evt.currentTarget.classList.add("active");
}
// Shared mutable state for the three recognition modes.
let myvad = null; // MicVAD instance; assigned by initVAD()
let websocket = null; // assigned by connectWebSocket()
let manualAudioContext = null;
// NOTE(review): no assignment to manualMediaRecorder is visible in this script; it is only read.
let manualMediaRecorder = null;
let manualChunks = []; // Float32Array pieces captured on the manual tab
let speakerAudioContext = null;
// NOTE(review): no assignment to speakerMediaRecorder is visible in this script; it is only read.
let speakerMediaRecorder = null;
let speakerChunks = []; // Float32Array pieces captured on the speaker tab
// DOM elements
const statusDiv = document.getElementById('status');
const resultDiv = document.getElementById('result');
const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const manualBtn = document.getElementById('manualBtn');
const sendManualBtn = document.getElementById('sendManualBtn');
const manualStatusDiv = document.getElementById('manualStatus');
const manualResultDiv = document.getElementById('manualResult');
const speakerBtn = document.getElementById('speakerBtn');
const stopSpeakerBtn = document.getElementById('stopSpeakerBtn');
const speakerStatusDiv = document.getElementById('speakerStatus');
const speakerResultDiv = document.getElementById('speakerResult');
const visualizationDiv = document.getElementById('visualization');
// NOTE(review): audioBuffer and `open` are referenced only by commented-out code in initVAD.
let audioBuffer = new Float32Array([]);
// Evaluates to 10240. NOTE(review): the earlier comment claimed "5KB", which does not
// match this value; no use of this constant is visible in this script.
const BUFFER_THRESHOLD = 1024 * 10;
// NOTE(review): this top-level binding shadows the global `open` (window.open) for bare references.
let open = false
// Build the audio visualisation
/**
 * Replace the contents of the visualisation container with 20 animated bars.
 * Each bar's CSS animation is delayed by a further 0.1s so the bars ripple.
 */
function createVisualization() {
  const BAR_COUNT = 20;
  const bars = document.createDocumentFragment();

  let index = 0;
  while (index < BAR_COUNT) {
    const barElement = document.createElement('div');
    barElement.className = 'bar';
    barElement.style.animationDelay = `${index * 0.1}s`;
    bars.appendChild(barElement);
    index += 1;
  }

  // Clear any previous bars, then insert the new set in one operation.
  visualizationDiv.innerHTML = '';
  visualizationDiv.appendChild(bars);
}
// Float32 → 16-bit PCM
/**
 * Convert floating-point samples to little-endian signed 16-bit PCM.
 *
 * Each sample is clamped to [-1, 1]; negative values are scaled by 0x8000 and
 * non-negative values by 0x7FFF before being written as an Int16.
 *
 * @param {Float32Array} float32Array - Input samples.
 * @returns {ArrayBuffer} Buffer holding two bytes per input sample.
 */
function floatTo16BitPCM(float32Array) {
  const sampleCount = float32Array.length;
  const pcmView = new DataView(new ArrayBuffer(sampleCount * 2));

  let byteOffset = 0;
  for (let idx = 0; idx < sampleCount; idx += 1, byteOffset += 2) {
    const clamped = Math.max(-1, Math.min(1, float32Array[idx]));
    const scale = clamped < 0 ? 0x8000 : 0x7FFF;
    pcmView.setInt16(byteOffset, clamped * scale, true); // true = little-endian
  }

  return pcmView.buffer;
}
// Initialise the voice-activity detector
/**
 * Create the MicVAD instance and store it in `myvad`.
 *
 * When a speech segment ends, its audio is converted to 16-bit PCM and sent
 * over the WebSocket if that socket is currently open. Progress and failures
 * are reported through `statusDiv`; errors are caught and logged, not rethrown.
 */
async function initVAD() {
  const speakingMarkup = "<span class='recording-indicator'></span>🟢 检测到语音,正在发送...";
  const listeningMarkup = "<span class='recording-indicator'></span>🎤 正在监听中...";

  // Speech detected: brighten the visualisation.
  const handleSpeechStart = () => {
    statusDiv.innerHTML = speakingMarkup;
    visualizationDiv.style.opacity = "1";
  };

  // Speech finished: ship the segment (if possible) and dim the visualisation.
  const handleSpeechEnd = (audio) => {
    const socketOpen = websocket && websocket.readyState === WebSocket.OPEN;
    if (socketOpen) {
      websocket.send(floatTo16BitPCM(audio));
    }
    statusDiv.innerHTML = listeningMarkup;
    visualizationDiv.style.opacity = "0.5";
  };

  try {
    statusDiv.innerText = "🔄 正在初始化 VAD...";
    createVisualization();

    // Audio context handed to the detector (webkit-prefixed constructor as fallback).
    const ContextCtor = window.AudioContext || window.webkitAudioContext;
    const audioContext = new ContextCtor();

    const vadOptions = {
      model: "v5",
      positiveSpeechThreshold: 0.4,
      negativeSpeechThreshold: 0.4,
      minSpeechFrames: 15,
      preSpeechPadFrames: 30,
      onSpeechStart: handleSpeechStart,
      onSpeechEnd: handleSpeechEnd,
      onnxWASMBasePath: "https://unpkg.com/onnxruntime-web@1.22.0/dist/",
      baseAssetPath: "https://unpkg.com/@ricky0123/vad-web@0.0.27/dist/",
      audioContext: audioContext,
    };
    myvad = await vad.MicVAD.new(vadOptions);

    statusDiv.innerText = "✅ VAD 初始化完成,点击【开始监听】";
  } catch (err) {
    statusDiv.innerText = `❌ VAD 加载失败: ${err.message}`;
    console.error('VAD 初始化失败:', err);
  }
}
// Connect the WebSocket
/**
 * Open the ASR WebSocket (unless one is already open or connecting) and wire
 * up its handlers. The socket is stored in the shared `websocket` variable.
 *
 * - Incoming messages are routed to the result box of the currently active
 *   tab: the real-time box is overwritten, the manual and speaker boxes get
 *   each non-empty message appended on a new line.
 * - When the socket closes, a reconnect is scheduled 3 seconds later.
 * - Connection state is mirrored onto the status line of all three tabs.
 *
 * Errors thrown while creating the socket are caught, shown and logged.
 */
function connectWebSocket() {
  // Write the same status text to all three tabs.
  const setAllStatus = (text) => {
    statusDiv.innerText = text;
    manualStatusDiv.innerText = text;
    speakerStatusDiv.innerText = text;
  };

  // `stopHeartbeat` is not defined in this script. Calling it unconditionally
  // threw a ReferenceError inside onclose/onerror, which aborted the handler
  // before the reconnect was scheduled. Call it only if some other script
  // actually provides it.
  const stopHeartbeatIfAvailable = () => {
    if (typeof stopHeartbeat === 'function') {
      stopHeartbeat();
    }
  };

  // Append a non-empty message to a cumulative result box, one message per line.
  const appendResultLine = (box, message) => {
    if (message) {
      box.innerText += (box.innerText ? '\n' : '') + message;
    }
  };

  try {
    // Already connected: nothing to do.
    if (websocket && websocket.readyState === WebSocket.OPEN) {
      console.log('WebSocket已连接无需重新连接');
      setAllStatus("🟢 WebSocket 已连接");
      return;
    }
    // A connection attempt is already in flight: do not start another.
    if (websocket && websocket.readyState === WebSocket.CONNECTING) {
      console.log('WebSocket正在连接中无需重新连接');
      return;
    }

    setAllStatus("🔄 正在连接 WebSocket...");
    websocket = new WebSocket('wss://ai.makesong.cn/ws/asr');
    // websocket = new WebSocket('ws://localhost:8000/ws/asr');

    // After 10 seconds still CONNECTING: only report it. The socket is
    // deliberately not closed here; its own failure triggers onclose, which
    // schedules the reconnect.
    const connectionTimeout = setTimeout(() => {
      if (websocket && websocket.readyState === WebSocket.CONNECTING) {
        setAllStatus("❌ WebSocket 连接超时,正在重连...");
      }
    }, 10000);

    websocket.onopen = () => {
      clearTimeout(connectionTimeout);
      setAllStatus("🟢 WebSocket 已连接");
    };

    websocket.onmessage = (e) => {
      try {
        const data = e.data;
        console.log('WebSocket 收到消息:', data);

        // Route by the tab that is visible right now; default to real-time.
        const activePanel = document.querySelector('.tab-content.active');
        const activeTab = activePanel ? activePanel.id : 'realtime';

        if (activeTab === 'manual') {
          appendResultLine(manualResultDiv, data);
        } else if (activeTab === 'speaker') {
          appendResultLine(speakerResultDiv, data);
        } else {
          // Real-time tab (and any unknown tab id): show only the latest message.
          resultDiv.innerText = data || "";
        }
      } catch (err) {
        console.error('处理WebSocket消息时出错:', err);
        resultDiv.innerText = e.data;
      }
    };

    websocket.onclose = () => {
      clearTimeout(connectionTimeout);
      stopHeartbeatIfAvailable();
      setAllStatus("🔌 已断开连接,正在重连...");
      setTimeout(connectWebSocket, 3000); // reconnect
    };

    websocket.onerror = (e) => {
      clearTimeout(connectionTimeout);
      stopHeartbeatIfAvailable();
      setAllStatus("❌ WebSocket 错误");
      console.error('WebSocket 错误:', e);
    };
  } catch (error) {
    setAllStatus(`❌ WebSocket 连接失败: ${error.message}`);
    console.error('WebSocket 连接失败:', error);
  }
}
// Real-time recognition controls
// Start listening: resumes the VAD and swaps which of the two buttons is enabled.
startBtn.addEventListener('click', () => {
  // Nothing to start until initVAD() has produced an instance.
  if (!myvad) {
    return;
  }

  try {
    myvad.start();
  } catch (err) {
    console.error('启动 VAD 失败:', err);
    statusDiv.innerText = `❌ 启动失败: ${err.message}`;
    return;
  }

  startBtn.disabled = true;
  stopBtn.disabled = false;
  statusDiv.innerHTML = "<span class='recording-indicator'></span>🎤 正在监听中...";
  visualizationDiv.style.opacity = "0.5";
});
// Stop listening: pauses the VAD and re-enables the start button.
stopBtn.addEventListener('click', () => {
  // Nothing to pause until initVAD() has produced an instance.
  if (!myvad) {
    return;
  }

  try {
    myvad.pause();
  } catch (err) {
    console.error('停止 VAD 失败:', err);
    statusDiv.innerText = `❌ 停止失败: ${err.message}`;
    return;
  }

  startBtn.disabled = false;
  stopBtn.disabled = true;
  statusDiv.innerText = "⏸️ 已停止监听";
  visualizationDiv.style.opacity = "0.3";
});
// Manual recognition: start/stop recording toggle
/**
 * First click: request the microphone, create a 16 kHz AudioContext and start
 * collecting Float32 sample chunks into `manualChunks`.
 * Second click: stop the capture (close the context, release the microphone)
 * and keep the collected chunks so that the send button can transmit them.
 *
 * "Recording in progress" is tracked through `manualAudioContext`, which is
 * non-null only while a capture is active. The previous check relied on
 * `manualMediaRecorder`, which is never assigned, so the stop branch could
 * never run and a second click restarted the capture and discarded the audio.
 * The microphone stream is kept on `window.manualStream` so it can be released.
 */
manualBtn.addEventListener('click', async () => {
  // Store one chunk of samples (copied, because the source buffer may be reused).
  const collectSamples = (samples) => {
    try {
      manualChunks.push(new Float32Array(samples));
    } catch (err) {
      console.error('处理音频数据时出错:', err);
    }
  };

  // Fallback capture path for browsers where AudioWorklet is missing or fails.
  const attachScriptProcessor = (context, source) => {
    const processor = context.createScriptProcessor(1024, 1, 1);
    processor.onaudioprocess = (e) => collectSamples(e.inputBuffer.getChannelData(0));
    source.connect(processor);
    processor.connect(context.destination);
  };

  // Preferred capture path. Resolves to false when the worklet cannot be used.
  const attachWorklet = async (context, source) => {
    if (!context.audioWorklet) {
      return false;
    }
    // Minimal processor that forwards the first input channel to the main thread.
    const processorScript = `
      class AudioCollector extends AudioWorkletProcessor {
        process(inputs, outputs, parameters) {
          const input = inputs[0];
          if (input && input[0]) {
            this.port.postMessage(input[0]);
          }
          return true;
        }
      }
      registerProcessor('audio-collector', AudioCollector);
    `;
    const url = URL.createObjectURL(new Blob([processorScript], {type: 'application/javascript'}));
    try {
      await context.audioWorklet.addModule(url);
      const processor = new AudioWorkletNode(context, 'audio-collector');
      processor.port.onmessage = (e) => collectSamples(e.data);
      source.connect(processor);
      processor.connect(context.destination);
      return true;
    } catch (workletErr) {
      console.warn('AudioWorklet 不可用,回退到 ScriptProcessorNode:', workletErr);
      return false;
    } finally {
      // The module has been loaded (or has failed); the blob URL is no longer needed.
      URL.revokeObjectURL(url);
    }
  };

  // Release the microphone and close the audio context, if any.
  const releaseCapture = async () => {
    if (window.manualStream) {
      window.manualStream.getTracks().forEach((track) => track.stop());
      window.manualStream = null;
    }
    const context = manualAudioContext;
    manualAudioContext = null;
    if (context && context.state !== 'closed') {
      try {
        await context.close();
      } catch (err) {
        console.error('关闭音频上下文时出错:', err);
      }
    }
  };

  // Put the toggle button back into its idle look.
  const showIdleButton = () => {
    manualBtn.textContent = '开始录音';
    manualBtn.style.background = 'linear-gradient(45deg, #2196F3, #03A9F4)';
  };

  // Block re-entrant clicks while the asynchronous start/stop is running.
  manualBtn.disabled = true;
  try {
    if (!manualAudioContext) {
      // ---- start recording ----
      manualStatusDiv.innerHTML = "🔄 正在请求麦克风权限...";
      const stream = await navigator.mediaDevices.getUserMedia({audio: true});
      window.manualStream = stream;

      manualAudioContext = new (window.AudioContext || window.webkitAudioContext)({
        sampleRate: 16000 // sample rate requested for speech recognition
      });
      const source = manualAudioContext.createMediaStreamSource(stream);

      // Reset before any processor is connected so no early chunk is lost.
      manualChunks = [];
      const usingWorklet = await attachWorklet(manualAudioContext, source);
      if (!usingWorklet) {
        attachScriptProcessor(manualAudioContext, source);
      }

      manualBtn.textContent = '停止录音';
      manualBtn.style.background = 'linear-gradient(45deg, #f44336, #e91e63)';
      sendManualBtn.disabled = false;
      manualStatusDiv.innerHTML = "<span class='recording-indicator'></span>🎤 正在录音...";
    } else {
      // ---- stop recording (collected chunks are kept for sending) ----
      await releaseCapture();
      showIdleButton();
      manualStatusDiv.innerText = "⏹️ 录音已停止,点击【发送识别】";
    }
  } catch (err) {
    manualStatusDiv.innerText = `❌ 操作失败: ${err.message}`;
    console.error('手动识别操作失败:', err);
    // Do not leave a half-initialised capture behind, then reset the buttons.
    await releaseCapture();
    showIdleButton();
    sendManualBtn.disabled = true;
  } finally {
    manualBtn.disabled = false;
  }
});
// Send the manually recorded audio: concatenates every captured chunk,
// converts the result to 16-bit PCM and transmits it over the WebSocket.
sendManualBtn.addEventListener('click', () => {
  const socketReady = Boolean(websocket) && websocket.readyState === WebSocket.OPEN;

  if (!socketReady) {
    manualStatusDiv.innerText = "⚠️ WebSocket未连接请等待连接建立";
    return;
  }
  if (manualChunks.length === 0) {
    manualStatusDiv.innerText = "⚠️ 没有录音数据可发送";
    return;
  }

  try {
    // Size the destination first, then copy each piece in order.
    let sampleCount = 0;
    for (const piece of manualChunks) {
      sampleCount += piece.length;
    }
    const merged = new Float32Array(sampleCount);
    let cursor = 0;
    manualChunks.forEach((piece) => {
      merged.set(piece, cursor);
      cursor += piece.length;
    });

    // Convert to PCM and send.
    websocket.send(floatTo16BitPCM(merged));
    manualStatusDiv.innerText = `📤 发送录音数据 (${merged.length} 个采样点)`;
  } catch (err) {
    manualStatusDiv.innerText = `❌ 发送数据时出错: ${err.message}`;
    console.error('发送录音数据时出错:', err);
  }
});
// Speaker (system audio) recognition: start/stop recording toggle
/**
 * First click: ask for a display-capture stream with audio, create a 16 kHz
 * AudioContext and start collecting Float32 sample chunks into `speakerChunks`.
 * Second click: stop the capture and discard any chunks not yet sent.
 *
 * "Recording in progress" is tracked through `speakerAudioContext`, which is
 * non-null only while a capture is active. The previous check relied on
 * `speakerMediaRecorder`, which is never assigned, so the stop branch could
 * never run and a second click opened a second capture instead of stopping.
 * The captured stream is kept on `window.speakerStream`.
 */
speakerBtn.addEventListener('click', async () => {
  // Close the audio context and stop every track of the captured stream.
  const teardownCapture = () => {
    if (speakerAudioContext) {
      speakerAudioContext.close().catch((err) => console.error('关闭音频上下文时出错:', err));
      speakerAudioContext = null;
    }
    if (window.speakerStream) {
      window.speakerStream.getTracks().forEach((track) => track.stop());
      window.speakerStream = null;
    }
  };

  // Put buttons, status line and chunk buffer back into the idle state.
  const showStopped = () => {
    speakerBtn.textContent = '开始录制扬声器';
    speakerBtn.style.background = 'linear-gradient(45deg, #9C27B0, #E91E63)';
    speakerStatusDiv.innerText = "⏹️ 录制已停止";
    stopSpeakerBtn.disabled = true;
    speakerChunks = [];
  };

  // Store one chunk of samples and refresh the buffer counter every 10 chunks.
  const collectSamples = (samples) => {
    try {
      speakerChunks.push(new Float32Array(samples));
      if (speakerChunks.length % 10 === 0) {
        speakerStatusDiv.innerHTML = `<span class='recording-indicator'></span>🎤 正在录制扬声器... (缓冲区: ${speakerChunks.length})`;
      }
    } catch (err) {
      console.error('处理音频数据时出错:', err);
    }
  };

  // Fallback capture path for browsers where AudioWorklet is missing or fails.
  const attachScriptProcessor = (context, sourceNode) => {
    const processor = context.createScriptProcessor(4096, 1, 1);
    console.log('创建处理器:', processor);
    processor.onaudioprocess = (e) => collectSamples(e.inputBuffer.getChannelData(0));
    sourceNode.connect(processor);
    processor.connect(context.destination);
  };

  // Preferred capture path. Resolves to false when the worklet cannot be used.
  const attachWorklet = async (context, sourceNode) => {
    if (!context.audioWorklet) {
      return false;
    }
    // Minimal processor that forwards the first input channel to the main thread.
    const processorScript = `
      class AudioCollector extends AudioWorkletProcessor {
        process(inputs, outputs, parameters) {
          const input = inputs[0];
          if (input && input[0]) {
            this.port.postMessage(input[0]);
          }
          return true;
        }
      }
      registerProcessor('audio-collector', AudioCollector);
    `;
    const url = URL.createObjectURL(new Blob([processorScript], {type: 'application/javascript'}));
    try {
      await context.audioWorklet.addModule(url);
      const processor = new AudioWorkletNode(context, 'audio-collector');
      processor.port.onmessage = (e) => collectSamples(e.data);
      sourceNode.connect(processor);
      processor.connect(context.destination);
      return true;
    } catch (workletErr) {
      console.warn('AudioWorklet 不可用,回退到 ScriptProcessorNode:', workletErr);
      return false;
    } finally {
      // The module has been loaded (or has failed); the blob URL is no longer needed.
      URL.revokeObjectURL(url);
    }
  };

  // Map a capture failure to the status text shown to the user.
  const reportStartFailure = (err) => {
    if (err.name === 'NotSupportedError') {
      speakerStatusDiv.innerHTML = "❌ 系统不支持扬声器录制功能<br/>" +
        "请尝试以下解决方案:<br/>" +
        "1. 确保在屏幕共享时选择包含音频的选项<br/>" +
        "2. 检查系统音频设置和权限<br/>" +
        "3. 在 Windows 系统中,确保已启用立体声混音设备";
    } else if (err.name === 'NotAllowedError' || err.name === 'PermissionDeniedError') {
      speakerStatusDiv.innerText = "❌ 用户拒绝了屏幕共享或音频访问权限";
    } else if (err.name === 'NotFoundError' || err.name === 'OverconstrainedError') {
      speakerStatusDiv.innerText = "❌ 未找到可用的音频输入设备";
    } else if (err.message && err.message.includes('Error starting capture')) {
      speakerStatusDiv.innerHTML = "❌ 启动捕获失败<br/>" +
        "请尝试以下解决方案:<br/>" +
        "1. 重新点击按钮再次尝试<br/>" +
        "2. 重启应用程序<br/>" +
        "3. 检查系统音频驱动程序<br/>" +
        "4. 确保没有其他应用程序正在使用音频设备";
    } else {
      speakerStatusDiv.innerHTML = `❌ 获取扬声器失败: ${err.message || err.name}<br/>` +
        "请尝试以下解决方案:<br/>" +
        "1. 确保使用最新版本的 Electron<br/>" +
        "2. 检查应用权限设置<br/>" +
        "3. 重启应用程序";
    }
  };

  // ---- stop recording (a capture is active) ----
  if (speakerAudioContext) {
    teardownCapture();
    showStopped();
    return;
  }

  // ---- start recording ----
  // Block re-entrant clicks while the asynchronous setup is running.
  speakerBtn.disabled = true;
  try {
    speakerStatusDiv.innerHTML = "🔄 正在请求音频流...";
    // Some browsers only hand out display audio when video is requested too.
    const stream = await navigator.mediaDevices.getDisplayMedia({video: true, audio: true});
    const audioTracks = stream.getAudioTracks();
    // Only the audio is of interest, so the video tracks are stopped right away.
    stream.getVideoTracks().forEach((track) => track.stop());

    speakerStatusDiv.innerHTML = "🔄 正在设置音频处理...";
    console.log('音频轨道数量:', audioTracks.length);
    console.log('音频轨道详情:', audioTracks);
    if (audioTracks.length === 0) {
      speakerStatusDiv.innerText = "⚠️ 音频流不包含音频轨道";
      stream.getTracks().forEach((track) => track.stop());
      return;
    }
    window.speakerStream = stream;

    // If the capture ends on its own (for example the user stops sharing),
    // release everything and reset the UI, but only for the current stream.
    audioTracks.forEach((track) => {
      console.log('音频轨道设置:', track);
      track.addEventListener('ended', () => {
        console.log('音频轨道结束');
        if (window.speakerStream !== stream) {
          return;
        }
        teardownCapture();
        showStopped();
      });
    });

    const context = new (window.AudioContext || window.webkitAudioContext)({
      sampleRate: 16000 // sample rate requested for speech recognition
    });
    speakerAudioContext = context;
    console.log('创建音频上下文:', context);
    const sourceNode = context.createMediaStreamSource(stream);
    console.log('创建媒体流源:', sourceNode);

    // Start from an empty buffer regardless of which capture path is used.
    speakerChunks = [];
    const usingWorklet = await attachWorklet(context, sourceNode);
    if (speakerAudioContext !== context) {
      // The capture ended while the worklet was loading; the UI is already reset.
      return;
    }
    if (!usingWorklet) {
      attachScriptProcessor(context, sourceNode);
    }

    speakerBtn.textContent = '停止录制';
    speakerBtn.style.background = 'linear-gradient(45deg, #f44336, #e91e63)';
    stopSpeakerBtn.disabled = false;
    speakerStatusDiv.innerHTML = "<span class='recording-indicator'></span>🎤 正在录制扬声器...";
  } catch (err) {
    console.error('获取扬声器完整错误:', err);
    // Do not leave a half-initialised capture behind: it would make the next
    // click look like a "stop" request.
    teardownCapture();
    reportStartFailure(err);
  } finally {
    speakerBtn.disabled = false;
  }
});
// Periodic flush of captured speaker audio (every 3 seconds).
// A tick does nothing unless there are pending chunks AND the WebSocket exists
// and is open; `websocket` is null until connectWebSocket() has created it, so
// it must be checked before reading readyState.
const sendInterval = setInterval(() => {
  const socketReady = Boolean(websocket) && websocket.readyState === WebSocket.OPEN;
  if (speakerChunks.length === 0 || !socketReady) {
    return;
  }

  // Concatenate every pending chunk into one contiguous buffer.
  const totalLength = speakerChunks.reduce((acc, chunk) => acc + chunk.length, 0);
  const fullAudio = new Float32Array(totalLength);
  let offset = 0;
  for (const chunk of speakerChunks) {
    fullAudio.set(chunk, offset);
    offset += chunk.length;
  }

  // Convert to PCM and send.
  websocket.send(floatTo16BitPCM(fullAudio));
  speakerStatusDiv.innerText = `📤 发送扬声器数据 (${fullAudio.length} 个采样点)`;

  // Drop the chunks that were just sent.
  speakerChunks = [];
}, 3000); // flush period in milliseconds
// Stop the speaker capture: send whatever is still buffered, then release the
// audio context and the captured stream and reset the UI.
//
// The periodic flush timer (`sendInterval`) is intentionally NOT cleared here.
// It is created once at page load and never re-created, so clearing it on the
// first stop meant that later recordings were never streamed. The timer is
// harmless while idle because it does nothing when `speakerChunks` is empty.
stopSpeakerBtn.addEventListener('click', () => {
  const socketReady = Boolean(websocket) && websocket.readyState === WebSocket.OPEN;

  // Final flush of the chunks captured since the last periodic send.
  if (speakerChunks && speakerChunks.length > 0 && socketReady) {
    const totalLength = speakerChunks.reduce((acc, chunk) => acc + chunk.length, 0);
    const fullAudio = new Float32Array(totalLength);
    let offset = 0;
    for (const chunk of speakerChunks) {
      fullAudio.set(chunk, offset);
      offset += chunk.length;
    }
    // Convert to PCM and send.
    websocket.send(floatTo16BitPCM(fullAudio));
  }

  // Stop the audio context.
  if (speakerAudioContext) {
    speakerAudioContext.close().catch((err) => console.error('关闭音频上下文时出错:', err));
    speakerAudioContext = null;
  }
  // Stop every track of the captured stream.
  if (window.speakerStream) {
    window.speakerStream.getTracks().forEach((track) => {
      track.stop();
    });
    window.speakerStream = null;
  }

  // Reset buttons and status. This status always ends up being the visible
  // one, so no intermediate "sent"/"nothing to send" text is written above.
  speakerBtn.textContent = '开始录制扬声器';
  speakerBtn.style.background = 'linear-gradient(45deg, #9C27B0, #E91E63)';
  speakerStatusDiv.innerText = "⏹️ 录制已停止";
  stopSpeakerBtn.disabled = true;
  speakerChunks = [];
});
// Startup: once the page has loaded, set up the VAD and open the WebSocket.
window.onload = function () {
  initVAD();
  connectWebSocket();
};
</script>
</body>
</html>