<!--
Files
PythonProject/3.html

1136 lines
47 KiB
HTML
Raw Permalink Normal View History

2025-09-18 17:50:03 +08:00
-->
<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>SenseVoice + VAD 实时识别</title>
<style>
body {
font-family: Arial, sans-serif;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
margin: 0;
color: white;
}
.container {
max-width: 1000px;
margin: 0 auto;
background: rgba(255, 255, 255, 0.1);
padding: 20px;
border-radius: 15px;
backdrop-filter: blur(10px);
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
}
h1 {
text-align: center;
margin-bottom: 20px;
font-size: 2.5em;
text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3);
}
/* Tabs 样式 */
.tabs {
display: flex;
flex-direction: column;
width: 100%;
}
.tab-buttons {
display: flex;
border-bottom: 2px solid rgba(255, 255, 255, 0.2);
margin-bottom: 20px;
}
/* Tab buttons: the default focus outline is no longer suppressed, and an
   explicit :focus-visible ring is provided so keyboard users can see which
   tab has focus against the translucent background. */
.tab-button {
  padding: 15px 30px;
  background: rgba(255, 255, 255, 0.1);
  border: none;
  cursor: pointer;
  transition: all 0.3s ease;
  font-size: 16px;
  font-weight: bold;
  color: rgba(255, 255, 255, 0.7);
  border-radius: 10px 10px 0 0;
}
.tab-button:focus-visible {
  outline: 2px solid #FFD700;
  outline-offset: 2px;
}
.tab-button:hover {
background: rgba(255, 255, 255, 0.2);
}
.tab-button.active {
background: rgba(255, 255, 255, 0.3);
color: white;
}
.tab-content {
display: none;
padding: 20px;
background: rgba(255, 255, 255, 0.05);
border-radius: 0 10px 10px 10px;
}
.tab-content.active {
display: block;
animation: fadeIn 0.5s;
}
@keyframes fadeIn {
from {
opacity: 0;
}
to {
opacity: 1;
}
}
.controls {
display: flex;
justify-content: center;
gap: 20px;
margin-bottom: 20px;
flex-wrap: wrap;
}
button {
padding: 15px 30px;
font-size: 18px;
border: none;
border-radius: 50px;
cursor: pointer;
transition: all 0.3s ease;
font-weight: bold;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
}
#startBtn {
background: linear-gradient(45deg, #4CAF50, #8BC34A);
color: white;
}
#stopBtn {
background: linear-gradient(45deg, #f44336, #e91e63);
color: white;
}
#manualBtn {
background: linear-gradient(45deg, #2196F3, #03A9F4);
color: white;
}
#sendManualBtn {
background: linear-gradient(45deg, #FF9800, #FFC107);
color: white;
}
#speakerBtn {
background: linear-gradient(45deg, #9C27B0, #E91E63);
color: white;
}
#stopSpeakerBtn {
background: linear-gradient(45deg, #f44336, #e91e63);
color: white;
}
button:hover:not(:disabled) {
transform: translateY(-3px);
box-shadow: 0 6px 20px rgba(0, 0, 0, 0.3);
}
button:disabled {
opacity: 0.6;
cursor: not-allowed;
}
.status-container {
text-align: center;
margin: 20px 0;
padding: 15px;
background: rgba(255, 255, 255, 0.15);
border-radius: 10px;
font-size: 16px;
}
#status,
#manualStatus,
#speakerStatus {
font-weight: bold;
margin-bottom: 10px;
}
.result-container {
margin: 20px 0;
}
.result-box {
min-height: 120px;
padding: 20px;
background: rgba(0, 0, 0, 0.2);
border-radius: 10px;
font-size: 18px;
line-height: 1.6;
white-space: pre-wrap;
overflow-y: auto;
max-height: 200px;
margin-bottom: 15px;
}
.info {
margin-top: 20px;
padding: 15px;
background: rgba(255, 255, 255, 0.1);
border-radius: 10px;
font-size: 14px;
}
.visualization {
height: 80px;
background: rgba(0, 0, 0, 0.1);
border-radius: 10px;
margin: 15px 0;
display: flex;
align-items: center;
justify-content: center;
}
.bar {
width: 8px;
height: 30px;
background: linear-gradient(to top, #4CAF50, #81C784);
margin: 0 2px;
border-radius: 2px;
animation: pulse 1s infinite;
}
@keyframes pulse {
0%,
100% {
height: 30px;
}
50% {
height: 60px;
}
}
.section-title {
font-size: 1.5em;
margin-bottom: 15px;
text-align: center;
color: #FFD700;
}
.recording-indicator {
display: inline-block;
width: 12px;
height: 12px;
background-color: #f44336;
border-radius: 50%;
margin-right: 10px;
animation: blink 1s infinite;
}
@keyframes blink {
0%,
100% {
opacity: 1;
}
50% {
opacity: 0.3;
}
}
</style>
</head>
<body>
<div class="container">
  <h1>🎙️ SenseVoiceSmall 语音识别</h1>
  <div class="tabs">
    <!-- Tab buttons. The inline onclick handlers are kept because the page
         script's openTab(event, id) relies on being called this way. -->
    <div class="tab-buttons">
      <button class="tab-button active" type="button" onclick="openTab(event, 'realtime')">VAD识别</button>
      <button class="tab-button" type="button" onclick="openTab(event, 'manual')">手动识别</button>
      <button class="tab-button" type="button" onclick="openTab(event, 'speaker')">扬声器识别</button>
    </div>
    <!-- Real-time (VAD) recognition tab -->
    <div class="tab-content active" id="realtime">
      <div class="section-title">VAD语音识别</div>
      <div class="controls">
        <button id="startBtn" type="button">开始监听</button>
        <button id="stopBtn" type="button" disabled>停止监听</button>
      </div>
      <div class="status-container">
        <!-- role="status" makes script-driven status changes audible to screen readers -->
        <div id="status" role="status">状态:初始化中...</div>
        <!-- Purely decorative animated bars, filled in by script -->
        <div class="visualization" id="visualization" aria-hidden="true">
        </div>
      </div>
      <div class="result-container">
        <h2>实时识别结果</h2>
        <div class="result-box" id="result" aria-live="polite">识别结果将显示在这里...</div>
      </div>
    </div>
    <!-- Manual recognition tab -->
    <div class="tab-content" id="manual">
      <div class="section-title">手动语音识别</div>
      <div class="controls">
        <button id="manualBtn" type="button">开始录音</button>
        <button id="sendManualBtn" type="button" disabled>发送识别</button>
      </div>
      <div class="status-container">
        <div id="manualStatus" role="status">状态:等待录音...</div>
      </div>
      <div class="result-container">
        <h2>手动识别结果</h2>
        <div class="result-box" id="manualResult" aria-live="polite">识别结果将显示在这里...</div>
      </div>
    </div>
    <!-- Speaker (system audio) recognition tab -->
    <div class="tab-content" id="speaker">
      <div class="section-title">扬声器识别</div>
      <div class="controls">
        <button id="speakerBtn" type="button">开始录制扬声器</button>
        <button id="stopSpeakerBtn" type="button" disabled>停止录制</button>
      </div>
      <div class="status-container">
        <!-- Not a live region: the script rewrites this many times per second while recording -->
        <div id="speakerStatus">状态:等待录制...</div>
      </div>
      <div class="result-container">
        <h2>扬声器识别结果</h2>
        <div class="result-box" id="speakerResult" aria-live="polite">识别结果将显示在这里...</div>
      </div>
    </div>
  </div>
  <div class="info">
    <h3>使用说明</h3>
    <ul>
      <li><strong>实时识别</strong>:自动检测语音并发送到后端进行识别</li>
      <li><strong>手动识别</strong>:点击按钮开始录音,再次点击按钮结束录音并发送识别</li>
      <li><strong>扬声器识别</strong>:录制系统扬声器输出的声音并识别</li>
      <li>所有识别结果将实时显示在对应标签页中</li>
    </ul>
  </div>
</div>
<script src="https://unpkg.com/onnxruntime-web@1.22.0/dist/ort.js"></script>
<script src="https://unpkg.com/@ricky0123/vad-web@0.0.27/dist/bundle.min.js"></script>
<!--<script src="https://unpkg.com/onnxruntime-web@1.14.0/dist/ort.js"></script>-->
<!--<script src="https://unpkg.com/@ricky0123/vad-web@0.0.26/dist/bundle.min.js"></script>-->
<script>
/**
 * Tab switching: show one tab panel and highlight the button that opened it.
 *
 * @param {Event} evt - Click event; evt.currentTarget is the tab button to mark active.
 * @param {string} tabName - id of the `.tab-content` panel to display.
 */
function openTab(evt, tabName) {
  const deactivate = (element) => element.classList.remove("active");

  // Hide every panel, then un-highlight every tab button.
  Array.from(document.getElementsByClassName("tab-content")).forEach(deactivate);
  Array.from(document.getElementsByClassName("tab-button")).forEach(deactivate);

  // Reveal the requested panel and highlight the clicked button.
  const selectedPanel = document.getElementById(tabName);
  selectedPanel.classList.add("active");
  evt.currentTarget.classList.add("active");
}
// ---- Shared mutable state for the three recognition modes ----
// VAD instance; assigned by initVAD() and driven by the start/stop buttons.
let myvad = null;
// ASR WebSocket; assigned by connectWebSocket() and used by every sender.
let websocket = null;
// AudioContext used while capturing microphone audio in the manual tab.
let manualAudioContext = null;
// NOTE(review): never assigned anywhere in the visible script, so it stays null.
let manualMediaRecorder = null;
// Float32Array chunks captured from the microphone, awaiting a manual send.
let manualChunks = [];
// AudioContext used while capturing system/speaker audio.
let speakerAudioContext = null;
// NOTE(review): never assigned anywhere in the visible script, so it stays null.
let speakerMediaRecorder = null;
// Float32Array chunks captured from the speaker stream, awaiting transmission.
let speakerChunks = [];
// ---- DOM elements (looked up once at script load) ----
const statusDiv = document.getElementById('status');
const resultDiv = document.getElementById('result');
const startBtn = document.getElementById('startBtn');
const stopBtn = document.getElementById('stopBtn');
const manualBtn = document.getElementById('manualBtn');
const sendManualBtn = document.getElementById('sendManualBtn');
const manualStatusDiv = document.getElementById('manualStatus');
const manualResultDiv = document.getElementById('manualResult');
const speakerBtn = document.getElementById('speakerBtn');
const stopSpeakerBtn = document.getElementById('stopSpeakerBtn');
const speakerStatusDiv = document.getElementById('speakerStatus');
const speakerResultDiv = document.getElementById('speakerResult');
const visualizationDiv = document.getElementById('visualization');
// The three declarations below are not referenced by any active code in the
// visible script.
let audioBuffer = new Float32Array([]);
// NOTE(review): the value is 10 * 1024 = 10240; whether that counts bytes or
// samples is not established by the visible code — TODO confirm before use.
const BUFFER_THRESHOLD = 1024 * 10;
// NOTE(review): at script top level this name shadows the global `open`
// (window.open) for the rest of the page's scripts.
let open = false
/**
 * Build the audio visualizer: empty the container, then add 20 animated
 * `.bar` elements whose animation start is staggered by 0.1s per bar.
 */
function createVisualization() {
  const BAR_COUNT = 20;
  visualizationDiv.innerHTML = '';

  let index = 0;
  while (index < BAR_COUNT) {
    const barElement = document.createElement('div');
    barElement.className = 'bar';
    // Each successive bar starts its pulse slightly later than the previous one.
    barElement.style.animationDelay = `${index * 0.1}s`;
    visualizationDiv.appendChild(barElement);
    index += 1;
  }
}
/**
 * Convert float samples to signed 16-bit little-endian PCM.
 *
 * Each sample is clamped to [-1, 1]; negative values are scaled by 0x8000 and
 * non-negative values by 0x7FFF before being written as Int16.
 *
 * @param {Float32Array} float32Array - Input samples.
 * @returns {ArrayBuffer} Buffer holding two bytes per input sample.
 */
function floatTo16BitPCM(float32Array) {
  const BYTES_PER_SAMPLE = 2;
  const sampleCount = float32Array.length;
  const pcmBytes = new ArrayBuffer(sampleCount * BYTES_PER_SAMPLE);
  const writer = new DataView(pcmBytes);

  const scaleSample = (sample) => {
    const clamped = Math.max(-1, Math.min(1, sample));
    if (clamped < 0) {
      return clamped * 0x8000;
    }
    return clamped * 0x7FFF;
  };

  let index = 0;
  while (index < sampleCount) {
    // `true` selects little-endian byte order.
    writer.setInt16(index * BYTES_PER_SAMPLE, scaleSample(float32Array[index]), true);
    index += 1;
  }
  return pcmBytes;
}
/**
 * Initialise voice-activity detection.
 *
 * Builds the visualizer, creates an AudioContext, and constructs a MicVAD
 * instance (stored in `myvad`) whose callbacks update the status line and,
 * at the end of each detected utterance, send that utterance to the
 * WebSocket as 16-bit PCM. Failures are reported in the status line and the
 * console rather than thrown.
 */
async function initVAD() {
  const LISTENING_HTML = "<span class='recording-indicator'></span>🎤 正在监听中...";
  const SPEECH_DETECTED_HTML = "<span class='recording-indicator'></span>🟢 检测到语音,正在发送...";

  // Speech began: show the "speech detected" status and light up the visualizer.
  const handleSpeechStart = () => {
    statusDiv.innerHTML = SPEECH_DETECTED_HTML;
    visualizationDiv.style.opacity = "1";
  };

  // Speech ended: ship the captured utterance if the socket is open, then
  // fall back to the idle "listening" display.
  const handleSpeechEnd = (audio) => {
    const socketOpen = websocket && websocket.readyState === WebSocket.OPEN;
    if (socketOpen) {
      websocket.send(floatTo16BitPCM(audio));
    }
    statusDiv.innerHTML = LISTENING_HTML;
    visualizationDiv.style.opacity = "0.5";
  };

  try {
    statusDiv.innerText = "🔄 正在初始化 VAD...";
    createVisualization();

    const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
    const audioContext = new AudioContextCtor();

    const vadOptions = {
      model: "v5",
      positiveSpeechThreshold: 0.4,
      negativeSpeechThreshold: 0.4,
      minSpeechFrames: 15,
      preSpeechPadFrames: 30,
      onSpeechStart: handleSpeechStart,
      onSpeechEnd: handleSpeechEnd,
      // Asset locations match the CDN versions loaded by the page's <script> tags.
      onnxWASMBasePath: "https://unpkg.com/onnxruntime-web@1.22.0/dist/",
      baseAssetPath: "https://unpkg.com/@ricky0123/vad-web@0.0.27/dist/",
      audioContext: audioContext,
    };
    myvad = await vad.MicVAD.new(vadOptions);

    statusDiv.innerText = "✅ VAD 初始化完成,点击【开始监听】";
  } catch (err) {
    statusDiv.innerText = `❌ VAD 加载失败: ${err.message}`;
    console.error('VAD 初始化失败:', err);
  }
}
/**
 * Open the ASR WebSocket (or reuse one that is already open or connecting)
 * and wire up its handlers.
 *
 * - Incoming messages are treated as plain text and routed by the currently
 *   active tab: the realtime box is replaced, the manual and speaker boxes
 *   have non-empty results appended as new lines.
 * - When the socket closes, a reconnect is scheduled after 3 seconds.
 * - All three status lines are kept in sync with the connection state.
 *
 * Fix: the close/error handlers used to call `stopHeartbeat()`, which this
 * page never defines; the resulting ReferenceError aborted `onclose` before
 * the reconnect could be scheduled. The call is now made only if such a
 * function exists.
 */
function connectWebSocket() {
  const RECONNECT_DELAY_MS = 3000;
  const CONNECT_TIMEOUT_MS = 10000;

  // Write the same text to the status line of every tab.
  const setAllStatus = (text) => {
    [statusDiv, manualStatusDiv, speakerStatusDiv].forEach((el) => {
      el.innerText = text;
    });
  };

  // Append `text` to a result box as a new line (no leading newline when empty).
  const appendLine = (box, text) => {
    box.innerText += (box.innerText ? '\n' : '') + text;
  };

  // Stop a heartbeat timer only if the page actually provides one.
  const stopHeartbeatIfPresent = () => {
    if (typeof stopHeartbeat === 'function') {
      stopHeartbeat();
    }
  };

  try {
    // An open connection is reused as-is.
    if (websocket && websocket.readyState === WebSocket.OPEN) {
      console.log('WebSocket已连接无需重新连接');
      setAllStatus("🟢 WebSocket 已连接");
      return;
    }
    // A connection attempt that is still in flight is left alone.
    if (websocket && websocket.readyState === WebSocket.CONNECTING) {
      console.log('WebSocket正在连接中无需重新连接');
      return;
    }

    setAllStatus("🔄 正在连接 WebSocket...");
    // For local development, point this at ws://localhost:8000/ws/asr instead.
    const socket = new WebSocket('wss://ai.makesong.cn/ws/asr');
    websocket = socket;

    // Surface a slow connection attempt to the user. The socket is deliberately
    // not closed here; it is left to fail on its own, which triggers onclose.
    const connectionTimeout = setTimeout(() => {
      if (socket.readyState === WebSocket.CONNECTING) {
        setAllStatus("❌ WebSocket 连接超时,正在重连...");
      }
    }, CONNECT_TIMEOUT_MS);

    socket.onopen = () => {
      clearTimeout(connectionTimeout);
      setAllStatus("🟢 WebSocket 已连接");
    };

    socket.onmessage = (e) => {
      try {
        const data = e.data;
        console.log('WebSocket 收到消息:', data);
        const text = data || "";

        // Route by whichever tab is visible; default to the realtime box.
        const activePanel = document.querySelector('.tab-content.active');
        const activeTab = activePanel ? activePanel.id : 'realtime';

        if (activeTab === 'manual') {
          if (text) {
            appendLine(manualResultDiv, text);
          }
        } else if (activeTab === 'speaker') {
          if (text) {
            appendLine(speakerResultDiv, text);
          }
        } else {
          resultDiv.innerText = text;
        }
      } catch (err) {
        console.error('处理WebSocket消息时出错:', err);
        resultDiv.innerText = e.data;
      }
    };

    socket.onclose = () => {
      clearTimeout(connectionTimeout);
      stopHeartbeatIfPresent();
      setAllStatus("🔌 已断开连接,正在重连...");
      setTimeout(connectWebSocket, RECONNECT_DELAY_MS);
    };

    socket.onerror = (e) => {
      clearTimeout(connectionTimeout);
      stopHeartbeatIfPresent();
      setAllStatus("❌ WebSocket 错误");
      console.error('WebSocket 错误:', e);
    };
  } catch (error) {
    setAllStatus(`❌ WebSocket 连接失败: ${error.message}`);
    console.error('WebSocket 连接失败:', error);
  }
}
// Real-time recognition controls: "start listening" button.
// Starts the VAD (if one has been created), flips the start/stop buttons,
// and shows the "listening" status; any failure is reported in the status line.
startBtn.addEventListener('click', function () {
  if (!myvad) {
    return;
  }
  try {
    myvad.start();
    stopBtn.disabled = false;
    startBtn.disabled = true;
    visualizationDiv.style.opacity = "0.5";
    statusDiv.innerHTML = "<span class='recording-indicator'></span>🎤 正在监听中...";
  } catch (err) {
    console.error('启动 VAD 失败:', err);
    statusDiv.innerText = `❌ 启动失败: ${err.message}`;
  }
});
// Real-time recognition controls: "stop listening" button.
// Pauses the VAD (if one exists), flips the start/stop buttons, and dims the
// visualizer; any failure is reported in the status line.
stopBtn.addEventListener('click', function () {
  if (!myvad) {
    return;
  }
  try {
    // Only the VAD is paused here; this handler does not stop the media tracks.
    myvad.pause();
    stopBtn.disabled = true;
    startBtn.disabled = false;
    visualizationDiv.style.opacity = "0.3";
    statusDiv.innerText = "⏸️ 已停止监听";
  } catch (err) {
    console.error('停止 VAD 失败:', err);
    statusDiv.innerText = `❌ 停止失败: ${err.message}`;
  }
});
// Manual recognition: the button toggles microphone capture on and off.
//
// Fix: the previous handler decided between "start" and "stop" by inspecting
// `manualMediaRecorder`, which is never assigned, so the stop branch was
// unreachable. Every click re-requested the microphone, leaked the previous
// AudioContext and stream, and wiped the recording. The recording state is
// now derived from `manualAudioContext`, and the microphone stream is
// released when recording stops or start-up fails.

// Microphone stream of the recording in progress (null when idle).
let manualMicStream = null;

/** Collect audio with the deprecated ScriptProcessorNode (fallback path). */
function attachManualScriptProcessor(context, source) {
  const processor = context.createScriptProcessor(1024, 1, 1);
  source.connect(processor);
  processor.connect(context.destination);
  processor.onaudioprocess = (e) => {
    try {
      manualChunks.push(new Float32Array(e.inputBuffer.getChannelData(0)));
    } catch (err) {
      console.error('处理音频数据时出错:', err);
    }
  };
}

/**
 * Route `source` into a node that copies every audio block into
 * `manualChunks`. Prefers an AudioWorklet and falls back to
 * ScriptProcessorNode when worklets are unavailable or fail to load.
 */
async function attachManualCollector(context, source) {
  if (!context.audioWorklet) {
    attachManualScriptProcessor(context, source);
    return;
  }
  // Minimal worklet processor that forwards channel 0 of each block to the main thread.
  const processorScript = `
class AudioCollector extends AudioWorkletProcessor {
  process(inputs, outputs, parameters) {
    const input = inputs[0];
    if (input && input[0]) {
      this.port.postMessage(input[0]);
    }
    return true;
  }
}
registerProcessor('audio-collector', AudioCollector);
`;
  let moduleUrl = null;
  try {
    moduleUrl = URL.createObjectURL(new Blob([processorScript], {type: 'application/javascript'}));
    await context.audioWorklet.addModule(moduleUrl);
    const collector = new AudioWorkletNode(context, 'audio-collector');
    collector.port.onmessage = (e) => {
      try {
        manualChunks.push(new Float32Array(e.data));
      } catch (err) {
        console.error('处理音频数据时出错:', err);
      }
    };
    source.connect(collector);
    collector.connect(context.destination);
  } catch (workletErr) {
    console.warn('AudioWorklet 不可用,回退到 ScriptProcessorNode:', workletErr);
    attachManualScriptProcessor(context, source);
  } finally {
    // The blob URL is only needed while the module loads.
    if (moduleUrl) {
      URL.revokeObjectURL(moduleUrl);
    }
  }
}

/** Release the microphone and close the capture AudioContext, if any. */
async function stopManualCapture() {
  if (manualMicStream) {
    manualMicStream.getTracks().forEach((track) => track.stop());
    manualMicStream = null;
  }
  const context = manualAudioContext;
  manualAudioContext = null;
  if (context && context.state !== 'closed') {
    try {
      await context.close();
    } catch (err) {
      console.error('关闭音频上下文时出错:', err);
    }
  }
}

manualBtn.addEventListener('click', async () => {
  const isRecording = Boolean(manualAudioContext) && manualAudioContext.state !== 'closed';
  // Block re-entry while the permission prompt or teardown is in flight.
  manualBtn.disabled = true;
  try {
    if (!isRecording) {
      manualStatusDiv.innerHTML = "🔄 正在请求麦克风权限...";
      manualMicStream = await navigator.mediaDevices.getUserMedia({audio: true});
      manualAudioContext = new (window.AudioContext || window.webkitAudioContext)({
        sampleRate: 16000 // sample rate requested to match the speech recogniser
      });
      const source = manualAudioContext.createMediaStreamSource(manualMicStream);
      // Start from an empty buffer before any audio can arrive.
      manualChunks = [];
      await attachManualCollector(manualAudioContext, source);
      manualBtn.textContent = '停止录音';
      manualBtn.style.background = 'linear-gradient(45deg, #f44336, #e91e63)';
      sendManualBtn.disabled = false;
      manualStatusDiv.innerHTML = "<span class='recording-indicator'></span>🎤 正在录音...";
    } else {
      // Stop capturing but keep manualChunks so the recording can be sent.
      await stopManualCapture();
      manualBtn.textContent = '开始录音';
      manualBtn.style.background = 'linear-gradient(45deg, #2196F3, #03A9F4)';
      manualStatusDiv.innerText = "⏹️ 录音已停止,点击【发送识别】";
    }
  } catch (err) {
    manualStatusDiv.innerText = `❌ 操作失败: ${err.message}`;
    console.error('手动识别操作失败:', err);
    // Release anything that was half-started, then reset the controls.
    await stopManualCapture();
    manualBtn.textContent = '开始录音';
    manualBtn.style.background = 'linear-gradient(45deg, #2196F3, #03A9F4)';
    sendManualBtn.disabled = true;
  } finally {
    manualBtn.disabled = false;
  }
});
// "Send for recognition" button: concatenates every recorded chunk into one
// Float32Array, converts it to 16-bit PCM and sends it over the WebSocket.
// The status line reports what was sent, or why nothing was.
sendManualBtn.addEventListener('click', function () {
  const socketReady = Boolean(websocket) && websocket.readyState === WebSocket.OPEN;

  if (!socketReady) {
    manualStatusDiv.innerText = "⚠️ WebSocket未连接请等待连接建立";
    return;
  }
  if (manualChunks.length === 0) {
    manualStatusDiv.innerText = "⚠️ 没有录音数据可发送";
    return;
  }

  try {
    // First pass: total sample count. Second pass: copy each chunk into place.
    let sampleCount = 0;
    for (const part of manualChunks) {
      sampleCount += part.length;
    }
    const merged = new Float32Array(sampleCount);
    manualChunks.reduce((writePos, part) => {
      merged.set(part, writePos);
      return writePos + part.length;
    }, 0);

    websocket.send(floatTo16BitPCM(merged));
    manualStatusDiv.innerText = `📤 发送录音数据 (${merged.length} 个采样点)`;
  } catch (err) {
    manualStatusDiv.innerText = `❌ 发送数据时出错: ${err.message}`;
    console.error('发送录音数据时出错:', err);
  }
});
// Speaker (system audio) recognition: the button toggles capture on and off.
//
// Fixes relative to the previous handler:
//  - Start/stop was decided from `speakerMediaRecorder`, which is never
//    assigned, so the stop branch was unreachable and a second click opened
//    another screen-share prompt. State is now derived from
//    `speakerAudioContext`.
//  - The captured stream is released when setup fails or the shared track ends.
//  - The error text from the browser is no longer interpolated into innerHTML.
//  - The ScriptProcessorNode fallback existed in three copies; it is now one helper.

/** Store one captured block and periodically show the buffer size. */
function recordSpeakerChunk(samples) {
  try {
    speakerChunks.push(new Float32Array(samples));
    // Show the buffer size on every 10th chunk.
    if (speakerChunks.length % 10 === 0) {
      speakerStatusDiv.innerHTML = `<span class='recording-indicator'></span>🎤 正在录制扬声器... (缓冲区: ${speakerChunks.length})`;
    }
  } catch (err) {
    console.error('处理音频数据时出错:', err);
  }
}

/** Collect audio with the deprecated ScriptProcessorNode (fallback path). */
function attachSpeakerScriptProcessor(context, sourceNode) {
  const processor = context.createScriptProcessor(4096, 1, 1);
  console.log('创建处理器:', processor);
  sourceNode.connect(processor);
  processor.connect(context.destination);
  processor.onaudioprocess = (e) => recordSpeakerChunk(e.inputBuffer.getChannelData(0));
}

/**
 * Route `sourceNode` into a node that copies every audio block into
 * `speakerChunks`. Prefers an AudioWorklet and falls back to
 * ScriptProcessorNode when worklets are unavailable or fail to load.
 */
async function attachSpeakerCollector(context, sourceNode) {
  if (!context.audioWorklet) {
    attachSpeakerScriptProcessor(context, sourceNode);
    return;
  }
  // Minimal worklet processor that forwards channel 0 of each block to the main thread.
  const processorScript = `
class AudioCollector extends AudioWorkletProcessor {
  process(inputs, outputs, parameters) {
    const input = inputs[0];
    if (input && input[0]) {
      this.port.postMessage(input[0]);
    }
    return true;
  }
}
registerProcessor('audio-collector', AudioCollector);
`;
  let moduleUrl = null;
  try {
    moduleUrl = URL.createObjectURL(new Blob([processorScript], {type: 'application/javascript'}));
    await context.audioWorklet.addModule(moduleUrl);
    const collector = new AudioWorkletNode(context, 'audio-collector');
    collector.port.onmessage = (e) => recordSpeakerChunk(e.data);
    sourceNode.connect(collector);
    collector.connect(context.destination);
  } catch (workletErr) {
    console.warn('AudioWorklet 不可用,回退到 ScriptProcessorNode:', workletErr);
    attachSpeakerScriptProcessor(context, sourceNode);
  } finally {
    // The blob URL is only needed while the module loads.
    if (moduleUrl) {
      URL.revokeObjectURL(moduleUrl);
    }
  }
}

/** Close the capture AudioContext and stop every track of the shared stream. */
function releaseSpeakerResources() {
  if (speakerAudioContext) {
    const closing = speakerAudioContext.close();
    speakerAudioContext = null;
    if (closing && typeof closing.catch === 'function') {
      closing.catch((err) => console.error('关闭音频上下文时出错:', err));
    }
  }
  if (window.speakerStream) {
    window.speakerStream.getTracks().forEach((track) => track.stop());
    window.speakerStream = null;
  }
}

/** Put the speaker controls back into their idle state and drop buffered audio. */
function resetSpeakerControls() {
  speakerBtn.textContent = '开始录制扬声器';
  speakerBtn.style.background = 'linear-gradient(45deg, #9C27B0, #E91E63)';
  speakerStatusDiv.innerText = "⏹️ 录制已停止";
  stopSpeakerBtn.disabled = true;
  speakerChunks = [];
}

/** Show a user-facing explanation for a failed capture attempt. */
function showSpeakerError(err) {
  if (err.name === 'NotSupportedError') {
    speakerStatusDiv.innerHTML = "❌ 系统不支持扬声器录制功能<br/>" +
      "请尝试以下解决方案:<br/>" +
      "1. 确保在屏幕共享时选择包含音频的选项<br/>" +
      "2. 检查系统音频设置和权限<br/>" +
      "3. 在 Windows 系统中,确保已启用立体声混音设备";
  } else if (err.name === 'NotAllowedError' || err.name === 'PermissionDeniedError') {
    speakerStatusDiv.innerText = "❌ 用户拒绝了屏幕共享或音频访问权限";
  } else if (err.name === 'NotFoundError' || err.name === 'OverconstrainedError') {
    speakerStatusDiv.innerText = "❌ 未找到可用的音频输入设备";
  } else if (err.message && err.message.includes('Error starting capture')) {
    speakerStatusDiv.innerHTML = "❌ 启动捕获失败<br/>" +
      "请尝试以下解决方案:<br/>" +
      "1. 重新点击按钮再次尝试<br/>" +
      "2. 重启应用程序<br/>" +
      "3. 检查系统音频驱动程序<br/>" +
      "4. 确保没有其他应用程序正在使用音频设备";
  } else {
    // innerText (newlines render as line breaks) keeps the browser-supplied
    // error text from being parsed as markup.
    speakerStatusDiv.innerText = [
      `❌ 获取扬声器失败: ${err.message || err.name}`,
      "请尝试以下解决方案:",
      "1. 确保使用最新版本的 Electron",
      "2. 检查应用权限设置",
      "3. 重启应用程序",
    ].join('\n');
  }
}

speakerBtn.addEventListener('click', async () => {
  // A live AudioContext means a capture is running: this click stops it.
  if (speakerAudioContext) {
    releaseSpeakerResources();
    resetSpeakerControls();
    return;
  }

  // Block re-entry while the screen-share prompt is open.
  speakerBtn.disabled = true;
  try {
    speakerStatusDiv.innerHTML = "🔄 正在请求音频流...";
    // Video is requested alongside audio because some browsers refuse
    // audio-only display capture.
    const stream = await navigator.mediaDevices.getDisplayMedia({video: true, audio: true});
    // Keep a reference so any later stop path can release the stream.
    window.speakerStream = stream;

    // Only the audio is of interest; stop the video tracks straight away.
    stream.getVideoTracks().forEach((track) => track.stop());
    speakerStatusDiv.innerHTML = "🔄 正在设置音频处理...";

    const audioTracks = stream.getAudioTracks();
    console.log('音频轨道数量:', audioTracks.length);
    console.log('音频轨道详情:', audioTracks);
    if (audioTracks.length === 0) {
      releaseSpeakerResources();
      speakerStatusDiv.innerText = "⚠️ 音频流不包含音频轨道";
      return;
    }

    // If the user ends sharing from the browser UI, tear everything down.
    audioTracks.forEach((track) => {
      track.addEventListener('ended', () => {
        console.log('音频轨道结束');
        // Ignore events from a stream that has already been replaced or released.
        if (window.speakerStream !== stream) {
          return;
        }
        releaseSpeakerResources();
        resetSpeakerControls();
      });
    });

    speakerAudioContext = new (window.AudioContext || window.webkitAudioContext)({
      sampleRate: 16000 // sample rate requested to match the speech recogniser
    });
    console.log('创建音频上下文:', speakerAudioContext);
    const sourceNode = speakerAudioContext.createMediaStreamSource(stream);
    console.log('创建媒体流源:', sourceNode);

    // Start from an empty buffer before any audio can arrive.
    speakerChunks = [];
    await attachSpeakerCollector(speakerAudioContext, sourceNode);

    // The capture may have been torn down while the worklet was loading.
    if (!speakerAudioContext) {
      return;
    }
    speakerBtn.textContent = '停止录制';
    speakerBtn.style.background = 'linear-gradient(45deg, #f44336, #e91e63)';
    stopSpeakerBtn.disabled = false;
    speakerStatusDiv.innerHTML = "<span class='recording-indicator'></span>🎤 正在录制扬声器...";
  } catch (err) {
    console.error('获取扬声器完整错误:', err);
    // Do not leave a half-initialised capture holding the shared stream.
    releaseSpeakerResources();
    showSpeakerError(err);
  } finally {
    speakerBtn.disabled = false;
  }
});
// Periodic flush of captured speaker audio.
// Every SPEAKER_FLUSH_INTERVAL_MS, any buffered chunks are merged, converted
// to 16-bit PCM, sent over the WebSocket, and the buffer is emptied. Nothing
// happens while the buffer is empty or the socket is not open.
//
// Fixes: the interval is 3 seconds (earlier comments said 1 second), and the
// socket is now null-checked before `readyState` is read, so a failed initial
// connection no longer throws on every tick.
const SPEAKER_FLUSH_INTERVAL_MS = 3000;
const sendInterval = setInterval(() => {
  const socketReady = Boolean(websocket) && websocket.readyState === WebSocket.OPEN;
  if (speakerChunks.length === 0 || !socketReady) {
    return;
  }

  // Merge all buffered chunks into one contiguous sample array.
  const totalLength = speakerChunks.reduce((acc, chunk) => acc + chunk.length, 0);
  const fullAudio = new Float32Array(totalLength);
  let offset = 0;
  for (const chunk of speakerChunks) {
    fullAudio.set(chunk, offset);
    offset += chunk.length;
  }

  // Convert to PCM and send.
  websocket.send(floatTo16BitPCM(fullAudio));
  speakerStatusDiv.innerText = `📤 发送扬声器数据 (${fullAudio.length} 个采样点)`;

  // Drop what has just been sent.
  speakerChunks = [];
}, SPEAKER_FLUSH_INTERVAL_MS);
// "Stop recording" button for speaker capture: sends whatever audio is still
// buffered, releases the capture resources, and resets the controls.
//
// Fixes:
//  - The handler used to call clearInterval(sendInterval). That timer is
//    created once at page load, so after the first stop no later recording
//    was ever flushed periodically. The timer is now left running; it does
//    nothing while the buffer is empty.
//  - The outcome message (sent / nothing recorded / not connected) was
//    immediately overwritten by the generic "stopped" text. The final status
//    now carries both.
//  - A failing send no longer prevents the capture from being released.
stopSpeakerBtn.addEventListener('click', () => {
  const hasAudio = Boolean(speakerChunks) && speakerChunks.length > 0;
  const socketReady = Boolean(websocket) && websocket.readyState === WebSocket.OPEN;
  let outcome;

  if (hasAudio && socketReady) {
    try {
      // Merge all buffered chunks into one contiguous sample array.
      const totalLength = speakerChunks.reduce((acc, chunk) => acc + chunk.length, 0);
      const fullAudio = new Float32Array(totalLength);
      let offset = 0;
      for (const chunk of speakerChunks) {
        fullAudio.set(chunk, offset);
        offset += chunk.length;
      }
      // Convert to PCM and send.
      websocket.send(floatTo16BitPCM(fullAudio));
      outcome = `📤 发送扬声器数据 (${fullAudio.length} 个采样点)`;
    } catch (err) {
      console.error('发送扬声器数据时出错:', err);
      outcome = `❌ 发送数据时出错: ${err.message}`;
    }
  } else if (!hasAudio) {
    outcome = "⚠️ 没有录制到音频数据";
  } else {
    outcome = "⚠️ 没有录制数据可发送或WebSocket未连接";
  }

  // Actually stop recording: close the audio context...
  if (speakerAudioContext) {
    speakerAudioContext.close();
    speakerAudioContext = null;
  }
  // ...and stop every track of the shared stream.
  if (window.speakerStream) {
    window.speakerStream.getTracks().forEach(track => {
      track.stop();
    });
    window.speakerStream = null;
  }

  // Reset the controls and report both the stop and the send outcome.
  speakerBtn.textContent = '开始录制扬声器';
  speakerBtn.style.background = 'linear-gradient(45deg, #9C27B0, #E91E63)';
  speakerStatusDiv.innerText = `⏹️ 录制已停止 · ${outcome}`;
  stopSpeakerBtn.disabled = true;
  speakerChunks = [];
});
// Bootstrap: once the page has loaded, kick off VAD initialisation and open
// the ASR WebSocket. initVAD() is started without being awaited.
window.onload = function () {
  initVAD();
  connectWebSocket();
};
</script>
</body>
</html>