<!DOCTYPE html>
|
||
<html lang="zh">
|
||
|
||
<head>
|
||
<meta charset="UTF-8">
|
||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||
<title>SenseVoice + VAD 实时识别</title>
|
||
<style>
    /* ---------- Page shell ---------- */
    body {
        font-family: Arial, sans-serif;
        padding: 20px;
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        min-height: 100vh;
        margin: 0;
        color: white;
    }

    .container {
        max-width: 1000px;
        margin: 0 auto;
        background: rgba(255, 255, 255, 0.1);
        padding: 20px;
        border-radius: 15px;
        backdrop-filter: blur(10px);
        box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
    }

    h1 {
        text-align: center;
        margin-bottom: 20px;
        font-size: 2.5em;
        text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3);
    }

    /* ---------- Tabs ---------- */
    .tabs {
        display: flex;
        flex-direction: column;
        width: 100%;
    }

    .tab-buttons {
        display: flex;
        border-bottom: 2px solid rgba(255, 255, 255, 0.2);
        margin-bottom: 20px;
    }

    .tab-button {
        padding: 15px 30px;
        background: rgba(255, 255, 255, 0.1);
        border: none;
        /* The default ring is suppressed for pointer focus only; keyboard focus
           gets an explicit ring from the :focus-visible rule below. */
        outline: none;
        cursor: pointer;
        transition: all 0.3s ease;
        font-size: 16px;
        font-weight: bold;
        color: rgba(255, 255, 255, 0.7);
        border-radius: 10px 10px 0 0;
    }

    .tab-button:hover {
        background: rgba(255, 255, 255, 0.2);
    }

    /* Keyboard users must be able to see which tab has focus. */
    .tab-button:focus-visible {
        outline: 2px solid #FFD700;
        outline-offset: 2px;
    }

    .tab-button.active {
        background: rgba(255, 255, 255, 0.3);
        color: white;
    }

    /* Panels are hidden unless they carry .active (toggled by openTab). */
    .tab-content {
        display: none;
        padding: 20px;
        background: rgba(255, 255, 255, 0.05);
        border-radius: 0 10px 10px 10px;
    }

    .tab-content.active {
        display: block;
        animation: fadeIn 0.5s;
    }

    @keyframes fadeIn {
        from {
            opacity: 0;
        }

        to {
            opacity: 1;
        }
    }

    /* ---------- Action buttons ---------- */
    .controls {
        display: flex;
        justify-content: center;
        gap: 20px;
        margin-bottom: 20px;
        flex-wrap: wrap;
    }

    button {
        padding: 15px 30px;
        font-size: 18px;
        border: none;
        border-radius: 50px;
        cursor: pointer;
        transition: all 0.3s ease;
        font-weight: bold;
        box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
    }

    #startBtn {
        background: linear-gradient(45deg, #4CAF50, #8BC34A);
        color: white;
    }

    #stopBtn {
        background: linear-gradient(45deg, #f44336, #e91e63);
        color: white;
    }

    #manualBtn {
        background: linear-gradient(45deg, #2196F3, #03A9F4);
        color: white;
    }

    #sendManualBtn {
        background: linear-gradient(45deg, #FF9800, #FFC107);
        color: white;
    }

    #speakerBtn {
        background: linear-gradient(45deg, #9C27B0, #E91E63);
        color: white;
    }

    #stopSpeakerBtn {
        background: linear-gradient(45deg, #f44336, #e91e63);
        color: white;
    }

    button:hover:not(:disabled) {
        transform: translateY(-3px);
        box-shadow: 0 6px 20px rgba(0, 0, 0, 0.3);
    }

    button:disabled {
        opacity: 0.6;
        cursor: not-allowed;
    }

    /* ---------- Status and results ---------- */
    .status-container {
        text-align: center;
        margin: 20px 0;
        padding: 15px;
        background: rgba(255, 255, 255, 0.15);
        border-radius: 10px;
        font-size: 16px;
    }

    #status,
    #manualStatus,
    #speakerStatus {
        font-weight: bold;
        margin-bottom: 10px;
    }

    .result-container {
        margin: 20px 0;
    }

    .result-box {
        min-height: 120px;
        padding: 20px;
        background: rgba(0, 0, 0, 0.2);
        border-radius: 10px;
        font-size: 18px;
        line-height: 1.6;
        white-space: pre-wrap;
        overflow-y: auto;
        max-height: 200px;
        margin-bottom: 15px;
    }

    .info {
        margin-top: 20px;
        padding: 15px;
        background: rgba(255, 255, 255, 0.1);
        border-radius: 10px;
        font-size: 14px;
    }

    /* ---------- Audio visualization ---------- */
    .visualization {
        height: 80px;
        background: rgba(0, 0, 0, 0.1);
        border-radius: 10px;
        margin: 15px 0;
        display: flex;
        align-items: center;
        justify-content: center;
    }

    /* One bar per element created by createVisualization(). */
    .bar {
        width: 8px;
        height: 30px;
        background: linear-gradient(to top, #4CAF50, #81C784);
        margin: 0 2px;
        border-radius: 2px;
        animation: pulse 1s infinite;
    }

    @keyframes pulse {
        0%,
        100% {
            height: 30px;
        }

        50% {
            height: 60px;
        }
    }

    .section-title {
        font-size: 1.5em;
        margin-bottom: 15px;
        text-align: center;
        color: #FFD700;
    }

    /* Blinking red dot shown in status lines while capturing audio. */
    .recording-indicator {
        display: inline-block;
        width: 12px;
        height: 12px;
        background-color: #f44336;
        border-radius: 50%;
        margin-right: 10px;
        animation: blink 1s infinite;
    }

    @keyframes blink {
        0%,
        100% {
            opacity: 1;
        }

        50% {
            opacity: 0.3;
        }
    }
</style>
|
||
</head>
|
||
|
||
<body>
|
||
<div class="container">
    <h1>🎙️ SenseVoiceSmall 语音识别</h1>

    <div class="tabs">
        <!-- Tab buttons: each one calls openTab() with the id of the panel to show -->
        <div class="tab-buttons">
            <button type="button" class="tab-button active" onclick="openTab(event, 'realtime')">VAD识别</button>
            <button type="button" class="tab-button" onclick="openTab(event, 'manual')">手动识别</button>
            <button type="button" class="tab-button" onclick="openTab(event, 'speaker')">扬声器识别</button>
        </div>

        <!-- Real-time (VAD) recognition panel -->
        <div id="realtime" class="tab-content active">
            <div class="section-title">VAD语音识别</div>
            <div class="controls">
                <button type="button" id="startBtn">开始监听</button>
                <button type="button" id="stopBtn" disabled>停止监听</button>
            </div>

            <div class="status-container">
                <div id="status">状态:初始化中...</div>
                <!-- Purely decorative animated bars, filled in by createVisualization() -->
                <div class="visualization" id="visualization" aria-hidden="true"></div>
            </div>

            <div class="result-container">
                <h2>实时识别结果</h2>
                <!-- aria-live lets assistive tech announce results that arrive asynchronously -->
                <div id="result" class="result-box" aria-live="polite">识别结果将显示在这里...</div>
            </div>
        </div>

        <!-- Manual recognition panel -->
        <div id="manual" class="tab-content">
            <div class="section-title">手动语音识别</div>
            <div class="controls">
                <button type="button" id="manualBtn">开始录音</button>
                <button type="button" id="sendManualBtn" disabled>发送识别</button>
            </div>

            <div class="status-container">
                <div id="manualStatus">状态:等待录音...</div>
            </div>

            <div class="result-container">
                <h2>手动识别结果</h2>
                <div id="manualResult" class="result-box" aria-live="polite">识别结果将显示在这里...</div>
            </div>
        </div>

        <!-- Speaker (system audio) recognition panel -->
        <div id="speaker" class="tab-content">
            <div class="section-title">扬声器识别</div>
            <div class="controls">
                <button type="button" id="speakerBtn">开始录制扬声器</button>
                <button type="button" id="stopSpeakerBtn" disabled>停止录制</button>
            </div>

            <div class="status-container">
                <div id="speakerStatus">状态:等待录制...</div>
            </div>

            <div class="result-container">
                <h2>扬声器识别结果</h2>
                <div id="speakerResult" class="result-box" aria-live="polite">识别结果将显示在这里...</div>
            </div>
        </div>
    </div>

    <div class="info">
        <h3>使用说明</h3>
        <ul>
            <li><strong>实时识别</strong>:自动检测语音并发送到后端进行识别</li>
            <li><strong>手动识别</strong>:点击按钮开始录音,再次点击按钮结束录音并发送识别</li>
            <li><strong>扬声器识别</strong>:录制系统扬声器输出的声音并识别</li>
            <li>所有识别结果将实时显示在对应标签页中</li>
        </ul>
    </div>
</div>
|
||
|
||
<!--
  Third-party runtime: ONNX Runtime Web plus the @ricky0123/vad-web bundle.
  The versions pinned here are the same ones named in the onnxWASMBasePath and
  baseAssetPath URLs passed to vad.MicVAD.new() in initVAD; keep them in step.
  Previously tried pair: onnxruntime-web 1.14.0 with vad-web 0.0.26.
-->
<script src="https://unpkg.com/onnxruntime-web@1.22.0/dist/ort.js"></script>
<script src="https://unpkg.com/@ricky0123/vad-web@0.0.27/dist/bundle.min.js"></script>
|
||
|
||
<script>
|
||
// Tab switching
/**
 * Show one tab panel and mark the clicked tab button as active.
 *
 * @param {Event} evt - Click event; `evt.currentTarget` is the button to highlight.
 * @param {string} tabName - Element id of the panel to show.
 */
function openTab(evt, tabName) {
    const clearActive = (el) => el.classList.remove("active");

    // Reset every panel and every tab button before activating the chosen pair.
    Array.from(document.getElementsByClassName("tab-content")).forEach(clearActive);
    Array.from(document.getElementsByClassName("tab-button")).forEach(clearActive);

    const panel = document.getElementById(tabName);
    panel.classList.add("active");
    evt.currentTarget.classList.add("active");
}
|
||
|
||
// ---- Shared runtime state ----
let myvad = null;                // MicVAD instance; assigned by initVAD()
let websocket = null;            // recognition WebSocket; assigned by connectWebSocket()

// Manual (push-to-record) capture
let manualAudioContext = null;
let manualMediaRecorder = null;  // NOTE(review): no visible code ever assigns this
let manualChunks = [];           // Float32Array blocks collected while recording

// Speaker (system audio) capture
let speakerAudioContext = null;
let speakerMediaRecorder = null; // NOTE(review): no visible code ever assigns this
let speakerChunks = [];          // Float32Array blocks waiting to be sent

// ---- DOM references ----
const statusDiv = document.querySelector('#status');
const resultDiv = document.querySelector('#result');
const startBtn = document.querySelector('#startBtn');
const stopBtn = document.querySelector('#stopBtn');
const manualBtn = document.querySelector('#manualBtn');
const sendManualBtn = document.querySelector('#sendManualBtn');
const manualStatusDiv = document.querySelector('#manualStatus');
const manualResultDiv = document.querySelector('#manualResult');
const speakerBtn = document.querySelector('#speakerBtn');
const stopSpeakerBtn = document.querySelector('#stopSpeakerBtn');
const speakerStatusDiv = document.querySelector('#speakerStatus');
const speakerResultDiv = document.querySelector('#speakerResult');
const visualizationDiv = document.querySelector('#visualization');

// The three bindings below are referenced only by commented-out frame-streaming
// code in the visible script. BUFFER_THRESHOLD evaluates to 10240.
let audioBuffer = new Float32Array(0);
const BUFFER_THRESHOLD = 10 * 1024;
let open = false;
|
||
|
||
// Build the audio visualization bars
/**
 * Replace the contents of `visualizationDiv` with 20 `.bar` elements, each
 * starting its CSS animation 0.1s after the previous one.
 */
function createVisualization() {
    const BAR_COUNT = 20;

    visualizationDiv.innerHTML = '';

    Array.from({ length: BAR_COUNT }, (_, index) => {
        const el = document.createElement('div');
        el.className = 'bar';
        el.style.animationDelay = `${index * 0.1}s`;
        return el;
    }).forEach((el) => visualizationDiv.appendChild(el));
}
|
||
|
||
// Float32 samples -> 16-bit little-endian PCM
/**
 * Convert floating-point audio samples to signed 16-bit little-endian PCM.
 *
 * Each sample is clamped to [-1, 1]; negative values are scaled by 0x8000 and
 * non-negative values by 0x7FFF, so -1 maps to -32768 and 1 maps to 32767.
 *
 * @param {Float32Array} float32Array - Input samples.
 * @returns {ArrayBuffer} Buffer holding two bytes per input sample.
 */
function floatTo16BitPCM(float32Array) {
    const BYTES_PER_SAMPLE = 2;
    const pcmBytes = new ArrayBuffer(float32Array.length * BYTES_PER_SAMPLE);
    const writer = new DataView(pcmBytes);

    float32Array.forEach((sample, idx) => {
        const clamped = Math.min(1, Math.max(-1, sample));
        const scale = clamped < 0 ? 0x8000 : 0x7FFF;
        writer.setInt16(idx * BYTES_PER_SAMPLE, clamped * scale, true);
    });

    return pcmBytes;
}
|
||
|
||
// Initialise voice-activity detection
/**
 * Create the MicVAD instance and store it in `myvad`.
 *
 * On speech start the status line and visualization are updated; on speech end
 * the captured segment is converted with floatTo16BitPCM() and sent over
 * `websocket` when that socket is open. Any failure is shown in `statusDiv`
 * and logged; nothing is rethrown.
 *
 * An earlier frame-by-frame streaming variant based on `onFrameProcessed`
 * (using `audioBuffer` and `open`) was disabled in favour of whole-segment
 * sending and is not reproduced here.
 */
async function initVAD() {
    const LISTENING_HTML = "<span class='recording-indicator'></span>🎤 正在监听中...";
    const SPEAKING_HTML = "<span class='recording-indicator'></span>🟢 检测到语音,正在发送...";

    const handleSpeechStart = () => {
        statusDiv.innerHTML = SPEAKING_HTML;
        visualizationDiv.style.opacity = "1";
    };

    const handleSpeechEnd = (audio) => {
        const socketOpen = websocket && websocket.readyState === WebSocket.OPEN;
        if (socketOpen) {
            websocket.send(floatTo16BitPCM(audio));
        }
        statusDiv.innerHTML = LISTENING_HTML;
        visualizationDiv.style.opacity = "0.5";
    };

    try {
        statusDiv.innerText = "🔄 正在初始化 VAD...";
        createVisualization();

        // Audio context handed to MicVAD through its options.
        const AudioContextCtor = window.AudioContext || window.webkitAudioContext;
        const audioContext = new AudioContextCtor();

        const vadOptions = {
            model: "v5",
            positiveSpeechThreshold: 0.4,
            negativeSpeechThreshold: 0.4,
            minSpeechFrames: 15,
            preSpeechPadFrames: 30,
            onSpeechStart: handleSpeechStart,
            onSpeechEnd: handleSpeechEnd,
            onnxWASMBasePath: "https://unpkg.com/onnxruntime-web@1.22.0/dist/",
            baseAssetPath: "https://unpkg.com/@ricky0123/vad-web@0.0.27/dist/",
            audioContext: audioContext,
        };

        myvad = await vad.MicVAD.new(vadOptions);
        statusDiv.innerText = "✅ VAD 初始化完成,点击【开始监听】";
    } catch (err) {
        statusDiv.innerText = `❌ VAD 加载失败: ${err.message}`;
        console.error('VAD 初始化失败:', err);
    }
}
|
||
|
||
// Connect the recognition WebSocket
/**
 * Open the ASR WebSocket (stored in the global `websocket`) and wire up its
 * handlers. Does nothing beyond refreshing the status lines when a connection
 * is already open, and nothing at all while one is still being established.
 *
 * Incoming messages are routed by the currently visible tab:
 *   - "manual" / "speaker": non-empty payloads are appended on a new line;
 *   - anything else (including "realtime"): the payload replaces the text of
 *     the real-time result box (an empty payload clears it).
 *
 * When the socket closes, a reconnect attempt is scheduled after 3 seconds.
 */
function connectWebSocket() {
    // Mirror one message into the status line of all three tabs.
    const setAllStatus = (text) => {
        statusDiv.innerText = text;
        manualStatusDiv.innerText = text;
        speakerStatusDiv.innerText = text;
    };

    // Append `text` to a result box, separated by a newline if the box has content.
    const appendLine = (box, text) => {
        box.innerText += (box.innerText ? '\n' : '') + text;
    };

    // `stopHeartbeat` is not defined in this file. Calling it unguarded threw a
    // ReferenceError inside onclose, which aborted the handler before the
    // reconnect was scheduled. Call it only if some other script provides it.
    const haltHeartbeat = () => {
        if (typeof stopHeartbeat === 'function') {
            stopHeartbeat();
        }
    };

    try {
        // An open connection is reused as-is.
        if (websocket && websocket.readyState === WebSocket.OPEN) {
            console.log('WebSocket已连接,无需重新连接');
            setAllStatus("🟢 WebSocket 已连接");
            return;
        }

        // A connection attempt already in flight is left alone.
        if (websocket && websocket.readyState === WebSocket.CONNECTING) {
            console.log('WebSocket正在连接中,无需重新连接');
            return;
        }

        setAllStatus("🔄 正在连接 WebSocket...");

        // Local development endpoint: ws://localhost:8000/ws/asr
        websocket = new WebSocket('wss://ai.makesong.cn/ws/asr');

        // After 10 s still CONNECTING: only report it. The socket is deliberately
        // not closed here; its own failure is left to trigger onclose.
        const connectionTimeout = setTimeout(() => {
            if (websocket && websocket.readyState === WebSocket.CONNECTING) {
                setAllStatus("❌ WebSocket 连接超时,正在重连...");
            }
        }, 10000);

        websocket.onopen = () => {
            clearTimeout(connectionTimeout);
            setAllStatus("🟢 WebSocket 已连接");
        };

        websocket.onmessage = (e) => {
            try {
                const data = e.data;
                console.log('WebSocket 收到消息:', data);

                const activeTab = document.querySelector('.tab-content.active').id;
                if (activeTab === 'manual') {
                    if (data) {
                        appendLine(manualResultDiv, data);
                    }
                } else if (activeTab === 'speaker') {
                    if (data) {
                        appendLine(speakerResultDiv, data);
                    }
                } else {
                    // Real-time tab, or any unexpected tab id.
                    resultDiv.innerText = data || "";
                }
            } catch (err) {
                console.error('处理WebSocket消息时出错:', err);
                resultDiv.innerText = e.data;
            }
        };

        websocket.onclose = () => {
            clearTimeout(connectionTimeout);
            haltHeartbeat();
            setAllStatus("🔌 已断开连接,正在重连...");
            setTimeout(connectWebSocket, 3000); // retry
        };

        websocket.onerror = (e) => {
            clearTimeout(connectionTimeout);
            haltHeartbeat();
            setAllStatus("❌ WebSocket 错误");
            console.error('WebSocket 错误:', e);
        };
    } catch (error) {
        setAllStatus(`❌ WebSocket 连接失败: ${error.message}`);
        console.error('WebSocket 连接失败:', error);
    }
}
|
||
|
||
// Real-time recognition controls: start listening
startBtn.addEventListener('click', function handleStartListening() {
    // Nothing to start until initVAD() has produced an instance.
    if (!myvad) {
        return;
    }

    try {
        myvad.start();

        startBtn.disabled = true;
        stopBtn.disabled = false;
        statusDiv.innerHTML = "<span class='recording-indicator'></span>🎤 正在监听中...";
        visualizationDiv.style.opacity = "0.5";
    } catch (err) {
        console.error('启动 VAD 失败:', err);
        statusDiv.innerText = `❌ 启动失败: ${err.message}`;
    }
});
|
||
|
||
// Real-time recognition controls: stop listening
stopBtn.addEventListener('click', function handleStopListening() {
    if (!myvad) {
        return;
    }

    try {
        // Only pauses the VAD. Stopping the tracks of `myvad.stream` was tried
        // at some point and is currently disabled.
        myvad.pause();

        startBtn.disabled = false;
        stopBtn.disabled = true;
        statusDiv.innerText = "⏸️ 已停止监听";
        visualizationDiv.style.opacity = "0.3";
    } catch (err) {
        console.error('停止 VAD 失败:', err);
        statusDiv.innerText = `❌ 停止失败: ${err.message}`;
    }
});
|
||
|
||
// Manual recognition: the button toggles microphone recording on and off

// Microphone stream of the recording in progress, kept so its tracks can be stopped.
let manualStream = null;

/**
 * Feed audio from `source` into `manualChunks`.
 *
 * Uses an AudioWorklet when the context offers one and falls back to a
 * ScriptProcessorNode (buffer size 1024, mono) when it does not or when setting
 * the worklet up fails.
 *
 * @param {AudioContext} context - Context that owns `source`.
 * @param {MediaStreamAudioSourceNode} source - Microphone source node.
 */
async function attachManualCollector(context, source) {
    const storeChunk = (samples) => {
        try {
            manualChunks.push(new Float32Array(samples));
        } catch (err) {
            console.error('处理音频数据时出错:', err);
        }
    };

    const useScriptProcessor = () => {
        const node = context.createScriptProcessor(1024, 1, 1);
        source.connect(node);
        node.connect(context.destination);
        node.onaudioprocess = (e) => storeChunk(e.inputBuffer.getChannelData(0));
    };

    if (!context.audioWorklet) {
        useScriptProcessor();
        return;
    }

    try {
        // Minimal worklet processor that forwards the first input channel.
        const processorScript = `
            class AudioCollector extends AudioWorkletProcessor {
                process(inputs, outputs, parameters) {
                    const input = inputs[0];
                    if (input && input[0]) {
                        this.port.postMessage(input[0]);
                    }
                    return true;
                }
            }

            registerProcessor('audio-collector', AudioCollector);
        `;

        const blob = new Blob([processorScript], {type: 'application/javascript'});
        const url = URL.createObjectURL(blob);
        try {
            await context.audioWorklet.addModule(url);
        } finally {
            // The module has been fetched (or has failed); release the blob URL.
            URL.revokeObjectURL(url);
        }

        const node = new AudioWorkletNode(context, 'audio-collector');
        node.port.onmessage = (e) => storeChunk(e.data);
        source.connect(node);
        node.connect(context.destination);
    } catch (workletErr) {
        console.warn('AudioWorklet 不可用,回退到 ScriptProcessorNode:', workletErr);
        useScriptProcessor();
    }
}

/**
 * Stop the microphone tracks and close the manual audio context, if any.
 * Recorded data in `manualChunks` is left untouched so it can still be sent.
 */
async function releaseManualCapture() {
    if (manualStream) {
        manualStream.getTracks().forEach((track) => track.stop());
        manualStream = null;
    }

    const context = manualAudioContext;
    manualAudioContext = null;
    if (context && context.state !== 'closed') {
        try {
            await context.close();
        } catch (err) {
            console.error('关闭音频上下文时出错:', err);
        }
    }
}

manualBtn.addEventListener('click', async () => {
    // The original toggled on `manualMediaRecorder`, which is never assigned, so
    // every click started another recording. A live audio context is the real
    // "recording in progress" signal.
    const recording = Boolean(manualAudioContext) && manualAudioContext.state !== 'closed';

    // Block re-entry while permission prompts or teardown are pending.
    manualBtn.disabled = true;
    try {
        if (!recording) {
            manualStatusDiv.innerHTML = "🔄 正在请求麦克风权限...";
            manualStream = await navigator.mediaDevices.getUserMedia({audio: true});

            manualAudioContext = new (window.AudioContext || window.webkitAudioContext)({
                sampleRate: 16000 // sample rate requested for the recogniser
            });

            // Start from an empty buffer before any audio callback can fire.
            manualChunks = [];

            const source = manualAudioContext.createMediaStreamSource(manualStream);
            await attachManualCollector(manualAudioContext, source);

            manualBtn.textContent = '停止录音';
            manualBtn.style.background = 'linear-gradient(45deg, #f44336, #e91e63)';
            sendManualBtn.disabled = false;
            manualStatusDiv.innerHTML = "<span class='recording-indicator'></span>🎤 正在录音...";
        } else {
            await releaseManualCapture();

            manualBtn.textContent = '开始录音';
            manualBtn.style.background = 'linear-gradient(45deg, #2196F3, #03A9F4)';
            manualStatusDiv.innerText = "⏹️ 录音已停止,点击【发送识别】";
        }
    } catch (err) {
        // Do not leave a half-initialised context or an open microphone behind.
        await releaseManualCapture();

        manualStatusDiv.innerText = `❌ 操作失败: ${err.message}`;
        console.error('手动识别操作失败:', err);

        // Reset the buttons to the idle state.
        manualBtn.textContent = '开始录音';
        manualBtn.style.background = 'linear-gradient(45deg, #2196F3, #03A9F4)';
        sendManualBtn.disabled = true;
    } finally {
        manualBtn.disabled = false;
    }
});
|
||
|
||
// Manual recognition: send everything recorded so far
sendManualBtn.addEventListener('click', function handleSendManual() {
    const socketReady = websocket && websocket.readyState === WebSocket.OPEN;

    if (!socketReady) {
        manualStatusDiv.innerText = "⚠️ WebSocket未连接,请等待连接建立";
        return;
    }

    if (manualChunks.length === 0) {
        manualStatusDiv.innerText = "⚠️ 没有录音数据可发送";
        return;
    }

    try {
        // Concatenate every recorded chunk into one contiguous sample array.
        let sampleCount = 0;
        manualChunks.forEach((part) => {
            sampleCount += part.length;
        });

        const fullAudio = new Float32Array(sampleCount);
        manualChunks.reduce((writePos, part) => {
            fullAudio.set(part, writePos);
            return writePos + part.length;
        }, 0);

        // Convert to 16-bit PCM and send.
        websocket.send(floatTo16BitPCM(fullAudio));
        manualStatusDiv.innerText = `📤 发送录音数据 (${fullAudio.length} 个采样点)`;
    } catch (err) {
        manualStatusDiv.innerText = `❌ 发送数据时出错: ${err.message}`;
        console.error('发送录音数据时出错:', err);
    }
});
|
||
|
||
|
||
// Speaker recognition: the button toggles capture of shared system/tab audio

/**
 * Feed audio from `source` into `speakerChunks`, updating the status line with
 * the chunk count every 10th chunk.
 *
 * Uses an AudioWorklet when the context offers one and falls back to a
 * ScriptProcessorNode (buffer size 4096, mono) when it does not or when setting
 * the worklet up fails.
 *
 * @param {AudioContext} context - Context that owns `source`.
 * @param {MediaStreamAudioSourceNode} source - Source node for the captured stream.
 */
async function attachSpeakerCollector(context, source) {
    const storeChunk = (samples) => {
        try {
            speakerChunks.push(new Float32Array(samples));
            // Show the buffer size from time to time.
            if (speakerChunks.length % 10 === 0) {
                speakerStatusDiv.innerHTML = `<span class='recording-indicator'></span>🎤 正在录制扬声器... (缓冲区: ${speakerChunks.length})`;
            }
        } catch (err) {
            console.error('处理音频数据时出错:', err);
        }
    };

    const useScriptProcessor = () => {
        const node = context.createScriptProcessor(4096, 1, 1);
        console.log('创建处理器:', node);
        source.connect(node);
        node.connect(context.destination);
        node.onaudioprocess = (e) => storeChunk(e.inputBuffer.getChannelData(0));
    };

    if (!context.audioWorklet) {
        useScriptProcessor();
        return;
    }

    try {
        // Minimal worklet processor that forwards the first input channel.
        const processorScript = `
            class AudioCollector extends AudioWorkletProcessor {
                process(inputs, outputs, parameters) {
                    const input = inputs[0];
                    if (input && input[0]) {
                        this.port.postMessage(input[0]);
                    }
                    return true;
                }
            }

            registerProcessor('audio-collector', AudioCollector);
        `;

        const blob = new Blob([processorScript], {type: 'application/javascript'});
        const url = URL.createObjectURL(blob);
        try {
            await context.audioWorklet.addModule(url);
        } finally {
            // The module has been fetched (or has failed); release the blob URL.
            URL.revokeObjectURL(url);
        }

        const node = new AudioWorkletNode(context, 'audio-collector');
        node.port.onmessage = (e) => storeChunk(e.data);
        source.connect(node);
        node.connect(context.destination);
    } catch (workletErr) {
        console.warn('AudioWorklet 不可用,回退到 ScriptProcessorNode:', workletErr);
        useScriptProcessor();
    }
}

/** Close the speaker audio context and stop every track of the captured stream. */
function releaseSpeakerCapture() {
    if (speakerAudioContext) {
        speakerAudioContext.close().catch((err) => console.error('关闭音频上下文时出错:', err));
        speakerAudioContext = null;
    }

    if (window.speakerStream) {
        window.speakerStream.getTracks().forEach((track) => track.stop());
        window.speakerStream = null;
    }
}

/** Put the speaker buttons and status line back into the idle state and drop buffered audio. */
function resetSpeakerControls() {
    speakerBtn.textContent = '开始录制扬声器';
    speakerBtn.style.background = 'linear-gradient(45deg, #9C27B0, #E91E63)';
    speakerStatusDiv.innerText = "⏹️ 录制已停止";
    stopSpeakerBtn.disabled = true;
    speakerChunks = [];
}

/** Translate a capture error into the user-facing message shown in the status line. */
function showSpeakerError(err) {
    if (err.name === 'NotSupportedError') {
        speakerStatusDiv.innerHTML = "❌ 系统不支持扬声器录制功能<br/>" +
            "请尝试以下解决方案:<br/>" +
            "1. 确保在屏幕共享时选择包含音频的选项<br/>" +
            "2. 检查系统音频设置和权限<br/>" +
            "3. 在 Windows 系统中,确保已启用立体声混音设备";
    } else if (err.name === 'NotAllowedError' || err.name === 'PermissionDeniedError') {
        speakerStatusDiv.innerText = "❌ 用户拒绝了屏幕共享或音频访问权限";
    } else if (err.name === 'NotFoundError' || err.name === 'OverconstrainedError') {
        speakerStatusDiv.innerText = "❌ 未找到可用的音频输入设备";
    } else if (err.message && err.message.includes('Error starting capture')) {
        speakerStatusDiv.innerHTML = "❌ 启动捕获失败<br/>" +
            "请尝试以下解决方案:<br/>" +
            "1. 重新点击按钮再次尝试<br/>" +
            "2. 重启应用程序<br/>" +
            "3. 检查系统音频驱动程序<br/>" +
            "4. 确保没有其他应用程序正在使用音频设备";
    } else {
        speakerStatusDiv.innerHTML = `❌ 获取扬声器失败: ${err.message || err.name}<br/>` +
            "请尝试以下解决方案:<br/>" +
            "1. 确保使用最新版本的 Electron<br/>" +
            "2. 检查应用权限设置<br/>" +
            "3. 重启应用程序";
    }
}

speakerBtn.addEventListener('click', async () => {
    // The original toggled on `speakerMediaRecorder`, which is never assigned, so
    // the button could only ever start captures. `speakerAudioContext` is set on
    // start and nulled on every stop path, which makes it the recording flag.
    if (speakerAudioContext) {
        // Stop: tear down and discard whatever has not been sent yet.
        releaseSpeakerCapture();
        resetSpeakerControls();
        return;
    }

    // Block re-entry while the share picker / audio graph setup is pending.
    speakerBtn.disabled = true;
    try {
        speakerStatusDiv.innerHTML = "🔄 正在请求音频流...";

        // Desktop audio comes from getDisplayMedia; video is requested as well
        // because some browsers refuse audio-only display capture.
        const stream = await navigator.mediaDevices.getDisplayMedia({
            video: true,
            audio: true
        });
        // Kept on window so that other handlers can stop the tracks.
        window.speakerStream = stream;

        const audioTracks = stream.getAudioTracks();

        // Only the audio is of interest; drop the video tracks straight away.
        stream.getVideoTracks().forEach((track) => track.stop());

        speakerStatusDiv.innerHTML = "🔄 正在设置音频处理...";

        console.log('音频轨道数量:', audioTracks.length);
        console.log('音频轨道详情:', audioTracks);

        if (audioTracks.length === 0) {
            releaseSpeakerCapture();
            speakerStatusDiv.innerText = "⚠️ 音频流不包含音频轨道";
            return;
        }

        // If the capture ends outside this page's buttons, reset everything.
        audioTracks.forEach((track) => {
            console.log('音频轨道设置:', track);
            track.addEventListener('ended', () => {
                console.log('音频轨道结束');
                // Ignore late events from a stream that has already been replaced.
                if (window.speakerStream !== stream) {
                    return;
                }
                releaseSpeakerCapture();
                resetSpeakerControls();
            });
        });

        speakerAudioContext = new (window.AudioContext || window.webkitAudioContext)({
            sampleRate: 16000 // sample rate requested for the recogniser
        });
        console.log('创建音频上下文:', speakerAudioContext);

        // Start from an empty buffer on every path (the worklet path used to skip this).
        speakerChunks = [];

        const sourceNode = speakerAudioContext.createMediaStreamSource(stream);
        console.log('创建媒体流源:', sourceNode);

        await attachSpeakerCollector(speakerAudioContext, sourceNode);

        speakerBtn.textContent = '停止录制';
        speakerBtn.style.background = 'linear-gradient(45deg, #f44336, #e91e63)';
        stopSpeakerBtn.disabled = false;
        speakerStatusDiv.innerHTML = "<span class='recording-indicator'></span>🎤 正在录制扬声器...";
    } catch (err) {
        console.error('获取扬声器完整错误:', err);
        // Do not leave a live context or shared stream behind after a failure.
        releaseSpeakerCapture();
        showSpeakerError(err);
    } finally {
        speakerBtn.disabled = false;
    }
});
|
||
|
||
// Periodic flush of captured speaker audio.
// Every 3 seconds, whatever has accumulated in `speakerChunks` is merged,
// converted to 16-bit PCM and sent; the tick is a no-op when nothing is
// buffered or the socket is not open, so it is harmless while idle.
const sendInterval = setInterval(() => {
    if (speakerChunks.length === 0) {
        return;
    }

    // `websocket` stays null if the connection could never be created.
    if (!websocket || websocket.readyState !== WebSocket.OPEN) {
        return;
    }

    // Merge all buffered chunks into one contiguous sample array.
    const totalLength = speakerChunks.reduce((acc, chunk) => acc + chunk.length, 0);
    const fullAudio = new Float32Array(totalLength);
    let offset = 0;
    for (const chunk of speakerChunks) {
        fullAudio.set(chunk, offset);
        offset += chunk.length;
    }

    // Convert to PCM and send.
    websocket.send(floatTo16BitPCM(fullAudio));
    speakerStatusDiv.innerText = `📤 发送扬声器数据 (${fullAudio.length} 个采样点)`;

    // Everything buffered so far has been sent.
    speakerChunks = [];
}, 3000);
|
||
|
||
// Speaker recognition: stop capturing and flush the remaining audio
stopSpeakerBtn.addEventListener('click', () => {
    // The periodic sender (`sendInterval`) is deliberately left running: it does
    // nothing while `speakerChunks` is empty, and clearing it here meant that
    // every recording after the first stop was never streamed.

    // Flush whatever has not been sent yet and remember the outcome, so that it
    // is still visible after the status line is set to "stopped" below.
    let outcome;
    const hasAudio = Boolean(speakerChunks) && speakerChunks.length > 0;
    const socketReady = Boolean(websocket) && websocket.readyState === WebSocket.OPEN;

    if (!hasAudio) {
        outcome = "⚠️ 没有录制到音频数据";
    } else if (!socketReady) {
        outcome = "⚠️ 没有录制数据可发送或WebSocket未连接";
    } else {
        // Merge all buffered chunks into one contiguous sample array.
        const totalLength = speakerChunks.reduce((acc, chunk) => acc + chunk.length, 0);
        const fullAudio = new Float32Array(totalLength);
        let offset = 0;
        for (const chunk of speakerChunks) {
            fullAudio.set(chunk, offset);
            offset += chunk.length;
        }

        // Convert to PCM and send.
        websocket.send(floatTo16BitPCM(fullAudio));
        outcome = `📤 发送扬声器数据 (${fullAudio.length} 个采样点)`;
    }

    // Tear the capture down: close the audio context ...
    if (speakerAudioContext) {
        speakerAudioContext.close();
        speakerAudioContext = null;
    }

    // ... and stop every track of the shared stream.
    if (window.speakerStream) {
        window.speakerStream.getTracks().forEach((track) => {
            track.stop();
        });
        window.speakerStream = null;
    }

    // Reset buttons and status.
    speakerBtn.textContent = '开始录制扬声器';
    speakerBtn.style.background = 'linear-gradient(45deg, #9C27B0, #E91E63)';
    speakerStatusDiv.innerText = `⏹️ 录制已停止\n${outcome}`;
    stopSpeakerBtn.disabled = true;
    speakerChunks = [];
});
|
||
|
||
// Startup: once the page has loaded, initialise VAD and open the WebSocket.
window.onload = function bootstrap() {
    initVAD();
    connectWebSocket();
};
|
||
</script>
|
||
</body>
|
||
|
||
</html> |