<!--
Files
PythonProject/speaker.html

488 lines
18 KiB
HTML
Raw Permalink Normal View History

2025-09-18 17:50:03 +08:00
-->
<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>扬声器识别系统</title>
<style>
  /* Page shell: full-height purple gradient with white text. */
  body {
    font-family: Arial, sans-serif;
    padding: 20px;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    min-height: 100vh;
    margin: 0;
    color: white;
  }

  /* Centered frosted-glass card that holds the whole UI. */
  .container {
    max-width: 1000px;
    margin: 0 auto;
    background: rgba(255, 255, 255, 0.1);
    padding: 20px;
    border-radius: 15px;
    /* Prefixed form first for WebKit builds that only know the prefixed property. */
    -webkit-backdrop-filter: blur(10px);
    backdrop-filter: blur(10px);
    box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
  }

  h1 {
    text-align: center;
    margin-bottom: 20px;
    font-size: 2.5em;
    text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3);
  }

  /* Button row; wraps on narrow screens. */
  .controls {
    display: flex;
    justify-content: center;
    gap: 20px;
    margin-bottom: 20px;
    flex-wrap: wrap;
  }

  button {
    padding: 15px 30px;
    font-size: 18px;
    border: none;
    border-radius: 50px;
    cursor: pointer;
    transition: all 0.3s ease;
    font-weight: bold;
    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
  }

  #speakerBtn {
    background: linear-gradient(45deg, #9C27B0, #E91E63);
    color: white;
  }

  #stopSpeakerBtn {
    background: linear-gradient(45deg, #f44336, #e91e63);
    color: white;
  }

  #translateBtn {
    background: linear-gradient(45deg, #FF9800, #FF5722);
    color: white;
  }

  button:hover:not(:disabled) {
    transform: translateY(-3px);
    box-shadow: 0 6px 20px rgba(0, 0, 0, 0.3);
  }

  /* Explicit keyboard focus ring that stays visible on the gradient backgrounds. */
  button:focus-visible {
    outline: 3px solid #FFD700;
    outline-offset: 3px;
  }

  button:disabled {
    opacity: 0.6;
    cursor: not-allowed;
  }

  .status-container {
    text-align: center;
    margin: 20px 0;
    padding: 15px;
    background: rgba(255, 255, 255, 0.15);
    border-radius: 10px;
    font-size: 16px;
  }

  #speakerStatus {
    font-weight: bold;
    margin-bottom: 10px;
  }

  .result-container {
    margin: 20px 0;
  }

  /* Scrollable text panel; pre-wrap keeps line breaks in the displayed text. */
  .result-box {
    min-height: 120px;
    padding: 20px;
    background: rgba(0, 0, 0, 0.2);
    border-radius: 10px;
    font-size: 18px;
    line-height: 1.6;
    white-space: pre-wrap;
    overflow-y: auto;
    max-height: 200px;
    margin-bottom: 15px;
  }

  .section-title {
    font-size: 1.5em;
    margin-bottom: 15px;
    text-align: center;
    color: #FFD700;
  }

  /* Red dot shown next to the status text while recording. */
  .recording-indicator {
    display: inline-block;
    width: 12px;
    height: 12px;
    background-color: #f44336;
    border-radius: 50%;
    margin-right: 10px;
    animation: blink 1s infinite;
  }

  @keyframes blink {
    0%, 100% {
      opacity: 1;
    }
    50% {
      opacity: 0.3;
    }
  }

  /* Honor the OS "reduce motion" setting: no endless blinking, no hover lift animation. */
  @media (prefers-reduced-motion: reduce) {
    .recording-indicator {
      animation: none;
    }
    button {
      transition: none;
    }
  }
</style>
</head>
<body>
<main class="container">
  <h1><span aria-hidden="true">🎙️</span> 扬声器识别系统</h1>
  <div class="controls">
    <button id="speakerBtn" type="button">开始录制扬声器</button>
    <button id="stopSpeakerBtn" type="button" disabled>停止录制</button>
    <button id="translateBtn" type="button" disabled>翻译结果</button>
  </div>
  <div class="status-container">
    <!-- Not a live region: the script rewrites this text repeatedly while recording (buffer counter). -->
    <div id="speakerStatus">状态:等待录制...</div>
  </div>
  <div class="result-container">
    <h2 class="section-title">识别结果</h2>
    <!-- Filled in by script; aria-live lets assistive technology announce new text. -->
    <div class="result-box" id="speakerResult" aria-live="polite">识别结果将显示在这里...</div>
    <h2 class="section-title">翻译结果</h2>
    <div class="result-box" id="translatedResult" aria-live="polite">翻译结果将显示在这里...</div>
  </div>
</main>
<script>
// Mutable state shared by the handlers in this script.
let speakerAudioContext = null;  // AudioContext of the running capture, or null when idle
let speakerMediaRecorder = null; // NOTE(review): no visible code ever assigns this
let speakerChunks = [];          // Float32Array blocks buffered during a capture
let websocket = null;            // socket created by connectWebSocket()

// Page elements the script reads and updates, looked up once.
const speakerBtn = document.querySelector('#speakerBtn');
const stopSpeakerBtn = document.querySelector('#stopSpeakerBtn');
const translateBtn = document.querySelector('#translateBtn');
const speakerStatusDiv = document.querySelector('#speakerStatus');
const speakerResultDiv = document.querySelector('#speakerResult');
const translatedResultDiv = document.querySelector('#translatedResult');
/**
 * Convert float audio samples to signed 16-bit little-endian PCM.
 *
 * Each sample is clamped to [-1, 1]; negative values are scaled by 0x8000 and
 * non-negative values by 0x7FFF, so -1 maps to -32768 and 1 maps to 32767.
 * Fractional results are truncated by DataView.setInt16.
 *
 * @param {Float32Array} float32Array samples to convert
 * @returns {ArrayBuffer} buffer holding two bytes per input sample
 */
function floatTo16BitPCM(float32Array) {
  const sampleCount = float32Array.length;
  const pcmBytes = new ArrayBuffer(sampleCount * 2);
  const writer = new DataView(pcmBytes);
  for (let idx = 0, byteOffset = 0; idx < sampleCount; idx += 1, byteOffset += 2) {
    const clamped = Math.min(1, Math.max(-1, float32Array[idx]));
    const scale = clamped < 0 ? 0x8000 : 0x7FFF;
    writer.setInt16(byteOffset, clamped * scale, true); // true = little-endian
  }
  return pcmBytes;
}
/**
 * Open the WebSocket to the recognition backend, unless one is already open
 * or still connecting.
 *
 * Text messages from the server are shown in the recognition result box and
 * enable the translate button. When the socket closes, a reconnect is
 * scheduled after 3 seconds. An attempt that has not opened within 10 seconds
 * is aborted, which leads to the same reconnect path.
 *
 * Takes no arguments and returns nothing; failures are reported through the
 * status line and the console.
 */
function connectWebSocket() {
  try {
    // Reuse a socket that is already open.
    if (websocket && websocket.readyState === WebSocket.OPEN) {
      console.log('WebSocket已连接无需重新连接');
      speakerStatusDiv.innerText = "🟢 WebSocket 已连接";
      return;
    }
    // An attempt is already in flight; let it finish.
    if (websocket && websocket.readyState === WebSocket.CONNECTING) {
      console.log('WebSocket正在连接中无需重新连接');
      return;
    }
    speakerStatusDiv.innerText = "🔄 正在连接 WebSocket...";
    // Handlers use this local reference so they always act on their own socket,
    // even if the shared `websocket` variable has since been replaced.
    const socket = new WebSocket('ws://localhost:8000/ws/asr');
    websocket = socket;
    let timedOut = false;
    const connectionTimeout = setTimeout(() => {
      if (socket.readyState === WebSocket.CONNECTING) {
        timedOut = true;
        speakerStatusDiv.innerText = "❌ WebSocket 连接超时";
        // Abort the stalled attempt; the resulting close event schedules the retry.
        socket.close();
      }
    }, 10000); // 10-second connect timeout
    socket.onopen = () => {
      clearTimeout(connectionTimeout);
      speakerStatusDiv.innerText = "🟢 WebSocket 已连接";
    };
    socket.onmessage = (e) => {
      const data = e.data;
      console.log('WebSocket 收到消息:', data);
      // Only text frames can be displayed; a binary frame would render as "[object Blob]".
      if (typeof data !== 'string') {
        console.warn('WebSocket 收到非文本消息,已忽略');
        return;
      }
      if (data) {
        speakerResultDiv.innerText = data;
        translateBtn.disabled = false;
      }
    };
    socket.onclose = () => {
      clearTimeout(connectionTimeout);
      // A newer socket has taken over; it manages its own reconnects.
      if (websocket !== socket) {
        return;
      }
      // Keep the timeout message on screen until the next attempt starts.
      if (!timedOut) {
        speakerStatusDiv.innerText = "🔌 已断开连接,正在重连...";
      }
      setTimeout(connectWebSocket, 3000); // retry
    };
    socket.onerror = (e) => {
      clearTimeout(connectionTimeout);
      if (!timedOut) {
        speakerStatusDiv.innerText = "❌ WebSocket 错误";
      }
      console.error('WebSocket 错误:', e);
    };
  } catch (error) {
    speakerStatusDiv.innerText = `❌ WebSocket 连接失败: ${error.message}`;
    console.error('WebSocket 连接失败:', error);
  }
}
// Start/stop toggle for system-audio ("speaker") capture.
//
// Idle:      asks for a screen-share stream that carries audio and buffers
//            mono Float32 blocks from it into speakerChunks.
// Recording: tears the capture down and discards the buffered audio (the
//            separate stop button is the path that sends it to the server).
speakerBtn.addEventListener('click', async () => {
  // Close the audio graph and stop every captured track.
  const releaseCapture = () => {
    if (speakerAudioContext) {
      speakerAudioContext.close();
      speakerAudioContext = null;
    }
    if (window.speakerStream) {
      window.speakerStream.getTracks().forEach(track => {
        track.stop();
      });
      window.speakerStream = null;
    }
  };

  // Put the controls and the buffer back into the idle state.
  const showStopped = () => {
    speakerBtn.textContent = '开始录制扬声器';
    speakerBtn.style.background = 'linear-gradient(45deg, #9C27B0, #E91E63)';
    speakerStatusDiv.innerText = "⏹️ 录制已停止";
    stopSpeakerBtn.disabled = true;
    speakerChunks = [];
  };

  // The audio context and stream exist only while a capture is live, so they
  // serve as the recording flag. (Checking speakerMediaRecorder, which no
  // visible code assigns, would make the stop branch unreachable.)
  const isRecording = Boolean(speakerAudioContext || window.speakerStream);
  if (isRecording) {
    releaseCapture();
    showStopped();
    return;
  }

  let stream = null;
  // Block re-entry while the share picker is open and the graph is being built.
  speakerBtn.disabled = true;
  try {
    // A video track is requested along with the audio and discarded right
    // after the stream is obtained; only the audio is used.
    const constraints = {
      video: true,
      audio: {
        echoCancellation: false,
        noiseSuppression: false,
        autoGainControl: false
      }
    };
    speakerStatusDiv.innerHTML = "🔄 正在请求音频流...";
    stream = await navigator.mediaDevices.getDisplayMedia(constraints);
    stream.getVideoTracks().forEach(track => track.stop());
    speakerStatusDiv.innerHTML = "🔄 正在设置音频处理...";

    // The user may have shared a surface without audio.
    const audioTracks = stream.getAudioTracks();
    console.log('音频轨道数量:', audioTracks.length);
    console.log('音频轨道详情:', audioTracks);
    if (audioTracks.length === 0) {
      speakerStatusDiv.innerText = "⚠️ 音频流不包含音频轨道";
      stream.getTracks().forEach(track => track.stop());
      return;
    }

    // Keep the stream reachable so either button can stop it later.
    window.speakerStream = stream;

    // If the audio track ends on its own, reset the page.
    audioTracks.forEach(track => {
      console.log('音频轨道设置:', track);
      track.addEventListener('ended', () => {
        console.log('音频轨道结束');
        // Ignore a track from a capture that has already been torn down or replaced.
        if (window.speakerStream !== stream) {
          return;
        }
        releaseCapture();
        showStopped();
      });
    });

    // Run the context at 16 kHz.
    // NOTE(review): presumably the rate the recognition backend expects; confirm against the server.
    speakerAudioContext = new (window.AudioContext || window.webkitAudioContext)({
      sampleRate: 16000
    });
    console.log('创建音频上下文:', speakerAudioContext);
    const sourceNode = speakerAudioContext.createMediaStreamSource(stream);
    console.log('创建媒体流源:', sourceNode);
    // Script processor: 4096-frame blocks, one input channel, one output channel.
    const processor = speakerAudioContext.createScriptProcessor(4096, 1, 1);
    console.log('创建处理器:', processor);
    sourceNode.connect(processor);
    processor.connect(speakerAudioContext.destination);
    speakerChunks = [];
    processor.onaudioprocess = (e) => {
      try {
        const inputData = e.inputBuffer.getChannelData(0);
        // Copy the block before buffering it rather than keeping a reference to the event's buffer.
        speakerChunks.push(new Float32Array(inputData));
        // Refresh the on-screen buffer counter every tenth block.
        if (speakerChunks.length % 10 === 0) {
          speakerStatusDiv.innerHTML = `<span class='recording-indicator'></span>🎤 正在录制扬声器... (缓冲区: ${speakerChunks.length})`;
        }
      } catch (err) {
        console.error('处理音频数据时出错:', err);
      }
    };

    speakerBtn.textContent = '停止录制';
    speakerBtn.style.background = 'linear-gradient(45deg, #f44336, #e91e63)';
    stopSpeakerBtn.disabled = false;
    speakerStatusDiv.innerHTML = "<span class='recording-indicator'></span>🎤 正在录制扬声器...";
  } catch (err) {
    console.error('获取扬声器完整错误:', err);
    // Do not leave a half-built capture running after a failure.
    if (stream) {
      stream.getTracks().forEach(track => track.stop());
    }
    releaseCapture();
    if (err.name === 'NotSupportedError') {
      speakerStatusDiv.innerHTML = "❌ 系统不支持扬声器录制功能<br/>" +
        "请尝试以下解决方案:<br/>" +
        "1. 确保在屏幕共享时选择包含音频的选项<br/>" +
        "2. 检查系统音频设置和权限<br/>" +
        "3. 在 Windows 系统中,确保已启用立体声混音设备";
    } else if (err.name === 'NotAllowedError' || err.name === 'PermissionDeniedError') {
      speakerStatusDiv.innerText = "❌ 用户拒绝了屏幕共享或音频访问权限";
    } else if (err.name === 'NotFoundError' || err.name === 'OverconstrainedError') {
      speakerStatusDiv.innerText = "❌ 未找到可用的音频输入设备";
    } else if (err.message && err.message.includes('Error starting capture')) {
      speakerStatusDiv.innerHTML = "❌ 启动捕获失败<br/>" +
        "请尝试以下解决方案:<br/>" +
        "1. 重新点击按钮再次尝试<br/>" +
        "2. 重启应用程序<br/>" +
        "3. 检查系统音频驱动程序<br/>" +
        "4. 确保没有其他应用程序正在使用音频设备";
    } else {
      speakerStatusDiv.innerHTML = `❌ 获取扬声器失败: ${err.message || err.name}<br/>` +
        "请尝试以下解决方案:<br/>" +
        "1. 确保使用最新版本的浏览器<br/>" +
        "2. 检查应用权限设置<br/>" +
        "3. 重启应用程序";
    }
  } finally {
    speakerBtn.disabled = false;
  }
});
// Stop button: ends the capture, then sends everything that was buffered to
// the server as one 16-bit PCM message and reports the outcome in the status line.
stopSpeakerBtn.addEventListener('click', () => {
  // Take the buffered audio before the shared buffer is reset below.
  const recordedChunks = speakerChunks || [];

  // Tear the capture down first so no further blocks arrive while sending.
  if (speakerAudioContext) {
    speakerAudioContext.close();
    speakerAudioContext = null;
  }
  if (window.speakerStream) {
    window.speakerStream.getTracks().forEach(track => {
      track.stop();
    });
    window.speakerStream = null;
  }

  // Return the controls to the idle state.
  speakerBtn.textContent = '开始录制扬声器';
  speakerBtn.style.background = 'linear-gradient(45deg, #9C27B0, #E91E63)';
  stopSpeakerBtn.disabled = true;
  speakerChunks = [];

  // The status line is written once, at the end, so the outcome stays visible
  // instead of being replaced by a generic "stopped" message.
  let outcome;
  if (recordedChunks.length === 0) {
    outcome = "⚠️ 没有录制到音频数据";
  } else if (!websocket || websocket.readyState !== WebSocket.OPEN) {
    outcome = "⚠️ 没有录制数据可发送或WebSocket未连接";
  } else {
    try {
      // Concatenate the blocks into one contiguous sample array.
      const totalLength = recordedChunks.reduce((acc, chunk) => acc + chunk.length, 0);
      const fullAudio = new Float32Array(totalLength);
      let offset = 0;
      for (const chunk of recordedChunks) {
        fullAudio.set(chunk, offset);
        offset += chunk.length;
      }
      // Convert to 16-bit PCM and send as a single binary message.
      const pcm16 = floatTo16BitPCM(fullAudio);
      websocket.send(pcm16);
      outcome = `📤 发送扬声器数据 (${fullAudio.length} 个采样点)`;
    } catch (err) {
      console.error('发送扬声器数据失败:', err);
      outcome = `❌ 发送扬声器数据失败: ${err.message}`;
    }
  }
  speakerStatusDiv.innerText = outcome;
});
// Translate button: POSTs the recognized text to the translation endpoint and
// shows the returned translation. The button is disabled while the request is
// in flight and re-enabled afterwards, whether it succeeded or failed.
translateBtn.addEventListener('click', async () => {
  const recognizedText = speakerResultDiv.innerText;
  const nothingRecognized = !recognizedText || recognizedText === "识别结果将显示在这里...";
  if (nothingRecognized) {
    alert("请先进行语音识别");
    return;
  }

  try {
    speakerStatusDiv.innerText = "🔄 正在翻译...";
    translateBtn.disabled = true;

    // JSON body of the form { text: <recognized text> }.
    const requestInit = {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        text: recognizedText
      })
    };
    const reply = await fetch('http://localhost:8000/translate', requestInit);

    // Any non-2xx reply goes to the catch block below.
    if (!reply.ok) {
      throw new Error(`翻译服务返回错误: ${reply.status}`);
    }

    const payload = await reply.json();
    translatedResultDiv.innerText = payload.translated_text || "翻译失败";
    speakerStatusDiv.innerText = "✅ 翻译完成";
  } catch (failure) {
    console.error('翻译失败:', failure);
    speakerStatusDiv.innerText = `❌ 翻译失败: ${failure.message}`;
    translatedResultDiv.innerText = "翻译失败,请查看控制台了解详情";
  } finally {
    translateBtn.disabled = false;
  }
});
// Startup: open the backend connection once the page has finished loading.
window.onload = function () {
  connectWebSocket();
};
</script>
</body>
</html>