Files
PythonProject/speaker.html
lingxiao865 f5c4158fc1 first commit
2025-09-18 17:50:03 +08:00

488 lines
18 KiB
HTML
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>扬声器识别系统</title>
<style>
/* Page shell: full-height diagonal gradient background with white text. */
body {
font-family: Arial, sans-serif;
padding: 20px;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
margin: 0;
color: white;
}
/* Centered, translucent card that wraps the whole UI. */
.container {
max-width: 1000px;
margin: 0 auto;
background: rgba(255, 255, 255, 0.1);
padding: 20px;
border-radius: 15px;
backdrop-filter: blur(10px);
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
}
/* Page title. */
h1 {
text-align: center;
margin-bottom: 20px;
font-size: 2.5em;
text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3);
}
/* Button row: centered, wraps onto several lines when space runs out. */
.controls {
display: flex;
justify-content: center;
gap: 20px;
margin-bottom: 20px;
flex-wrap: wrap;
}
/* Shared look for every button; the per-id rules below only set colors. */
button {
padding: 15px 30px;
font-size: 18px;
border: none;
border-radius: 50px;
cursor: pointer;
transition: all 0.3s ease;
font-weight: bold;
box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
}
/* Record button (the script also overrides this background inline). */
#speakerBtn {
background: linear-gradient(45deg, #9C27B0, #E91E63);
color: white;
}
/* Stop button. */
#stopSpeakerBtn {
background: linear-gradient(45deg, #f44336, #e91e63);
color: white;
}
/* Translate button. */
#translateBtn {
background: linear-gradient(45deg, #FF9800, #FF5722);
color: white;
}
/* Lift effect on hover, applied to enabled buttons only. */
button:hover:not(:disabled) {
transform: translateY(-3px);
box-shadow: 0 6px 20px rgba(0, 0, 0, 0.3);
}
/* Disabled buttons are dimmed and show a "not allowed" cursor. */
button:disabled {
opacity: 0.6;
cursor: not-allowed;
}
/* Box around the status line. */
.status-container {
text-align: center;
margin: 20px 0;
padding: 15px;
background: rgba(255, 255, 255, 0.15);
border-radius: 10px;
font-size: 16px;
}
/* Status line text, written by the script. */
#speakerStatus {
font-weight: bold;
margin-bottom: 10px;
}
/* Wrapper around the two result sections. */
.result-container {
margin: 20px 0;
}
/* Result panels: preserve line breaks and scroll vertically past 200px. */
.result-box {
min-height: 120px;
padding: 20px;
background: rgba(0, 0, 0, 0.2);
border-radius: 10px;
font-size: 18px;
line-height: 1.6;
white-space: pre-wrap;
overflow-y: auto;
max-height: 200px;
margin-bottom: 15px;
}
/* Headings above each result panel. */
.section-title {
font-size: 1.5em;
margin-bottom: 15px;
text-align: center;
color: #FFD700;
}
/* Small red dot the script inserts into the status line while recording. */
.recording-indicator {
display: inline-block;
width: 12px;
height: 12px;
background-color: #f44336;
border-radius: 50%;
margin-right: 10px;
animation: blink 1s infinite;
}
/* One-second opacity pulse used by .recording-indicator. */
@keyframes blink {
0%, 100% {
opacity: 1;
}
50% {
opacity: 0.3;
}
}
</style>
</head>
<body>
<!-- Main card: title, control buttons, status line and the two result panels. -->
<div class="container">
<h1>🎙️ 扬声器识别系统</h1>
<!-- Control buttons; the script looks each one up by id. Stop and translate start disabled. -->
<div class="controls">
<button id="speakerBtn">开始录制扬声器</button>
<button id="stopSpeakerBtn" disabled>停止录制</button>
<button id="translateBtn" disabled>翻译结果</button>
</div>
<!-- Status line updated by the script (connection, recording and translation state). -->
<div class="status-container">
<div id="speakerStatus">状态:等待录制...</div>
</div>
<!-- Result panels: #speakerResult receives WebSocket messages, #translatedResult the translation response. -->
<div class="result-container">
<h2 class="section-title">识别结果</h2>
<div id="speakerResult" class="result-box">识别结果将显示在这里...</div>
<h2 class="section-title">翻译结果</h2>
<div id="translatedResult" class="result-box">翻译结果将显示在这里...</div>
</div>
</div>
<script>
// Mutable state shared by the handlers in this script.
let speakerAudioContext = null; // AudioContext of the running capture, or null
let speakerMediaRecorder = null; // not assigned anywhere in the visible script
let speakerChunks = []; // Float32Array blocks captured so far
let websocket = null; // WebSocket to the recognition backend, or null

// DOM elements, each looked up once by its id (order matches the id list).
const [
  speakerBtn,
  stopSpeakerBtn,
  translateBtn,
  speakerStatusDiv,
  speakerResultDiv,
  translatedResultDiv,
] = [
  'speakerBtn',
  'stopSpeakerBtn',
  'translateBtn',
  'speakerStatus',
  'speakerResult',
  'translatedResult',
].map((elementId) => document.getElementById(elementId));
/**
 * Convert floating-point audio samples to signed 16-bit little-endian PCM.
 *
 * Every sample is clamped to [-1, 1]. Negative samples are scaled by 0x8000
 * and non-negative samples by 0x7FFF, so both ends of the int16 range are
 * reachable. Fractional results are truncated by `DataView.setInt16`.
 *
 * @param {Float32Array} float32Array - Samples to convert.
 * @returns {ArrayBuffer} Buffer holding two bytes per input sample.
 */
function floatTo16BitPCM(float32Array) {
  const BYTES_PER_SAMPLE = 2;
  const sampleCount = float32Array.length;
  const pcmBuffer = new ArrayBuffer(sampleCount * BYTES_PER_SAMPLE);
  const writer = new DataView(pcmBuffer);

  for (let index = 0; index < sampleCount; index += 1) {
    const clamped = Math.min(1, Math.max(-1, float32Array[index]));
    const scale = clamped < 0 ? 0x8000 : 0x7fff;
    // Third argument `true` selects little-endian byte order.
    writer.setInt16(index * BYTES_PER_SAMPLE, clamped * scale, true);
  }

  return pcmBuffer;
}
/**
 * Open the WebSocket to the recognition backend unless one is already open
 * or still connecting.
 *
 * Incoming messages are written to the recognition result panel and enable
 * the translate button. A closed socket schedules another call to this
 * function after three seconds. Connection progress and failures are shown
 * in the status line.
 */
function connectWebSocket() {
  const showStatus = (text) => {
    speakerStatusDiv.innerText = text;
  };

  try {
    // Reuse a socket that is already usable or on its way to being usable.
    if (websocket) {
      switch (websocket.readyState) {
        case WebSocket.OPEN:
          console.log('WebSocket已连接无需重新连接');
          showStatus("🟢 WebSocket 已连接");
          return;
        case WebSocket.CONNECTING:
          console.log('WebSocket正在连接中无需重新连接');
          return;
        default:
          break;
      }
    }

    showStatus("🔄 正在连接 WebSocket...");
    websocket = new WebSocket('ws://localhost:8000/ws/asr');

    // After ten seconds, report a timeout if the socket is still connecting.
    // This only updates the status text; the socket itself is left alone.
    const reportTimeout = () => {
      if (websocket && websocket.readyState === WebSocket.CONNECTING) {
        showStatus("❌ WebSocket 连接超时");
      }
    };
    const connectionTimeout = setTimeout(reportTimeout, 10000);

    websocket.onopen = () => {
      clearTimeout(connectionTimeout);
      showStatus("🟢 WebSocket 已连接");
    };

    websocket.onmessage = (event) => {
      try {
        const payload = event.data;
        console.log('WebSocket 收到消息:', payload);
        if (!payload) {
          return;
        }
        speakerResultDiv.innerText = payload;
        translateBtn.disabled = false;
      } catch (err) {
        console.error('处理WebSocket消息时出错:', err);
        speakerResultDiv.innerText = event.data;
      }
    };

    websocket.onclose = () => {
      clearTimeout(connectionTimeout);
      showStatus("🔌 已断开连接,正在重连...");
      // Retry after a short delay.
      setTimeout(connectWebSocket, 3000);
    };

    websocket.onerror = (event) => {
      clearTimeout(connectionTimeout);
      showStatus("❌ WebSocket 错误");
      console.error('WebSocket 错误:', event);
    };
  } catch (error) {
    showStatus(`❌ WebSocket 连接失败: ${error.message}`);
    console.error('WebSocket 连接失败:', error);
  }
}
// Speaker (system audio) capture: the main button toggles recording on and off.

// True while a capture request started by the record button is still pending,
// so a second click cannot open a second picker.
let isStartingSpeakerCapture = false;

/** Close the capture AudioContext and stop every track of the stored stream. */
function releaseSpeakerCapture() {
  if (speakerAudioContext) {
    speakerAudioContext.close();
    speakerAudioContext = null;
  }
  if (window.speakerStream) {
    window.speakerStream.getTracks().forEach((track) => track.stop());
    window.speakerStream = null;
  }
}

/** Reset buttons and status line to the idle state and drop buffered audio. */
function showSpeakerIdleState() {
  speakerBtn.textContent = '开始录制扬声器';
  speakerBtn.style.background = 'linear-gradient(45deg, #9C27B0, #E91E63)';
  speakerStatusDiv.innerText = "⏹️ 录制已停止";
  stopSpeakerBtn.disabled = true;
  speakerChunks = [];
}

/**
 * Record-button click handler.
 *
 * When no capture is running, asks the browser for a display-media stream,
 * keeps only its audio, and buffers the samples in `speakerChunks`. When a
 * capture is running, stops it and discards the buffered audio without
 * sending anything (the separate stop button is the one that sends).
 */
speakerBtn.addEventListener('click', async () => {
  if (isStartingSpeakerCapture) {
    return;
  }

  // A live AudioContext marks an active recording. The previous check used
  // `speakerMediaRecorder`, which is never assigned, so the stop path could
  // never run.
  if (speakerAudioContext) {
    releaseSpeakerCapture();
    showSpeakerIdleState();
    return;
  }

  isStartingSpeakerCapture = true;
  let stream = null;
  try {
    const constraints = {
      // getDisplayMedia needs a video request; the video tracks are stopped
      // right after the stream is obtained because only audio is used.
      video: true,
      audio: {
        echoCancellation: false,
        noiseSuppression: false,
        autoGainControl: false
      }
    };
    speakerStatusDiv.innerHTML = "🔄 正在请求音频流...";
    stream = await navigator.mediaDevices.getDisplayMedia(constraints);

    // Only the audio is of interest, so stop every video track.
    stream.getVideoTracks().forEach((track) => track.stop());
    speakerStatusDiv.innerHTML = "🔄 正在设置音频处理...";

    // The user may have shared a surface without audio.
    const audioTracks = stream.getAudioTracks();
    console.log('音频轨道数量:', audioTracks.length);
    console.log('音频轨道详情:', audioTracks);
    if (audioTracks.length === 0) {
      speakerStatusDiv.innerText = "⚠️ 音频流不包含音频轨道";
      stream.getTracks().forEach((track) => track.stop());
      return;
    }

    // If the capture ends on its own, fall back to the idle state.
    audioTracks.forEach((track) => {
      console.log('音频轨道设置:', track);
      track.addEventListener('ended', () => {
        console.log('音频轨道结束');
        // Ignore events from a stream that is no longer the active capture.
        if (window.speakerStream !== stream) {
          return;
        }
        releaseSpeakerCapture();
        showSpeakerIdleState();
      });
    });

    // The context is created at 16 kHz (presumably the rate the recognition
    // backend expects; confirm against the server).
    speakerAudioContext = new (window.AudioContext || window.webkitAudioContext)({
      sampleRate: 16000
    });
    console.log('创建音频上下文:', speakerAudioContext);
    const sourceNode = speakerAudioContext.createMediaStreamSource(stream);
    console.log('创建媒体流源:', sourceNode);

    // Script processor that collects raw samples in blocks of 4096 frames.
    const processor = speakerAudioContext.createScriptProcessor(4096, 1, 1);
    console.log('创建处理器:', processor);
    sourceNode.connect(processor);
    processor.connect(speakerAudioContext.destination);

    speakerChunks = [];
    processor.onaudioprocess = (e) => {
      try {
        const inputData = e.inputBuffer.getChannelData(0);
        // Copy the block, since the chunk must outlive this callback.
        speakerChunks.push(new Float32Array(inputData));
        // Refresh the buffer counter every tenth block.
        if (speakerChunks.length % 10 === 0) {
          speakerStatusDiv.innerHTML = `<span class='recording-indicator'></span>🎤 正在录制扬声器... (缓冲区: ${speakerChunks.length})`;
        }
      } catch (err) {
        console.error('处理音频数据时出错:', err);
      }
    };

    // Keep the stream so it can be stopped later.
    window.speakerStream = stream;

    speakerBtn.textContent = '停止录制';
    speakerBtn.style.background = 'linear-gradient(45deg, #f44336, #e91e63)';
    stopSpeakerBtn.disabled = false;
    speakerStatusDiv.innerHTML = "<span class='recording-indicator'></span>🎤 正在录制扬声器...";
  } catch (err) {
    console.error('获取扬声器完整错误:', err);

    // Release anything acquired before the failure. Otherwise the tracks keep
    // capturing, and a leftover AudioContext would make the next click look
    // like a stop request.
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
    }
    releaseSpeakerCapture();

    if (err.name === 'NotSupportedError') {
      speakerStatusDiv.innerHTML = "❌ 系统不支持扬声器录制功能<br/>" +
        "请尝试以下解决方案:<br/>" +
        "1. 确保在屏幕共享时选择包含音频的选项<br/>" +
        "2. 检查系统音频设置和权限<br/>" +
        "3. 在 Windows 系统中,确保已启用立体声混音设备";
    } else if (err.name === 'NotAllowedError' || err.name === 'PermissionDeniedError') {
      speakerStatusDiv.innerText = "❌ 用户拒绝了屏幕共享或音频访问权限";
    } else if (err.name === 'NotFoundError' || err.name === 'OverconstrainedError') {
      speakerStatusDiv.innerText = "❌ 未找到可用的音频输入设备";
    } else if (err.message && err.message.includes('Error starting capture')) {
      speakerStatusDiv.innerHTML = "❌ 启动捕获失败<br/>" +
        "请尝试以下解决方案:<br/>" +
        "1. 重新点击按钮再次尝试<br/>" +
        "2. 重启应用程序<br/>" +
        "3. 检查系统音频驱动程序<br/>" +
        "4. 确保没有其他应用程序正在使用音频设备";
    } else {
      speakerStatusDiv.innerHTML = `❌ 获取扬声器失败: ${err.message || err.name}<br/>` +
        "请尝试以下解决方案:<br/>" +
        "1. 确保使用最新版本的浏览器<br/>" +
        "2. 检查应用权限设置<br/>" +
        "3. 重启应用程序";
    }
  } finally {
    isStartingSpeakerCapture = false;
  }
});
/**
 * Stop-button click handler: send the buffered audio, then end the capture.
 *
 * If audio was buffered and the WebSocket is open, all chunks are merged,
 * converted to 16-bit PCM and sent as one binary message. The capture is torn
 * down and the buttons are reset in every case. The status line then shows
 * what happened (sent, nothing recorded, or socket unavailable). Previously
 * that message was overwritten by a generic "stopped" text straight away.
 */
stopSpeakerBtn.addEventListener('click', () => {
  const hasAudio = Boolean(speakerChunks) && speakerChunks.length > 0;
  const socketIsOpen = Boolean(websocket) && websocket.readyState === WebSocket.OPEN;

  let outcomeStatus;
  if (!hasAudio) {
    outcomeStatus = "⚠️ 没有录制到音频数据";
  } else if (!socketIsOpen) {
    outcomeStatus = "⚠️ 没有录制数据可发送或WebSocket未连接";
  } else {
    // Merge every captured block into one contiguous sample array.
    const totalLength = speakerChunks.reduce((acc, chunk) => acc + chunk.length, 0);
    const fullAudio = new Float32Array(totalLength);
    let offset = 0;
    for (const chunk of speakerChunks) {
      fullAudio.set(chunk, offset);
      offset += chunk.length;
    }
    // Convert to PCM and send.
    websocket.send(floatTo16BitPCM(fullAudio));
    outcomeStatus = `📤 发送扬声器数据 (${fullAudio.length} 个采样点)`;
  }

  // Tear down the capture itself.
  if (speakerAudioContext) {
    speakerAudioContext.close();
    speakerAudioContext = null;
  }
  if (window.speakerStream) {
    window.speakerStream.getTracks().forEach((track) => {
      track.stop();
    });
    window.speakerStream = null;
  }

  // Reset the buttons and drop the buffered audio.
  speakerBtn.textContent = '开始录制扬声器';
  speakerBtn.style.background = 'linear-gradient(45deg, #9C27B0, #E91E63)';
  stopSpeakerBtn.disabled = true;
  speakerChunks = [];

  // Set the status last so the outcome of this click stays visible.
  speakerStatusDiv.innerText = outcomeStatus;
});
/**
 * Translate-button click handler.
 *
 * Posts the current recognition text as JSON to the local translation
 * endpoint and writes the `translated_text` field of the response to the
 * translation panel. Failures are logged and shown in the status line. The
 * button is disabled during the request and re-enabled afterwards.
 */
translateBtn.addEventListener('click', async () => {
  const recognizedText = speakerResultDiv.innerText;
  const hasRecognizedText =
    Boolean(recognizedText) && recognizedText !== "识别结果将显示在这里...";

  // Nothing to translate while the panel is empty or still shows its placeholder.
  if (!hasRecognizedText) {
    alert("请先进行语音识别");
    return;
  }

  const showStatus = (text) => {
    speakerStatusDiv.innerText = text;
  };

  try {
    showStatus("🔄 正在翻译...");
    translateBtn.disabled = true;

    // Send the translation request to the backend.
    const requestOptions = {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({ text: recognizedText })
    };
    const response = await fetch('http://localhost:8000/translate', requestOptions);

    if (!response.ok) {
      throw new Error(`翻译服务返回错误: ${response.status}`);
    }

    const payload = await response.json();
    translatedResultDiv.innerText = payload.translated_text || "翻译失败";
    showStatus("✅ 翻译完成");
  } catch (error) {
    console.error('翻译失败:', error);
    showStatus(`❌ 翻译失败: ${error.message}`);
    translatedResultDiv.innerText = "翻译失败,请查看控制台了解详情";
  } finally {
    translateBtn.disabled = false;
  }
});
// Start-up: open the WebSocket connection once the page has finished loading.
window.onload = function handlePageLoad() {
  connectWebSocket();
};
</script>
</body>
</html>