first commit

This commit is contained in:
lingxiao865
2025-09-18 17:50:03 +08:00
parent 4c3c83092c
commit f5c4158fc1
16 changed files with 31819 additions and 0 deletions

113
asr_api.py Normal file
View File

@@ -0,0 +1,113 @@
import numpy as np
import torch
# server.py
from fastapi import FastAPI, WebSocket
from fastapi import Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess
# FastAPI application setup; CORS is fully open for the demo web UI.
app = FastAPI(title="ASR API", description="语音识别API", version="1.0.0")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Prefer GPU when one is available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model_dir = "iic/SenseVoiceSmall"

# SenseVoice model with an FSMN-VAD front end (segments capped at 30 s).
model = AutoModel(
    model=model_dir,
    trust_remote_code=True,
    vad_model="fsmn-vad",
    vad_kwargs={"max_single_segment_time": 30000},
    device=device,
    language="auto",  # auto-detect spoken language
    use_itn=True,     # inverse text normalization
)
def render_template(template_name: str, context: dict = None):
    """Minimal template renderer that avoids a Jinja2 dependency.

    Only the static ``3.html`` page is known; any other name yields a
    placeholder response. ``context`` is accepted for call-site
    compatibility but is not interpolated into the page.
    """
    context = context or {}
    if template_name == "3.html":
        try:
            with open("3.html", "r", encoding="utf-8") as f:
                content = f.read()
        except OSError:
            # A missing/unreadable template used to escape as an unhandled
            # exception (HTTP 500); report it explicitly instead.
            return HTMLResponse(content="Template not found", status_code=404)
        return HTMLResponse(content=content)
    return HTMLResponse(content="Template not found")
def bytes_to_float_array(audio_bytes: bytes) -> np.ndarray:
    """Decode native-endian 16-bit PCM bytes into float32 samples in [-1, 1]."""
    samples = np.frombuffer(audio_bytes, dtype=np.int16)
    return samples.astype(np.float32) / 32768.0


def convert_audio_data(audio_data: bytes) -> np.ndarray:
    """Best-effort conversion of raw audio bytes to a float32 sample array.

    Returns an empty float32 array when the payload is empty, has an odd
    byte length (not valid 16-bit PCM), or fails to decode.
    """
    try:
        # Valid 16-bit PCM: non-empty with an even number of bytes.
        if audio_data and len(audio_data) % 2 == 0:
            return bytes_to_float_array(audio_data).astype(np.float32)
    except Exception as e:
        print(f"⚠️ 16-bit PCM处理失败: {e}")
    return np.array([], dtype=np.float32)
# chatbot = QwenChatbot()


@app.websocket("/ws/asr")
async def websocket_asr(websocket: WebSocket):
    """Streaming ASR endpoint: receive audio chunks, reply with transcripts."""
    # Accept unconditionally (sidesteps origin checks that caused 403 Forbidden).
    await websocket.accept()
    while True:
        # Raw audio bytes pushed by the browser client.
        raw = await websocket.receive_bytes()
        samples = convert_audio_data(raw)
        # Defensive: the model expects float32 input.
        if samples.dtype != np.float32:
            samples = samples.astype(np.float32)
        # Run SenseVoice inference on this chunk.
        result = model.generate(
            samples,
            sampling_rate=16000,
            language="auto",  # auto-detect spoken language
            use_itn=True,     # inverse text normalization
            batch_size_s=60,
        )
        text = rich_transcription_postprocess(result[0]["text"])
        # print(chatbot.generate_response(text))
        print(f"🔍 Raw result type: {result}, content: {text}")
        await websocket.send_text(text)
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Serve the main web UI page."""
    page_context = {"request": request}
    return render_template("3.html", page_context)
if __name__ == "__main__":
    # Dev-server entry point; listens on all interfaces.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")