Files
PythonProject/asr_api.py
lingxiao865 f5c4158fc1 first commit
2025-09-18 17:50:03 +08:00

114 lines
3.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import numpy as np
import torch
# server.py
from fastapi import FastAPI, WebSocket
from fastapi import Request
from fastapi import WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from funasr import AutoModel
from funasr.utils.postprocess_utils import rich_transcription_postprocess
# Create the FastAPI application that hosts the ASR endpoints.
app = FastAPI(title="ASR API", description="语音识别API", version="1.0.0")

# Allow cross-origin requests from any site.
# Credentials are disabled on purpose: FastAPI's CORS documentation states
# that wildcard origins/methods/headers must not be combined with
# allow_credentials=True, and nothing in this module reads cookies or
# Authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Identifier of the SenseVoice model handed to funasr's AutoModel.
model_dir = "iic/SenseVoiceSmall"

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the recognizer once at import time; every request reuses this instance.
model = AutoModel(
    model=model_dir,
    device=device,
    trust_remote_code=True,
    # Voice-activity detection front end.
    # NOTE(review): 30000 is presumably milliseconds - confirm against FunASR docs.
    vad_model="fsmn-vad",
    vad_kwargs={"max_single_segment_time": 30000},
    # Detect the spoken language automatically.
    language="auto",
    # Enable inverse text normalization.
    use_itn=True,
)
def render_template(template_name: str, context: dict = None):
    """Return a static HTML page as an ``HTMLResponse`` without using Jinja2.

    Only ``"3.html"`` is recognised. The file is opened by that relative
    name (so it is resolved against the current working directory) and its
    text is returned verbatim; no placeholder substitution is performed.

    Args:
        template_name: Name of the page to serve.
        context: Accepted so call sites look like a real template engine's;
            it is not used.

    Returns:
        A response with the file's contents, or a 404 response with the body
        ``"Template not found"`` when the name is unknown or the file does
        not exist.
    """
    if template_name != "3.html":
        return HTMLResponse(content="Template not found", status_code=404)

    try:
        with open("3.html", "r", encoding="utf-8") as f:
            content = f.read()
    except FileNotFoundError:
        # A missing file is reported the same way as an unknown template
        # instead of surfacing as an unhandled exception.
        return HTMLResponse(content="Template not found", status_code=404)

    return HTMLResponse(content=content)
def bytes_to_float_array(audio_bytes: bytes) -> np.ndarray:
    """Decode 16-bit PCM bytes into float32 samples in the range [-1.0, 1.0).

    The buffer is read as ``np.int16`` (native byte order) and every sample
    is divided by 32768.

    Args:
        audio_bytes: Raw PCM payload; its length must be a multiple of two,
            otherwise ``np.frombuffer`` raises ``ValueError``.

    Returns:
        A new one-dimensional float32 array with one element per sample.
    """
    full_scale = np.float32(32768.0)
    # Reinterpret the raw buffer as signed 16-bit integers.
    pcm_samples = np.frombuffer(audio_bytes, dtype=np.int16)
    # Widen to float32 first, then scale.
    return pcm_samples.astype(np.float32) / full_scale
def convert_audio_data(audio_data: bytes) -> np.ndarray:
    """Decode a raw audio payload into float32 samples, best-effort.

    The payload is interpreted as 16-bit PCM via ``bytes_to_float_array``.

    Args:
        audio_data: Raw bytes received from the client.

    Returns:
        The decoded float32 samples, or an empty float32 array when the
        payload is empty, has an odd number of bytes, or decoding fails.
        Decoding failures are printed and never raised.
    """
    try:
        # 16-bit samples take two bytes each, so an empty or odd-length
        # payload cannot be valid PCM.
        if len(audio_data) == 0 or len(audio_data) % 2 != 0:
            return np.array([], dtype=np.float32)
        # bytes_to_float_array already produces float32, so no further cast
        # (which would copy the whole buffer again) is needed.
        return bytes_to_float_array(audio_data)
    except Exception as e:
        print(f"⚠️ 16-bit PCM处理失败: {e}")
    return np.array([], dtype=np.float32)
# chatbot = QwenChatbot()
@app.websocket("/ws/asr")
async def websocket_asr(websocket: WebSocket):
    """Transcribe audio chunks sent over a WebSocket.

    For every binary message received, the payload is decoded with
    ``convert_audio_data``, run through the module-level ``model`` and the
    post-processed transcript is sent back as one text message. A payload
    that decodes to no samples is answered with an empty string so that each
    request still gets exactly one reply.

    The loop ends quietly when the client disconnects.

    Args:
        websocket: The connection supplied by FastAPI.
    """
    # The handshake is accepted unconditionally, without inspecting Origin
    # (the original note says this avoids 403 Forbidden responses).
    await websocket.accept()
    try:
        while True:
            # Wait for the next binary audio chunk from the client.
            audio_data = await websocket.receive_bytes()

            # convert_audio_data always returns float32, so no dtype re-check
            # is required here.
            audio_array = convert_audio_data(audio_data)
            if audio_array.size == 0:
                # Nothing decodable: skip inference rather than feeding the
                # model an empty array.
                await websocket.send_text("")
                continue

            # NOTE(review): this call is synchronous and runs on the event
            # loop; other connections wait while it executes.
            # The audio is declared to the model as 16 kHz - presumably the
            # front end records at that rate; confirm against the client.
            result = model.generate(
                audio_array,
                sampling_rate=16000,
                language="auto",  # detect the language automatically
                use_itn=True,  # enable inverse text normalization
                batch_size_s=60,
            )

            # Guard against an empty result list before indexing into it.
            text = rich_transcription_postprocess(result[0]["text"]) if result else ""
            print(f"🔍 Raw result type: {result}, content: {text}")
            await websocket.send_text(text)
    except WebSocketDisconnect:
        # The client closed the connection; there is nothing left to do.
        return
@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Serve the web UI at the site root by returning the ``3.html`` page."""
    template_context = {"request": request}
    return render_template("3.html", template_context)
if __name__ == "__main__":
    # uvicorn is only needed when this file is executed directly.
    import uvicorn

    # 0.0.0.0 makes the server listen on every network interface.
    server_options = {"host": "0.0.0.0", "port": 8000, "log_level": "info"}
    uvicorn.run(app, **server_options)