"""Speech endpoints: audio transcription and text-to-speech synthesis.

Both routes require ``request.state.user`` to be populated (by code outside
this module) and delegate the actual work to ``tencent_speech_service``,
running it in a worker thread so the event loop is not blocked.
"""

import asyncio

from fastapi import APIRouter, File, Form, Request, UploadFile
from fastapi.responses import JSONResponse, Response
from pydantic import BaseModel

from services.tencent_speech_service import tencent_speech_service
from utils.logger import logger

router = APIRouter(prefix="/speech")


class SpeechSynthesizeRequest(BaseModel):
    """JSON body accepted by ``POST /speech/synthesize``."""

    # Text to synthesize; surrounding whitespace is stripped by the endpoint
    # and an empty result is rejected with HTTP 400.
    text: str = ""
    # The three options below are forwarded to the service unchanged.
    # NOTE(review): None presumably means "use the service default" - confirm
    # against tencent_speech_service.synthesize_text.
    voice_type: int | None = None
    speed: float | None = None
    volume: float | None = None


def _json_error(status_code: int, msg: str) -> JSONResponse:
    """Build the ``{"statusCode": ..., "msg": ...}`` error envelope."""
    return JSONResponse(
        status_code=status_code,
        content={"statusCode": status_code, "msg": msg},
    )


def _current_user(request: Request):
    """Return ``request.state.user``, or ``None`` when it was never set.

    Starlette's ``State`` raises ``AttributeError`` for attributes that were
    not assigned, so a plain attribute access would surface as an unhandled
    500 instead of the 401 the endpoints intend to return.
    """
    return getattr(request.state, "user", None)


def _speech_error_response(message: str, exc: Exception) -> JSONResponse:
    """Convert an unexpected service failure into a JSON error response.

    Args:
        message: User-facing prefix describing which operation failed.
        exc: The exception that was raised; its text is appended to *message*.

    Returns:
        A 503 response when the exception text contains ``"PkgExhausted"``,
        otherwise a 500 response.
    """
    detail = str(exc)
    # NOTE(review): "PkgExhausted" looks like the provider's quota-exhausted
    # error code, hence "service unavailable" - confirm with the service docs.
    status_code = 503 if "PkgExhausted" in detail else 500
    return _json_error(status_code, f"{message}: {detail}")


@router.post("/transcribe")
async def transcribe_audio(
    request: Request,
    file: UploadFile = File(...),
    user_id: str = Form(""),
):
    """Transcribe an uploaded audio file to text.

    Args:
        request: Incoming request; ``request.state.user`` must be truthy.
        file: The uploaded audio file (multipart form field ``file``).
        user_id: Optional form field; when empty, the ``user_id`` attribute of
            the authenticated user is used instead.

    Returns:
        On success, a dict with ``statusCode`` 200 and the transcribed text
        plus the service request id under ``data``. Otherwise a JSON error:
        401 when unauthenticated, 400 for a missing file or a ``ValueError``
        from the service, 500/503 for any other failure.
    """
    user = _current_user(request)
    if not user:
        return _json_error(401, "未授权")

    if not file or not file.filename:
        return _json_error(400, "缺少音频文件")

    # NOTE(review): a client-supplied user_id takes precedence over the
    # authenticated user's id - confirm this override is intended.
    effective_user_id = user_id or str(getattr(user, "user_id", ""))

    try:
        # The UploadFile object itself is handed to the worker thread.
        # NOTE(review): presumably the service reads it synchronously - confirm.
        result = await asyncio.to_thread(
            tencent_speech_service.transcribe_file,
            file,
            effective_user_id,
        )
        return {
            "statusCode": 200,
            "msg": "success",
            "data": {
                "text": result["text"],
                "request_id": result.get("request_id", ""),
            },
        }
    except ValueError as exc:
        # The service signals invalid input with ValueError -> client error.
        return _json_error(400, str(exc))
    except Exception as exc:
        # Router-level boundary: log with traceback and return a JSON error.
        logger.exception("[speech] transcribe failed: %s", exc)
        return _speech_error_response("语音转文字失败", exc)


@router.post("/synthesize")
async def synthesize_speech(request: Request, data: SpeechSynthesizeRequest):
    """Synthesize speech audio for the given text.

    Args:
        request: Incoming request; ``request.state.user`` must be truthy.
        data: Text and optional voice parameters.

    Returns:
        On success, a raw ``Response`` whose body is the audio bytes and whose
        media type is the content type reported by the service, with
        ``X-Speech-Request-Id`` and ``Cache-Control: no-store`` headers.
        Otherwise a JSON error: 401 when unauthenticated, 400 for empty text
        or a ``ValueError`` from the service, 500/503 for any other failure.
    """
    user = _current_user(request)
    if not user:
        return _json_error(401, "未授权")

    normalized_text = (data.text or "").strip()
    if not normalized_text:
        return _json_error(400, "播报文本不能为空")

    try:
        result = await asyncio.to_thread(
            tencent_speech_service.synthesize_text,
            normalized_text,
            voice_type=data.voice_type,
            speed=data.speed,
            volume=data.volume,
        )
        headers = {
            # Header values must be strings; a None or non-string request id
            # would otherwise fail while the response is being built.
            "X-Speech-Request-Id": str(result.get("request_id") or ""),
            "Cache-Control": "no-store",
        }
        return Response(
            content=result["audio_bytes"],
            media_type=result["content_type"],
            headers=headers,
        )
    except ValueError as exc:
        # The service signals invalid input with ValueError -> client error.
        return _json_error(400, str(exc))
    except Exception as exc:
        # Router-level boundary: log with traceback and return a JSON error.
        logger.exception("[speech] synthesize failed: %s", exc)
        return _speech_error_response("语音播报失败", exc)