speech.py 3.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. import asyncio
  2. from fastapi import APIRouter, File, Form, Request, UploadFile
  3. from fastapi.responses import JSONResponse, Response
  4. from pydantic import BaseModel
  5. from services.tencent_speech_service import tencent_speech_service
  6. from utils.logger import logger
  7. router = APIRouter(prefix="/speech")
  8. class SpeechSynthesizeRequest(BaseModel):
  9. text: str = ""
  10. voice_type: int | None = None
  11. speed: float | None = None
  12. volume: float | None = None
  13. def _speech_error_response(message: str, exc: Exception) -> JSONResponse:
  14. detail = str(exc)
  15. status_code = 503 if "PkgExhausted" in detail else 500
  16. return JSONResponse(
  17. status_code=status_code,
  18. content={
  19. "statusCode": status_code,
  20. "msg": f"{message}: {detail}",
  21. },
  22. )
  23. @router.post("/transcribe")
  24. async def transcribe_audio(
  25. request: Request,
  26. file: UploadFile = File(...),
  27. user_id: str = Form(""),
  28. ):
  29. user = request.state.user
  30. if not user:
  31. return JSONResponse(status_code=401, content={"statusCode": 401, "msg": "未授权"})
  32. if not file or not file.filename:
  33. return JSONResponse(status_code=400, content={"statusCode": 400, "msg": "缺少音频文件"})
  34. try:
  35. result = await asyncio.to_thread(
  36. tencent_speech_service.transcribe_file,
  37. file,
  38. user_id or str(getattr(user, "user_id", "")),
  39. )
  40. return {
  41. "statusCode": 200,
  42. "msg": "success",
  43. "data": {
  44. "text": result["text"],
  45. "request_id": result.get("request_id", ""),
  46. },
  47. }
  48. except ValueError as exc:
  49. return JSONResponse(status_code=400, content={"statusCode": 400, "msg": str(exc)})
  50. except Exception as exc:
  51. logger.error("[speech] transcribe failed: %s", exc)
  52. return _speech_error_response("语音转文字失败", exc)
  53. @router.post("/synthesize")
  54. async def synthesize_speech(request: Request, data: SpeechSynthesizeRequest):
  55. user = request.state.user
  56. if not user:
  57. return JSONResponse(status_code=401, content={"statusCode": 401, "msg": "未授权"})
  58. normalized_text = (data.text or "").strip()
  59. if not normalized_text:
  60. return JSONResponse(status_code=400, content={"statusCode": 400, "msg": "播报文本不能为空"})
  61. try:
  62. result = await asyncio.to_thread(
  63. tencent_speech_service.synthesize_text,
  64. normalized_text,
  65. voice_type=data.voice_type,
  66. speed=data.speed,
  67. volume=data.volume,
  68. )
  69. headers = {
  70. "X-Speech-Request-Id": result.get("request_id", ""),
  71. "Cache-Control": "no-store",
  72. }
  73. return Response(
  74. content=result["audio_bytes"],
  75. media_type=result["content_type"],
  76. headers=headers,
  77. )
  78. except ValueError as exc:
  79. return JSONResponse(status_code=400, content={"statusCode": 400, "msg": str(exc)})
  80. except Exception as exc:
  81. logger.error("[speech] synthesize failed: %s", exc)
  82. return _speech_error_response("语音播报失败", exc)