| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798 |
- import asyncio
- from fastapi import APIRouter, File, Form, Request, UploadFile
- from fastapi.responses import JSONResponse, Response
- from pydantic import BaseModel
- from services.tencent_speech_service import tencent_speech_service
- from utils.logger import logger
# Router collecting the speech endpoints; its routes are served under "/speech".
router = APIRouter(prefix="/speech")
class SpeechSynthesizeRequest(BaseModel):
    """Request body accepted by the ``/synthesize`` endpoint."""

    # Text to be spoken. Defaults to an empty string; the handler rejects
    # text that is empty after stripping whitespace with a 400 response.
    text: str = ""
    # Optional synthesis parameters, forwarded unchanged to
    # ``tencent_speech_service.synthesize_text``. ``None`` presumably means
    # "use the service default" — TODO confirm against the service.
    voice_type: int | None = None
    speed: float | None = None
    volume: float | None = None
def _speech_error_response(message: str, exc: Exception) -> JSONResponse:
    """Build the JSON error response for a failed speech operation.

    Args:
        message: Human-readable prefix describing which operation failed.
        exc: The exception that was raised; its string form is appended to
            ``message`` in the response body.

    Returns:
        A ``JSONResponse`` whose HTTP status and ``statusCode`` field are 503
        when the exception text contains ``"PkgExhausted"`` and 500 otherwise.
    """
    reason = str(exc)

    # "PkgExhausted" presumably marks an exhausted provider quota/package, so
    # it is reported as "service unavailable" rather than a generic failure.
    if "PkgExhausted" in reason:
        http_status = 503
    else:
        http_status = 500

    payload = {
        "statusCode": http_status,
        "msg": f"{message}: {reason}",
    }
    return JSONResponse(status_code=http_status, content=payload)
@router.post("/transcribe")
async def transcribe_audio(
    request: Request,
    file: UploadFile = File(...),
    user_id: str = Form(""),
):
    """Transcribe an uploaded audio file to text.

    The call to ``tencent_speech_service.transcribe_file`` is executed in a
    worker thread through ``asyncio.to_thread`` so that it does not run on the
    event loop.

    Args:
        request: Incoming request. ``request.state.user`` must be set and
            truthy, otherwise a 401 response is returned.
        file: Uploaded audio file from the multipart form.
        user_id: Optional form field. When empty, the ``user_id`` attribute of
            the authenticated user (or ``""`` if it has none) is used instead.

    Returns:
        On success, a dict with ``statusCode`` 200, ``msg`` ``"success"`` and
        ``data`` holding ``text`` and ``request_id``. On failure, a
        ``JSONResponse``: 401 when unauthenticated, 400 when the file is
        missing or the service raises ``ValueError``, and 500/503 (see
        ``_speech_error_response``) for any other exception.
    """
    # ``request.state`` raises AttributeError for attributes that were never
    # set; treat a missing ``user`` the same as an unauthenticated request
    # instead of letting it surface as an unhandled 500.
    user = getattr(request.state, "user", None)
    if not user:
        return JSONResponse(status_code=401, content={"statusCode": 401, "msg": "未授权"})

    if not file or not file.filename:
        return JSONResponse(status_code=400, content={"statusCode": 400, "msg": "缺少音频文件"})

    # NOTE(review): a client-supplied ``user_id`` form value takes precedence
    # over the authenticated user's id — confirm this is intended.
    effective_user_id = user_id or str(getattr(user, "user_id", ""))

    try:
        result = await asyncio.to_thread(
            tencent_speech_service.transcribe_file,
            file,
            effective_user_id,
        )
        # Kept inside the ``try`` so that a malformed service result (e.g. a
        # missing "text" key) is still reported through the JSON error path.
        return {
            "statusCode": 200,
            "msg": "success",
            "data": {
                "text": result["text"],
                "request_id": result.get("request_id", ""),
            },
        }
    except ValueError as exc:
        # The service signals invalid input with ValueError; report it as 400.
        return JSONResponse(status_code=400, content={"statusCode": 400, "msg": str(exc)})
    except Exception as exc:
        # Endpoint boundary: log and convert any other failure to a JSON error.
        logger.error("[speech] transcribe failed: %s", exc)
        return _speech_error_response("语音转文字失败", exc)
@router.post("/synthesize")
async def synthesize_speech(request: Request, data: SpeechSynthesizeRequest):
    """Synthesize speech audio from text and return it as the response body.

    The call to ``tencent_speech_service.synthesize_text`` is executed in a
    worker thread through ``asyncio.to_thread`` so that it does not run on the
    event loop.

    Args:
        request: Incoming request. ``request.state.user`` must be set and
            truthy, otherwise a 401 response is returned.
        data: Request body with the text and optional ``voice_type``,
            ``speed`` and ``volume``, which are forwarded to the service.

    Returns:
        On success, a ``Response`` carrying ``result["audio_bytes"]`` with
        media type ``result["content_type"]``, an ``X-Speech-Request-Id``
        header and ``Cache-Control: no-store``. On failure, a
        ``JSONResponse``: 401 when unauthenticated, 400 when the text is blank
        or the service raises ``ValueError``, and 500/503 (see
        ``_speech_error_response``) for any other exception.
    """
    # ``request.state`` raises AttributeError for attributes that were never
    # set; treat a missing ``user`` the same as an unauthenticated request
    # instead of letting it surface as an unhandled 500.
    user = getattr(request.state, "user", None)
    if not user:
        return JSONResponse(status_code=401, content={"statusCode": 401, "msg": "未授权"})

    normalized_text = (data.text or "").strip()
    if not normalized_text:
        return JSONResponse(status_code=400, content={"statusCode": 400, "msg": "播报文本不能为空"})

    try:
        result = await asyncio.to_thread(
            tencent_speech_service.synthesize_text,
            normalized_text,
            voice_type=data.voice_type,
            speed=data.speed,
            volume=data.volume,
        )
        headers = {
            # Header values must be strings; a None or non-string request id
            # would otherwise fail during header encoding and turn a
            # successful synthesis into a 500.
            "X-Speech-Request-Id": str(result.get("request_id") or ""),
            "Cache-Control": "no-store",
        }
        return Response(
            content=result["audio_bytes"],
            media_type=result["content_type"],
            headers=headers,
        )
    except ValueError as exc:
        # The service signals invalid input with ValueError; report it as 400.
        return JSONResponse(status_code=400, content={"statusCode": 400, "msg": str(exc)})
    except Exception as exc:
        # Endpoint boundary: log and convert any other failure to a JSON error.
        logger.error("[speech] synthesize failed: %s", exc)
        return _speech_error_response("语音播报失败", exc)
|