SD-SafeAI
/
shudao-main


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
							import asyncio

from fastapi import APIRouter, File, Form, Request, UploadFile
from fastapi.responses import JSONResponse, Response
from pydantic import BaseModel

from services.tencent_speech_service import tencent_speech_service
from utils.logger import logger


# Router for the speech endpoints declared in this module; every route
# registered on it is served under the "/speech" path prefix.
router = APIRouter(prefix="/speech")


# JSON request body accepted by the POST /speech/synthesize endpoint.
# Documented with comments rather than a docstring so the schema pydantic
# derives from this class is left exactly as it was.
class SpeechSynthesizeRequest(BaseModel):
    # Text to be spoken. Defaults to "", which the endpoint rejects after
    # stripping whitespace.
    text: str = ""
    # Optional tuning values, forwarded unchanged as keyword arguments to
    # tencent_speech_service.synthesize_text. They are None when the client
    # omits them.
    # NOTE(review): presumably None makes the service fall back to its own
    # defaults — confirm in the service implementation.
    voice_type: int | None = None
    speed: float | None = None
    volume: float | None = None


def _speech_error_response(message: str, exc: Exception) -> JSONResponse:
    """Build the JSON error reply for a failed speech-service call.

    Args:
        message: Prefix describing which operation failed.
        exc: The caught exception; its ``str()`` is appended to ``message``
            in the response body.

    Returns:
        A ``JSONResponse`` whose HTTP status and ``statusCode`` field are 503
        when the exception text contains ``"PkgExhausted"`` and 500 otherwise.
    """
    reason = str(exc)
    if "PkgExhausted" in reason:
        code = 503
    else:
        code = 500
    payload = {
        "statusCode": code,
        "msg": f"{message}: {reason}",
    }
    return JSONResponse(status_code=code, content=payload)


@router.post("/transcribe")
async def transcribe_audio(
    request: Request,
    file: UploadFile = File(...),
    user_id: str = Form(""),
):
    """Transcribe an uploaded audio file to text.

    Args:
        request: Incoming request; the authenticated user is read from
            ``request.state.user``.
        file: Uploaded audio file (multipart form field).
        user_id: Optional form field. When empty, the ``user_id`` attribute
            of the authenticated user (or "" if it has none) is used instead.

    Returns:
        On success, a dict with ``statusCode`` 200, ``msg`` and a ``data``
        object holding the recognized ``text`` and the ``request_id``.
        Otherwise a ``JSONResponse``: 401 when no user is attached to the
        request, 400 when the file is missing or the service raises
        ``ValueError``, and 500/503 (see ``_speech_error_response``) for any
        other failure.
    """
    # request.state raises AttributeError for attributes that were never set.
    # Read defensively so a request that reaches this route without a user
    # attached gets the 401 below instead of an unhandled server error.
    user = getattr(request.state, "user", None)
    if not user:
        return JSONResponse(status_code=401, content={"statusCode": 401, "msg": "未授权"})

    if not file or not file.filename:
        return JSONResponse(status_code=400, content={"statusCode": 400, "msg": "缺少音频文件"})

    # NOTE(review): a client-supplied user_id takes precedence over the
    # authenticated user's id — confirm this is intended.
    effective_user_id = user_id or str(getattr(user, "user_id", ""))

    try:
        # The service call is synchronous; run it in a worker thread so the
        # event loop is not blocked while it executes.
        result = await asyncio.to_thread(
            tencent_speech_service.transcribe_file,
            file,
            effective_user_id,
        )
        return {
            "statusCode": 200,
            "msg": "success",
            "data": {
                "text": result["text"],
                "request_id": result.get("request_id", ""),
            },
        }
    except ValueError as exc:
        # ValueError from the service is reported to the client as a 400.
        return JSONResponse(status_code=400, content={"statusCode": 400, "msg": str(exc)})
    except Exception as exc:
        # Route-level boundary: log and convert any other failure to JSON.
        logger.error("[speech] transcribe failed: %s", exc)
        return _speech_error_response("语音转文字失败", exc)


@router.post("/synthesize")
async def synthesize_speech(request: Request, data: SpeechSynthesizeRequest):
    """Synthesize speech audio for the given text.

    Args:
        request: Incoming request; the authenticated user is read from
            ``request.state.user``.
        data: Request body carrying the text and optional voice settings.

    Returns:
        On success, a ``Response`` whose body is the synthesized audio bytes,
        with the content type reported by the service, an
        ``X-Speech-Request-Id`` header and ``Cache-Control: no-store``.
        Otherwise a ``JSONResponse``: 401 when no user is attached to the
        request, 400 when the text is blank or the service raises
        ``ValueError``, and 500/503 (see ``_speech_error_response``) for any
        other failure.
    """
    # request.state raises AttributeError for attributes that were never set.
    # Read defensively so a request that reaches this route without a user
    # attached gets the 401 below instead of an unhandled server error.
    user = getattr(request.state, "user", None)
    if not user:
        return JSONResponse(status_code=401, content={"statusCode": 401, "msg": "未授权"})

    normalized_text = (data.text or "").strip()
    if not normalized_text:
        return JSONResponse(status_code=400, content={"statusCode": 400, "msg": "播报文本不能为空"})

    try:
        # The service call is synchronous; run it in a worker thread so the
        # event loop is not blocked while it executes.
        result = await asyncio.to_thread(
            tencent_speech_service.synthesize_text,
            normalized_text,
            voice_type=data.voice_type,
            speed=data.speed,
            volume=data.volume,
        )
        # Header values must be strings. Coerce the id so a None or
        # non-string request_id cannot turn a successful synthesis into an
        # error while the response is being built.
        request_id = result.get("request_id")
        headers = {
            "X-Speech-Request-Id": "" if request_id is None else str(request_id),
            "Cache-Control": "no-store",
        }
        return Response(
            content=result["audio_bytes"],
            media_type=result["content_type"],
            headers=headers,
        )
    except ValueError as exc:
        # ValueError from the service is reported to the client as a 400.
        return JSONResponse(status_code=400, content={"statusCode": 400, "msg": str(exc)})
    except Exception as exc:
        # Route-level boundary: log and convert any other failure to JSON.
        logger.error("[speech] synthesize failed: %s", exc)
        return _speech_error_response("语音播报失败", exc)