audio_router.py 33 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939
  1. """
  2. AI语音API路由
  3. 提供语音合成(TTS)、语音识别(ASR)、声音复刻和音色管理的RESTful API端点
  4. 需求: 6.1-6.13, 7.1, 8.1-8.7
  5. """
  6. from datetime import datetime
  7. from typing import List, Optional
  8. from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Form, Query
  9. from fastapi.responses import StreamingResponse
  10. from sqlalchemy.orm import Session
  11. from app.database import get_db, SessionLocal
  12. from app.models.user import User
  13. from app.middleware import get_current_user_from_request
  14. from app.schemas.model_schema import ApiResponse
  15. from app.schemas.audio_schema import (
  16. # TTS相关
  17. TTSRequest, TTSResponse, LongTTSResponse, TTSModelResponse,
  18. # ASR相关
  19. ASRRequest, ASRResponse, TranscribeRequest, TaskResponse, ASRModelResponse,
  20. # 声音复刻相关
  21. VoiceCreateRequest, VoiceUpdateRequest, VoiceResponse, VoiceListResponse,
  22. # 系统音色相关
  23. SystemVoiceResponse,
  24. # 创作历史相关
  25. AudioHistoryItem, AudioHistoryListResponse, UpdateAudioNameRequest,
  26. )
  27. from app.models.audio import AudioSynthesis, ASRTask, ASRRecognition
  28. from app.services.tts_service import TTSService
  29. from app.services.asr_service import ASRService
  30. from app.services.voice_clone_service import VoiceCloneService
  31. from app.services.system_voice_service import SystemVoiceService
  32. from app.services.oss_service import get_oss_service
  33. from app.services.system_config_manager import get_config_int
  34. router = APIRouter(prefix="/api/audio", tags=["AI语音"])
  35. # ==================== TTS端点 ====================
  36. @router.get("/tts/models", response_model=ApiResponse[List[TTSModelResponse]])
  37. def get_tts_models(
  38. db: Session = Depends(get_db),
  39. current_user: User = Depends(get_current_user_from_request)
  40. ):
  41. """
  42. 获取TTS模型列表
  43. 需求: 6.1
  44. """
  45. service = TTSService(db, current_user.id, current_user.apikey)
  46. models = service.get_tts_models()
  47. return ApiResponse(
  48. code=200,
  49. message="success",
  50. data=models
  51. )
  52. @router.post("/tts/synthesize", response_model=ApiResponse[TTSResponse])
  53. async def synthesize_speech(
  54. request: TTSRequest,
  55. db: Session = Depends(get_db),
  56. current_user: User = Depends(get_current_user_from_request)
  57. ):
  58. """
  59. 语音合成(非流式),需要余额检查
  60. 将文本转换为语音,返回OSS上的音频文件URL
  61. 文本长度不超过2000字符
  62. 需求: 6.2
  63. """
  64. if not current_user.apikey:
  65. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  66. # 检查文本长度限制
  67. max_chars = get_config_int("max_audio_chars", 5000)
  68. if len(request.text) > max_chars:
  69. raise HTTPException(status_code=400, detail=f"文本长度超过限制(最大{max_chars}字符)")
  70. # 如果请求流式输出,返回流式响应
  71. if request.stream:
  72. from app.services.crypto_utils import get_effective_api_key
  73. effective_key = get_effective_api_key(db, request.model, current_user.apikey)
  74. stream_db = SessionLocal()
  75. async def audio_stream_and_close():
  76. try:
  77. service = TTSService(stream_db, current_user.id, effective_key)
  78. async for chunk in service.synthesize_stream(request):
  79. yield chunk
  80. finally:
  81. stream_db.close()
  82. return StreamingResponse(
  83. audio_stream_and_close(),
  84. media_type=f"audio/{request.format}",
  85. headers={
  86. "Content-Disposition": f"attachment; filename=audio.{request.format}"
  87. }
  88. )
  89. from app.services.crypto_utils import get_effective_api_key
  90. effective_key = get_effective_api_key(db, request.model, current_user.apikey)
  91. service = TTSService(db, current_user.id, effective_key)
  92. result = await service.synthesize(request)
  93. return ApiResponse(
  94. code=200,
  95. message="success",
  96. data=result
  97. )
  98. @router.post("/tts/synthesize-long", response_model=ApiResponse[LongTTSResponse])
  99. async def synthesize_long_speech(
  100. request: TTSRequest,
  101. db: Session = Depends(get_db),
  102. current_user: User = Depends(get_current_user_from_request)
  103. ):
  104. """
  105. 长文本语音合成,需要余额检查
  106. 支持超过2000字符的长文本,自动按句子边界切割并合并
  107. 需求: 6.3
  108. """
  109. if not current_user.apikey:
  110. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  111. # 检查文本长度限制(长文本允许更大,最低放宽到20万字符)
  112. max_chars = max(get_config_int("max_audio_chars", 5000) * 10, 200000)
  113. if len(request.text) > max_chars:
  114. raise HTTPException(status_code=400, detail=f"文本长度超过限制(最大{max_chars}字符)")
  115. from app.services.crypto_utils import get_effective_api_key
  116. effective_key = get_effective_api_key(db, request.model, current_user.apikey)
  117. service = TTSService(db, current_user.id, effective_key)
  118. result = await service.synthesize_long(request)
  119. return ApiResponse(
  120. code=200,
  121. message="success",
  122. data=result
  123. )
  124. # ==================== ASR端点 ====================
  125. @router.get("/asr/models", response_model=ApiResponse[List[ASRModelResponse]])
  126. def get_asr_models(
  127. db: Session = Depends(get_db),
  128. current_user: User = Depends(get_current_user_from_request)
  129. ):
  130. """
  131. 获取ASR模型列表
  132. 需求: 6.4
  133. """
  134. service = ASRService(db, current_user.id, current_user.apikey)
  135. models = service.get_asr_models()
  136. return ApiResponse(
  137. code=200,
  138. message="success",
  139. data=models
  140. )
  141. @router.post("/asr/recognize", response_model=ApiResponse[ASRResponse])
  142. async def recognize_speech(
  143. request: ASRRequest,
  144. db: Session = Depends(get_db),
  145. current_user: User = Depends(get_current_user_from_request)
  146. ):
  147. """
  148. 同步语音识别(JSON方式),需要余额检查
  149. 适用于短音频识别,低延迟,实时返回识别结果
  150. 限制:音频文件大小不超过10MB,且时长不超过5分钟
  151. 支持 audio_url 或 audio_base64 方式
  152. 需求: 6.5
  153. """
  154. if not current_user.apikey:
  155. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  156. from app.services.crypto_utils import get_effective_api_key
  157. effective_key = get_effective_api_key(db, request.model, current_user.apikey)
  158. service = ASRService(db, current_user.id, effective_key)
  159. result = await service.recognize(request)
  160. return ApiResponse(
  161. code=200,
  162. message="success",
  163. data=result
  164. )
  165. @router.post("/asr/recognize/file", response_model=ApiResponse[ASRResponse])
  166. async def recognize_speech_with_file(
  167. file: UploadFile = File(..., description="音频文件(MP3/WAV/M4A,最大10MB,时长不超过5分钟)"),
  168. model: str = Form(..., description="识别模型:qwen3-asr-flash、qwen-audio-asr"),
  169. language: Optional[str] = Form(default=None, description="指定语种:zh、en、ja、ko等,不指定则自动检测"),
  170. enable_itn: bool = Form(default=False, description="是否启用逆文本标准化(仅中英文)"),
  171. context: Optional[str] = Form(default=None, description="上下文提示,提升特定场景识别准确率"),
  172. db: Session = Depends(get_db),
  173. current_user: User = Depends(get_current_user_from_request),
  174. oss_service = Depends(get_oss_service)
  175. ):
  176. """
  177. 同步语音识别(文件上传方式),需要余额检查
  178. 适用于短音频识别,实时返回识别结果
  179. 限制:音频文件大小不超过10MB,且时长不超过5分钟
  180. 流程:上传文件到OSS -> 获取URL -> 调用识别服务
  181. 需求: 6.5
  182. """
  183. if not current_user.apikey:
  184. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  185. # 验证文件大小(最大10MB)
  186. MAX_FILE_SIZE = 10 * 1024 * 1024
  187. file_content = await file.read()
  188. if len(file_content) > MAX_FILE_SIZE:
  189. raise HTTPException(
  190. status_code=400,
  191. detail=f"文件大小超过限制(最大10MB,且时长不超过5分钟),当前文件大小:{len(file_content) / 1024 / 1024:.2f}MB"
  192. )
  193. # 验证文件格式
  194. allowed_extensions = ['.mp3', '.wav', '.m4a']
  195. file_extension = None
  196. if file.filename:
  197. file_extension = '.' + file.filename.split('.')[-1].lower()
  198. if file_extension and file_extension not in allowed_extensions:
  199. raise HTTPException(
  200. status_code=400,
  201. detail=f"不支持的音频格式。支持的格式:{', '.join(allowed_extensions)}"
  202. )
  203. # 上传文件到OSS获取URL
  204. try:
  205. audio_url = oss_service.upload_file(
  206. file_content,
  207. prefix="audio/asr",
  208. original_filename=file.filename
  209. )
  210. except RuntimeError as e:
  211. raise HTTPException(
  212. status_code=500,
  213. detail=f"文件上传失败:{str(e)}"
  214. )
  215. # 构建 ASRRequest 对象
  216. request = ASRRequest(
  217. model=model,
  218. audio_url=audio_url,
  219. language=language,
  220. enable_itn=enable_itn,
  221. context=context
  222. )
  223. # 调用识别服务
  224. from app.services.crypto_utils import get_effective_api_key
  225. effective_key = get_effective_api_key(db, model, current_user.apikey)
  226. service = ASRService(db, current_user.id, effective_key)
  227. result = await service.recognize(request)
  228. return ApiResponse(
  229. code=200,
  230. message="success",
  231. data=result
  232. )
  233. @router.post("/asr/transcribe", response_model=ApiResponse[TaskResponse])
  234. async def transcribe_audio(
  235. request: TranscribeRequest,
  236. db: Session = Depends(get_db),
  237. current_user: User = Depends(get_current_user_from_request)
  238. ):
  239. """
  240. 提交异步转写任务(JSON方式),需要余额检查
  241. 适用于长音频转写,支持最长12小时录音,具备情感识别与句粒度时间戳功能
  242. 限制:音频文件大小不超过2GB,且时长不超过12小时
  243. 需要提供公网可访问的音频文件URL
  244. 需求: 6.6
  245. """
  246. if not current_user.apikey:
  247. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  248. from app.services.crypto_utils import get_effective_api_key
  249. effective_key = get_effective_api_key(db, request.model, current_user.apikey)
  250. service = ASRService(db, current_user.id, effective_key)
  251. result = await service.transcribe(request)
  252. return ApiResponse(
  253. code=200,
  254. message="success",
  255. data=result
  256. )
  257. @router.get("/asr/upload-url", response_model=ApiResponse[dict])
  258. def get_asr_upload_url(
  259. filename: str = Query(..., description="原始文件名,用于保留扩展名"),
  260. content_type: Optional[str] = Query(default=None, description="文件 Content-Type"),
  261. current_user: User = Depends(get_current_user_from_request)
  262. ):
  263. """
  264. 获取音频文件直传 OSS 的预签名 PUT URL(用于异步转写)
  265. 前端拿到 upload_url 后,直接 PUT 文件到 OSS,
  266. 上传完成后将 public_url 作为 file_url 传给 /asr/transcribe 接口。
  267. 预签名 URL 有效期 15 分钟(大文件上传留足时间)。
  268. """
  269. oss = get_oss_service()
  270. result = oss.generate_presigned_put_url(
  271. prefix="audio/asr/transcribe",
  272. original_filename=filename,
  273. expires=900,
  274. content_type=content_type or None
  275. )
  276. return ApiResponse(code=200, message="success", data=result)
  277. async def transcribe_audio_with_file(
  278. file: UploadFile = File(..., description="音频文件(MP3/WAV/M4A等,最大2GB,时长不超过12小时)"),
  279. model: str = Form(..., description="识别模型:qwen3-asr-flash-filetrans"),
  280. language: Optional[str] = Form(default=None, description="指定语种:zh、en、ja、ko等,不指定则自动检测"),
  281. enable_itn: bool = Form(default=False, description="是否启用逆文本标准化(仅中英文)"),
  282. context: Optional[str] = Form(default=None, description="上下文提示,提升特定场景识别准确率"),
  283. channel_id: Optional[str] = Form(default="0", description="多音轨文件的音轨索引,逗号分隔,如:0,1"),
  284. db: Session = Depends(get_db),
  285. current_user: User = Depends(get_current_user_from_request),
  286. oss_service = Depends(get_oss_service)
  287. ):
  288. """
  289. 提交异步转写任务(文件上传方式),需要余额检查
  290. 适用于长音频转写,支持最长12小时录音,具备情感识别与句粒度时间戳功能
  291. 限制:音频文件大小不超过2GB,且时长不超过12小时
  292. 流程:上传文件到OSS -> 获取URL -> 提交异步转写任务
  293. 需求: 6.6
  294. """
  295. if not current_user.apikey:
  296. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  297. # 验证文件大小(长音频最大2GB)
  298. MAX_FILE_SIZE = 2 * 1024 * 1024 * 1024
  299. file_content = await file.read()
  300. if len(file_content) > MAX_FILE_SIZE:
  301. raise HTTPException(
  302. status_code=400,
  303. detail=f"文件大小超过限制(最大2GB,且时长不超过12小时),当前文件大小:{len(file_content) / 1024 / 1024 / 1024:.2f}GB"
  304. )
  305. # 验证文件格式(支持常见音频格式)
  306. allowed_extensions = ['.mp3', '.wav', '.m4a', '.flac', '.aac', '.ogg', '.opus', '.amr']
  307. file_extension = None
  308. if file.filename:
  309. file_extension = '.' + file.filename.split('.')[-1].lower()
  310. if file_extension and file_extension not in allowed_extensions:
  311. raise HTTPException(
  312. status_code=400,
  313. detail=f"不支持的音频格式。支持的格式:{', '.join(allowed_extensions)}"
  314. )
  315. # 验证模型:查数据库,call_type 为 async 的 STT 模型都允许
  316. from app.services.asr_service import ASRService
  317. asr_svc = ASRService(db, current_user.id, current_user.apikey)
  318. async_models = [m.title for m in asr_svc.get_asr_models() if m.call_type == "async"]
  319. if model not in async_models:
  320. raise HTTPException(
  321. status_code=400,
  322. detail=f"异步转写仅支持模型:{', '.join(async_models) or 'paraformer-v1, paraformer-v2'}"
  323. )
  324. # 解析channel_id
  325. channel_ids = [0] # 默认值
  326. if channel_id:
  327. try:
  328. channel_ids = [int(cid.strip()) for cid in channel_id.split(",") if cid.strip()]
  329. if not channel_ids:
  330. channel_ids = [0]
  331. except ValueError:
  332. raise HTTPException(
  333. status_code=400,
  334. detail="channel_id格式错误,应为逗号分隔的整数,如:0,1"
  335. )
  336. # 上传文件到OSS获取URL
  337. try:
  338. audio_url = oss_service.upload_file(
  339. file_content,
  340. prefix="audio/asr/transcribe",
  341. original_filename=file.filename
  342. )
  343. except RuntimeError as e:
  344. raise HTTPException(
  345. status_code=500,
  346. detail=f"文件上传失败:{str(e)}"
  347. )
  348. # 构建 TranscribeRequest 对象
  349. request = TranscribeRequest(
  350. model=model,
  351. file_url=audio_url,
  352. language=language,
  353. enable_itn=enable_itn,
  354. context=context,
  355. channel_id=channel_ids
  356. )
  357. # 调用转写服务
  358. from app.services.crypto_utils import get_effective_api_key
  359. effective_key = get_effective_api_key(db, model, current_user.apikey)
  360. service = ASRService(db, current_user.id, effective_key)
  361. result = await service.transcribe(request)
  362. return ApiResponse(
  363. code=200,
  364. message="success",
  365. data=result
  366. )
  367. @router.get("/asr/task/{task_id}", response_model=ApiResponse[TaskResponse])
  368. async def get_task_status(
  369. task_id: str,
  370. db: Session = Depends(get_db),
  371. current_user: User = Depends(get_current_user_from_request)
  372. ):
  373. """
  374. 查询转写任务状态
  375. 根据任务ID查询异步转写任务的状态和结果
  376. 需求: 6.7
  377. """
  378. if not current_user.apikey:
  379. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  380. # 先查询任务获取 model,以便使用正确的 API Key
  381. from app.models.audio import ASRTask
  382. local_task = db.query(ASRTask).filter(
  383. ASRTask.task_id == task_id,
  384. ASRTask.user_id == current_user.id
  385. ).first()
  386. if not local_task:
  387. raise HTTPException(status_code=404, detail="任务不存在")
  388. # 使用和提交任务时相同的 API Key 获取逻辑
  389. from app.services.crypto_utils import get_effective_api_key
  390. effective_key = get_effective_api_key(db, local_task.model, current_user.apikey)
  391. service = ASRService(db, current_user.id, effective_key)
  392. result = await service.get_task_status(task_id)
  393. return ApiResponse(
  394. code=200,
  395. message="success",
  396. data=result
  397. )
  398. # ==================== 声音复刻端点 ====================
  399. @router.get("/voice/upload-url", response_model=ApiResponse[dict])
  400. def get_voice_upload_url(
  401. filename: str = Query(..., description="原始文件名,用于保留扩展名"),
  402. content_type: Optional[str] = Query(default=None, description="文件 Content-Type"),
  403. current_user: User = Depends(get_current_user_from_request)
  404. ):
  405. """
  406. 获取声音文件直传 OSS 的预签名 PUT URL
  407. 前端拿到 upload_url 后,直接用 PUT 请求上传文件到 OSS,
  408. 上传完成后将 public_url 作为 audio_url 传给 /voice/create 接口。
  409. 预签名 URL 有效期 5 分钟。
  410. """
  411. oss = get_oss_service()
  412. result = oss.generate_presigned_put_url(
  413. prefix="audio/voice",
  414. original_filename=filename,
  415. expires=300,
  416. content_type=content_type or None
  417. )
  418. return ApiResponse(code=200, message="success", data=result)
  419. @router.post("/voice/create", response_model=ApiResponse[VoiceResponse])
  420. async def create_voice(
  421. file: Optional[UploadFile] = File(default=None, description="音频文件(WAV/MP3/M4A,最大10MB)"),
  422. target_model: str = Form(..., description="目标模型:cosyvoice-v3-plus、cosyvoice-v3-flash、cosyvoice-v2"),
  423. prefix: str = Form(..., description="音色名称前缀,仅允许数字、字母和下划线,不超过10字符"),
  424. voice_name: Optional[str] = Form(default=None, description="音色名称(用户输入的中文名称)"),
  425. audio_url: Optional[str] = Form(default=None, description="音频文件URL(与file二选一)"),
  426. language_hints: Optional[str] = Form(default=None, description="语言提示,逗号分隔:en,fr,de,ja,ko,ru"),
  427. db: Session = Depends(get_db),
  428. current_user: User = Depends(get_current_user_from_request)
  429. ):
  430. """
  431. 创建复刻音色
  432. 上传音频文件创建专属音色,支持文件上传或URL方式
  433. 需求: 6.8
  434. """
  435. if not current_user.apikey:
  436. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  437. # 解析language_hints
  438. lang_hints = None
  439. if language_hints:
  440. lang_hints = [h.strip() for h in language_hints.split(",") if h.strip()]
  441. # 构建请求对象
  442. request = VoiceCreateRequest(
  443. target_model=target_model,
  444. prefix=prefix,
  445. voice_name=voice_name,
  446. audio_url=audio_url,
  447. language_hints=lang_hints
  448. )
  449. from app.services.crypto_utils import get_effective_api_key
  450. effective_key = get_effective_api_key(db, target_model, current_user.apikey)
  451. service = VoiceCloneService(db, current_user.id, effective_key)
  452. result = await service.create_voice(request, file)
  453. return ApiResponse(
  454. code=200,
  455. message="success",
  456. data=result
  457. )
  458. @router.get("/voice/list", response_model=ApiResponse[VoiceListResponse])
  459. async def list_voices(
  460. prefix: Optional[str] = Query(default=None, description="按前缀筛选"),
  461. page: int = Query(default=0, ge=0, description="页码(从0开始)"),
  462. page_size: int = Query(default=10, ge=1, le=100, description="每页数量"),
  463. model: Optional[str] = Query(default=None, description="按目标模型筛选(如:cosyvoice-v3-flash、cosyvoice-v3-plus)"),
  464. db: Session = Depends(get_db),
  465. current_user: User = Depends(get_current_user_from_request)
  466. ):
  467. """
  468. 查询用户音色列表
  469. 返回当前用户创建的所有复刻音色,支持按模型筛选
  470. 需求: 6.9
  471. """
  472. if not current_user.apikey:
  473. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  474. service = VoiceCloneService(db, current_user.id, current_user.apikey)
  475. result = await service.list_voices(prefix, page, page_size, model)
  476. return ApiResponse(
  477. code=200,
  478. message="success",
  479. data=result
  480. )
  481. @router.get("/voice/system", response_model=ApiResponse[List[SystemVoiceResponse]])
  482. def get_system_voices(
  483. model: Optional[str] = Query(default=None, description="按模型筛选"),
  484. category: Optional[str] = Query(default=None, description="按场景分类筛选"),
  485. db: Session = Depends(get_db),
  486. current_user: User = Depends(get_current_user_from_request)
  487. ):
  488. """
  489. 获取系统音色列表
  490. 返回系统预置的音色列表,支持按模型和场景分类筛选
  491. 需求: 6.13
  492. """
  493. service = SystemVoiceService(db)
  494. voices = service.get_system_voices(model, category)
  495. return ApiResponse(
  496. code=200,
  497. message="success",
  498. data=voices
  499. )
  500. @router.get("/voice/{voice_id}", response_model=ApiResponse[VoiceResponse])
  501. async def get_voice(
  502. voice_id: str,
  503. db: Session = Depends(get_db),
  504. current_user: User = Depends(get_current_user_from_request)
  505. ):
  506. """
  507. 查询指定音色详情
  508. 根据音色ID查询音色的详细信息和状态
  509. 需求: 6.10
  510. """
  511. if not current_user.apikey:
  512. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  513. service = VoiceCloneService(db, current_user.id, current_user.apikey)
  514. result = await service.query_voice(voice_id)
  515. return ApiResponse(
  516. code=200,
  517. message="success",
  518. data=result
  519. )
  520. @router.put("/voice/{voice_id}", response_model=ApiResponse[VoiceResponse])
  521. async def update_voice(
  522. voice_id: str,
  523. file: Optional[UploadFile] = File(default=None, description="新的音频文件"),
  524. audio_url: Optional[str] = Form(default=None, description="新的音频URL(与file二选一)"),
  525. db: Session = Depends(get_db),
  526. current_user: User = Depends(get_current_user_from_request)
  527. ):
  528. """
  529. 更新音色
  530. 使用新的音频文件更新已有音色
  531. 需求: 6.11
  532. """
  533. if not current_user.apikey:
  534. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  535. service = VoiceCloneService(db, current_user.id, current_user.apikey)
  536. result = await service.update_voice(voice_id, file, audio_url)
  537. return ApiResponse(
  538. code=200,
  539. message="success",
  540. data=result
  541. )
  542. @router.delete("/voice/{voice_id}", response_model=ApiResponse[None])
  543. async def delete_voice(
  544. voice_id: str,
  545. db: Session = Depends(get_db),
  546. current_user: User = Depends(get_current_user_from_request)
  547. ):
  548. """
  549. 删除音色
  550. 删除指定的复刻音色
  551. 需求: 6.12
  552. """
  553. if not current_user.apikey:
  554. raise HTTPException(status_code=403, detail="未配置API密钥,请在用户设置中配置apikey")
  555. service = VoiceCloneService(db, current_user.id, current_user.apikey)
  556. await service.delete_voice(voice_id)
  557. return ApiResponse(
  558. code=200,
  559. message="success",
  560. data=None
  561. )
  562. # ==================== 创作历史端点 ====================
  563. @router.get("/history/synthesis", response_model=ApiResponse[AudioHistoryListResponse])
  564. def get_synthesis_history(
  565. page: int = Query(default=0, ge=0, description="页码(从0开始)"),
  566. page_size: int = Query(default=10, ge=1, le=100, description="每页数量"),
  567. db: Session = Depends(get_db),
  568. current_user: User = Depends(get_current_user_from_request)
  569. ):
  570. """
  571. 获取当前用户的语音合成历史记录
  572. 目前仅返回TTS合成记录,后续可扩展为统一的创作历史。
  573. """
  574. # 查询当前用户的合成记录,按时间倒序,排除被拒绝的内容
  575. query = db.query(AudioSynthesis).filter(
  576. AudioSynthesis.user_id == current_user.id
  577. ).filter(
  578. (AudioSynthesis.review_status != 'rejected') | (AudioSynthesis.review_status.is_(None))
  579. ).order_by(AudioSynthesis.created_at.desc())
  580. total = query.count()
  581. records = query.offset(page * page_size).limit(page_size).all()
  582. items: List[AudioHistoryItem] = []
  583. for record in records:
  584. # 名称优先使用自定义名称,否则使用文件名或文本截断
  585. name = record.custom_name
  586. if not name:
  587. if record.audio_url:
  588. name = record.audio_url.split("/")[-1] or "语音合成音频"
  589. elif record.text:
  590. text_preview = record.text.strip()
  591. name = (text_preview[:20] + "...") if len(text_preview) > 20 else text_preview
  592. else:
  593. name = f"TTS合成-{record.id}"
  594. items.append(
  595. AudioHistoryItem(
  596. id=record.id,
  597. name=name,
  598. custom_name=record.custom_name,
  599. mode="声音合成",
  600. duration=float(record.duration) if record.duration is not None else None,
  601. characters=record.characters,
  602. status="已完成",
  603. audio_url=record.audio_url,
  604. created_at=record.created_at.isoformat() if record.created_at else "",
  605. completed_at=record.completed_at.isoformat() if record.completed_at else None
  606. )
  607. )
  608. return ApiResponse(
  609. code=200,
  610. message="success",
  611. data=AudioHistoryListResponse(
  612. total=total,
  613. items=items
  614. )
  615. )
  616. @router.get("/history/recognition", response_model=ApiResponse[AudioHistoryListResponse])
  617. def get_recognition_history(
  618. page: int = Query(default=0, ge=0, description="页码(从0开始)"),
  619. page_size: int = Query(default=10, ge=1, le=100, description="每页数量"),
  620. db: Session = Depends(get_db),
  621. current_user: User = Depends(get_current_user_from_request)
  622. ):
  623. """
  624. 获取当前用户的语音识别历史记录
  625. 返回同步识别和异步转写任务的历史记录,按时间倒序合并
  626. """
  627. from sqlalchemy import union_all, select, literal
  628. # 查询同步识别记录
  629. sync_query = select(
  630. ASRRecognition.id,
  631. ASRRecognition.user_id,
  632. ASRRecognition.model,
  633. ASRRecognition.audio_url,
  634. ASRRecognition.result_text,
  635. ASRRecognition.detected_language,
  636. ASRRecognition.duration,
  637. ASRRecognition.created_at,
  638. literal("同步识别").label("mode"),
  639. literal("已完成").label("status")
  640. ).filter(
  641. ASRRecognition.user_id == current_user.id
  642. )
  643. # 查询异步转写任务记录
  644. async_query = select(
  645. ASRTask.id,
  646. ASRTask.user_id,
  647. ASRTask.model,
  648. ASRTask.file_url.label("audio_url"),
  649. ASRTask.result_text,
  650. literal(None).label("detected_language"),
  651. ASRTask.duration,
  652. ASRTask.created_at,
  653. literal("异步转写").label("mode"),
  654. ASRTask.status
  655. ).filter(
  656. ASRTask.user_id == current_user.id
  657. )
  658. # 分别查询同步识别和异步转写记录(数据库层分页,避免全量加载)
  659. # 先各自 COUNT,再按比例分页
  660. sync_total = db.query(ASRRecognition).filter(
  661. ASRRecognition.user_id == current_user.id
  662. ).count()
  663. async_total = db.query(ASRTask).filter(
  664. ASRTask.user_id == current_user.id
  665. ).count()
  666. total = sync_total + async_total
  667. # 计算分页偏移
  668. offset = page * page_size
  669. # 从两张表各取足够的数据,合并后再切片
  670. # 取 offset + page_size 条,保证合并后能切出正确的一页
  671. fetch_limit = offset + page_size
  672. sync_records = db.query(ASRRecognition).filter(
  673. ASRRecognition.user_id == current_user.id
  674. ).order_by(ASRRecognition.created_at.desc()).limit(fetch_limit).all()
  675. async_records = db.query(ASRTask).filter(
  676. ASRTask.user_id == current_user.id
  677. ).order_by(ASRTask.created_at.desc()).limit(fetch_limit).all()
  678. # 合并记录并按时间排序
  679. all_records = []
  680. for record in sync_records:
  681. all_records.append(('sync', record))
  682. for record in async_records:
  683. all_records.append(('async', record))
  684. # 按创建时间倒序排序
  685. all_records.sort(key=lambda x: x[1].created_at if x[1].created_at else datetime.min, reverse=True)
  686. # 分页切片
  687. paginated_records = all_records[offset:offset + page_size]
  688. items: List[AudioHistoryItem] = []
  689. for record_type, record in paginated_records:
  690. if record_type == 'sync':
  691. # 同步识别记录
  692. rec = record # type: ASRRecognition
  693. name = ""
  694. if rec.audio_url:
  695. from urllib.parse import unquote
  696. decoded = unquote(rec.audio_url)
  697. path_part = decoded.split("?")[0]
  698. name = path_part.split("/")[-1] or "语音识别音频"
  699. elif rec.audio_base64:
  700. name = "Base64音频"
  701. if not name:
  702. name = f"同步识别-{rec.id}"
  703. characters = len(rec.result_text) if rec.result_text else None
  704. recognition_text = None
  705. if rec.result_text:
  706. recognition_text = rec.result_text[:100] + "..." if len(rec.result_text) > 100 else rec.result_text
  707. items.append(
  708. AudioHistoryItem(
  709. id=f"sync-{rec.id}", # 添加前缀避免与异步任务ID冲突
  710. name=name,
  711. mode="同步识别",
  712. duration=float(rec.duration) if rec.duration is not None else None,
  713. characters=characters,
  714. status="已完成",
  715. audio_url=rec.audio_url,
  716. created_at=rec.created_at.isoformat() if rec.created_at else "",
  717. recognition_text=recognition_text,
  718. language=rec.detected_language
  719. )
  720. )
  721. else:
  722. # 异步转写任务记录
  723. task = record # type: ASRTask
  724. name = ""
  725. if task.file_url:
  726. from urllib.parse import unquote, urlparse
  727. # file_url 可能是带签名的完整 OSS URL,路径部分可能是 URL 编码
  728. # 先解码,再取路径最后一段,再去掉查询参数
  729. decoded = unquote(task.file_url)
  730. path_part = decoded.split("?")[0] # 去掉签名参数
  731. name = path_part.split("/")[-1] or "语音识别音频"
  732. if not name:
  733. name = f"识别任务-{task.task_id[:8]}"
  734. status_map = {
  735. 'SUCCEEDED': '已完成',
  736. 'PENDING': '排队中',
  737. 'RUNNING': '处理中',
  738. 'FAILED': '失败'
  739. }
  740. display_status = status_map.get(task.status, task.status)
  741. characters = len(task.result_text) if task.result_text else None
  742. recognition_text = task.result_text # 完整文本,供下载使用
  743. items.append(
  744. AudioHistoryItem(
  745. id=f"async-{task.id}",
  746. name=name,
  747. mode="异步转写",
  748. duration=float(task.duration) if task.duration is not None else None,
  749. characters=characters,
  750. status=display_status,
  751. audio_url=None, # 异步转写结果是文本,不提供音频下载
  752. created_at=task.created_at.isoformat() if task.created_at else "",
  753. recognition_text=recognition_text,
  754. language=None
  755. )
  756. )
  757. return ApiResponse(
  758. code=200,
  759. message="success",
  760. data=AudioHistoryListResponse(
  761. total=total,
  762. items=items
  763. )
  764. )
  765. @router.put("/history/synthesis/{record_id}/name", response_model=ApiResponse[None])
  766. def update_synthesis_name(
  767. record_id: int,
  768. request: UpdateAudioNameRequest,
  769. db: Session = Depends(get_db),
  770. current_user: User = Depends(get_current_user_from_request)
  771. ):
  772. """
  773. 更新语音合成记录的自定义名称
  774. """
  775. record = db.query(AudioSynthesis).filter(
  776. AudioSynthesis.id == record_id,
  777. AudioSynthesis.user_id == current_user.id
  778. ).first()
  779. if not record:
  780. raise HTTPException(status_code=404, detail="记录不存在")
  781. record.custom_name = request.custom_name.strip()
  782. db.commit()
  783. return ApiResponse(
  784. code=200,
  785. message="success",
  786. data=None
  787. )