# asr_service.py
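"""
ASR speech-recognition service.

Business logic for speech recognition, integrated with the Alibaba Cloud
Bailian (DashScope) platform.

Requirements: 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7
Supports: synchronous recognition, asynchronous transcription, task status queries
"""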
import base64
import json
import logging
import os
from datetime import datetime, timezone
from decimal import Decimal
from typing import Any, Dict, List, Optional, Tuple
from urllib.parse import unquote, urlparse

import dashscope
import requests
from fastapi import HTTPException
from sqlalchemy.orm import Session

from app.models.audio import ASRTask, ASRRecognition
from app.schemas.audio_schema import (
    ASRRequest, ASRResponse, ASRUsage,
    TranscribeRequest, TaskResponse, TranscribeResult,
    TranscriptChannel, TranscriptSentence, TaskUsage,
    ASRModelResponse
)
from app.services.oss_service import get_oss_service
  25. logger = logging.getLogger(__name__)
  26. class ASRService:
  27. """ASR语音识别服务类"""
  28. # ASR模型配置
  29. ASR_MODELS = [
  30. {
  31. "id": 1,
  32. "title": "qwen3-asr-flash",
  33. "name": "通义千问3-ASR-Flash",
  34. "description": "快速识别,支持上下文增强",
  35. "call_type": "sync",
  36. "features": ["上下文增强", "情感识别", "多语种"]
  37. },
  38. {
  39. "id": 2,
  40. "title": "qwen-audio-asr",
  41. "name": "通义千问Audio ASR",
  42. "description": "通用语音识别",
  43. "call_type": "sync",
  44. "features": ["通用识别", "多语种"]
  45. },
  46. {
  47. "id": 3,
  48. "title": "qwen3-asr-flash-filetrans",
  49. "name": "通义千问3-ASR-Flash-Filetrans",
  50. "description": "长音频转写,支持多音轨",
  51. "call_type": "async",
  52. "features": ["长音频", "多音轨", "时间戳"]
  53. }
  54. ]
  55. # 有效的同步ASR模型
  56. VALID_SYNC_MODELS = ["qwen3-asr-flash", "qwen-audio-asr"]
  57. # 有效的异步ASR模型
  58. VALID_ASYNC_MODELS = ["qwen3-asr-flash-filetrans"]
  59. # DashScope API基础URL
  60. DASHSCOPE_BASE_URL = "https://dashscope.aliyuncs.com/api/v1"
  61. def __init__(self, db: Session, user_id: str, api_key: str = None):
  62. """
  63. 初始化ASR服务
  64. Args:
  65. db: 数据库会话
  66. user_id: 用户ID
  67. api_key: 用户的API密钥(从用户数据动态加载)
  68. """
  69. self.db = db
  70. self.user_id = user_id
  71. self.api_key = api_key or os.getenv("DASHSCOPE_API_KEY")
  72. dashscope.api_key = self.api_key
  73. def _validate_sync_request(self, request: ASRRequest) -> None:
  74. from app.models.model import ModelNew, ModelCategory
  75. from sqlalchemy import cast
  76. from sqlalchemy.dialects.postgresql import ARRAY, INTEGER
  77. valid = self.db.query(ModelNew).filter(
  78. ModelNew.model_code == request.model,
  79. ModelNew.categories.contains(cast([int(ModelCategory.STT)], ARRAY(INTEGER))),
  80. ModelNew.is_api_enabled == True,
  81. ).first()
  82. if not valid:
  83. # 宽松兜底:只要 model_code 存在且 is_api_enabled 就允许
  84. valid = self.db.query(ModelNew).filter(
  85. ModelNew.model_code == request.model,
  86. ModelNew.is_api_enabled == True,
  87. ).first()
  88. if not valid:
  89. raise HTTPException(status_code=400, detail=f"无效的语音识别模型: {request.model}")
  90. if not request.audio_url and not request.audio_base64:
  91. raise HTTPException(status_code=400, detail="必须提供audio_url或audio_base64之一")
  92. def _validate_async_request(self, request: TranscribeRequest) -> None:
  93. from app.models.model import ModelNew, ModelCategory
  94. from sqlalchemy import cast
  95. from sqlalchemy.dialects.postgresql import ARRAY, INTEGER
  96. valid = self.db.query(ModelNew).filter(
  97. ModelNew.model_code == request.model,
  98. ModelNew.is_api_enabled == True,
  99. ).first()
  100. if not valid:
  101. raise HTTPException(status_code=400, detail=f"无效的异步转写模型: {request.model}")
  102. if not request.file_url:
  103. raise HTTPException(status_code=400, detail="必须提供file_url")
  104. async def recognize(self, request: ASRRequest) -> ASRResponse:
  105. """
  106. 同步语音识别
  107. Args:
  108. request: ASR请求对象
  109. Returns:
  110. ASR响应对象
  111. Raises:
  112. HTTPException: 识别失败
  113. """
  114. from dashscope import MultiModalConversation
  115. # 验证请求
  116. self._validate_sync_request(request)
  117. try:
  118. # 构建消息内容
  119. audio_content = []
  120. if request.audio_url:
  121. audio_content.append({"audio": request.audio_url})
  122. elif request.audio_base64:
  123. try:
  124. import base64 as b64mod
  125. from app.services.oss_service import OSSService
  126. audio_bytes = b64mod.b64decode(request.audio_base64)
  127. # 根据文件头推断扩展名
  128. if audio_bytes[:3] == b'ID3' or audio_bytes[:2] == b'\xff\xfb':
  129. ext = 'mp3'
  130. elif audio_bytes[:4] == b'RIFF':
  131. ext = 'wav'
  132. elif audio_bytes[:4] == b'OggS':
  133. ext = 'ogg'
  134. elif audio_bytes[:4] == b'fLaC':
  135. ext = 'flac'
  136. else:
  137. ext = 'mp3' # 默认 mp3
  138. oss = OSSService()
  139. audio_url = oss.upload_file(audio_bytes, prefix="asr/temp", original_filename=f"audio.{ext}")
  140. # 生成签名URL,确保阿里云ASR能访问(https + 路径不编码)
  141. from urllib.parse import urlparse, unquote
  142. parsed = urlparse(audio_url)
  143. object_key = unquote(parsed.path.lstrip('/'))
  144. signed = oss.bucket.sign_url('GET', object_key, 3600, slash_safe=True)
  145. # 强制 https
  146. audio_url = signed.replace('http://', 'https://', 1)
  147. logger.info(f"ASR音频签名URL: {audio_url[:80]}...")
  148. audio_content.append({"audio": audio_url})
  149. except Exception as oss_err:
  150. logger.error(f"OSS上传失败: {oss_err}")
  151. raise HTTPException(status_code=500, detail=f"音频上传失败: {oss_err}")
  152. messages = [
  153. {"role": "user", "content": audio_content}
  154. ]
  155. # 根据模型类型选择不同的 API
  156. # paraformer/fun-asr/qwen3-asr-flash-realtime 系列用 Transcription 异步接口
  157. TRANSCRIPTION_MODELS = ('paraformer', 'fun-asr', 'qwen3-asr-flash-realtime')
  158. use_recognition_api = any(request.model.lower().startswith(m) for m in TRANSCRIPTION_MODELS)
  159. if use_recognition_api:
  160. import dashscope
  161. audio_url_for_api = audio_content[0]["audio"] if audio_content else None
  162. if not audio_url_for_api:
  163. raise HTTPException(status_code=400, detail="无有效音频")
  164. # Transcription API 要求公开可访问的 URL,对 OSS 私有 URL 生成签名
  165. try:
  166. from app.services.oss_service import OSSService
  167. from urllib.parse import urlparse, unquote
  168. oss = OSSService()
  169. parsed = urlparse(audio_url_for_api)
  170. object_key = unquote(parsed.path.lstrip('/'))
  171. signed = oss.bucket.sign_url('GET', object_key, 3600, slash_safe=True)
  172. audio_url_for_api = signed.replace('http://', 'https://', 1)
  173. logger.info(f"Transcription API 签名URL: {audio_url_for_api[:80]}...")
  174. except Exception as sign_err:
  175. logger.warning(f"生成签名URL失败,使用原始URL: {sign_err}")
  176. task_response = dashscope.audio.asr.Transcription.async_call(
  177. api_key=self.api_key,
  178. model=request.model,
  179. file_urls=[audio_url_for_api],
  180. )
  181. if task_response.status_code != 200:
  182. error_msg = getattr(task_response, 'message', '未知错误')
  183. logger.error(f"ASR识别失败: {task_response.status_code} - {error_msg}")
  184. raise HTTPException(status_code=502, detail=f"语音识别失败: {error_msg}")
  185. # 等待转写完成
  186. trans_response = dashscope.audio.asr.Transcription.wait(
  187. task=task_response.output.task_id,
  188. api_key=self.api_key,
  189. )
  190. if trans_response.status_code != 200:
  191. error_msg = getattr(trans_response, 'message', '未知错误')
  192. logger.error(f"ASR转写等待失败: {trans_response.status_code} - {error_msg}")
  193. raise HTTPException(status_code=502, detail=f"语音识别失败: {error_msg}")
  194. # 解析转写结果
  195. import json as _json
  196. from urllib import request as _urllib_request
  197. text = ''
  198. seconds = 0
  199. language = request.language or 'unknown'
  200. try:
  201. results = trans_response.output.get('results', [])
  202. if results:
  203. trans_url = results[0].get('transcription_url')
  204. if trans_url:
  205. trans_data = _json.loads(_urllib_request.urlopen(trans_url).read().decode('utf-8'))
  206. logger.info(f"[ASR] trans_data keys={list(trans_data.keys())}, transcripts[0]={str(trans_data.get('transcripts', [{}])[0])[:300]}")
  207. transcripts = trans_data.get('transcripts', [])
  208. if transcripts:
  209. t = transcripts[0]
  210. text = t.get('text', '')
  211. # 时长字段:content_duration_in_milliseconds(毫秒)
  212. duration_ms = t.get('content_duration_in_milliseconds', 0) or t.get('duration', 0)
  213. if not duration_ms:
  214. channel_info = t.get('channel_info', [])
  215. if channel_info:
  216. duration_ms = channel_info[0].get('content_duration_in_milliseconds', 0) or channel_info[0].get('duration', 0)
  217. seconds = int(round(duration_ms / 1000)) if duration_ms else 0
  218. # 语种
  219. if not request.language:
  220. lang_info = t.get('language', '')
  221. if lang_info:
  222. language = lang_info
  223. except Exception as parse_err:
  224. logger.warning(f"解析转写结果失败: {parse_err}")
  225. input_tokens = 0
  226. output_tokens = 0
  227. emotion = None
  228. else:
  229. # 使用 MultiModalConversation 接口(qwen-audio-asr, qwen3-asr-flash 等)
  230. # 构建ASR选项
  231. asr_options = {}
  232. if request.language:
  233. asr_options["language"] = request.language
  234. if request.enable_itn:
  235. asr_options["enable_itn"] = True
  236. if request.context:
  237. asr_options["corpus"] = {"text": request.context}
  238. call_kwargs = {
  239. "api_key": self.api_key,
  240. "model": request.model,
  241. "messages": messages,
  242. "result_format": "message"
  243. }
  244. if asr_options:
  245. call_kwargs["asr_options"] = asr_options
  246. response = MultiModalConversation.call(**call_kwargs)
  247. if response.status_code != 200:
  248. error_msg = getattr(response, 'message', '未知错误')
  249. logger.error(f"ASR识别失败: {response.status_code} - {error_msg}")
  250. emsg = str(error_msg).lower()
  251. if 'silent' in emsg or 'audio silent' in emsg:
  252. raise HTTPException(status_code=400, detail="未检测到有效语音或音频被判定为静音")
  253. raise HTTPException(status_code=502, detail=f"语音识别失败: {error_msg}")
  254. output = response.output
  255. if not output or not output.choices:
  256. raise HTTPException(status_code=400, detail="未检测到有效识别结果")
  257. choice = output.choices[0]
  258. message = choice.message
  259. text = ""
  260. if message.content:
  261. for item in message.content:
  262. if isinstance(item, dict) and "text" in item:
  263. text = item["text"]; break
  264. elif isinstance(item, str):
  265. text = item; break
  266. language = "unknown"
  267. emotion = None
  268. if message.annotations:
  269. for annotation in message.annotations:
  270. if isinstance(annotation, dict):
  271. language = annotation.get("language", "unknown")
  272. emotion = annotation.get("emotion")
  273. break
  274. usage = response.usage if response.usage else {}
  275. def get_usage_value(key, default=0):
  276. if isinstance(usage, dict): return usage.get(key, default)
  277. return getattr(usage, key, default)
  278. input_tokens = 0
  279. output_tokens = 0
  280. if isinstance(usage, dict):
  281. input_tokens = (usage.get("input_tokens_details") or {}).get("text_tokens", 0)
  282. output_tokens = (usage.get("output_tokens_details") or {}).get("text_tokens", 0)
  283. elif hasattr(usage, 'input_tokens_details'):
  284. input_tokens = (usage.input_tokens_details or {}).get("text_tokens", 0) if isinstance(usage.input_tokens_details, dict) else 0
  285. seconds = get_usage_value("seconds", 0)
  286. if seconds == 0:
  287. audio_tokens = get_usage_value("audio_tokens", 0)
  288. if audio_tokens > 0:
  289. seconds = audio_tokens // 16
  290. # 计算费用(API调用免费)
  291. bill = Decimal("0")
  292. # 保存识别记录到数据库
  293. try:
  294. asr_recognition = ASRRecognition(
  295. user_id=self.user_id,
  296. model=request.model,
  297. audio_url=request.audio_url,
  298. audio_base64=request.audio_base64[:1000] if request.audio_base64 else None,
  299. language=request.language,
  300. enable_itn=request.enable_itn,
  301. context=request.context[:5000] if request.context else None,
  302. result_text=text,
  303. detected_language=language,
  304. emotion=emotion,
  305. duration=seconds,
  306. input_tokens=input_tokens,
  307. output_tokens=output_tokens,
  308. bill=bill
  309. )
  310. self.db.add(asr_recognition)
  311. self.db.commit()
  312. self.db.refresh(asr_recognition)
  313. except HTTPException:
  314. raise
  315. except Exception as e:
  316. logger.error(f"保存识别记录失败: {type(e).__name__}: {str(e)}")
  317. self.db.rollback()
  318. raise HTTPException(status_code=500, detail="保存识别记录失败")
  319. return ASRResponse(
  320. text=text,
  321. language=language,
  322. emotion=emotion,
  323. duration=seconds,
  324. usage=ASRUsage(
  325. input_tokens=input_tokens,
  326. output_tokens=output_tokens,
  327. seconds=seconds
  328. )
  329. )
  330. except HTTPException:
  331. raise
  332. except Exception as e:
  333. # 捕获并映射常见第三方错误
  334. err_str = str(e).lower()
  335. logger.error(f"ASR识别失败: {type(e).__name__}: {str(e)}")
  336. if 'silent' in err_str or 'audio silent' in err_str:
  337. raise HTTPException(status_code=400, detail="未检测到有效语音或音频被判定为静音,请检查麦克风并重新录制(建议 ≥5 秒清晰朗读)")
  338. raise HTTPException(status_code=502, detail="语音识别失败:服务暂时不可用,请稍后重试")
  339. async def transcribe(self, request: TranscribeRequest) -> TaskResponse:
  340. """
  341. 提交异步转写任务
  342. Args:
  343. request: 转写请求对象
  344. Returns:
  345. 任务响应对象
  346. Raises:
  347. HTTPException: 提交失败
  348. """
  349. # 验证请求
  350. self._validate_async_request(request)
  351. try:
  352. url = f"{self.DASHSCOPE_BASE_URL}/services/audio/asr/transcription"
  353. headers = {
  354. "Authorization": f"Bearer {self.api_key}",
  355. "Content-Type": "application/json",
  356. "X-DashScope-Async": "enable"
  357. }
  358. # 构建请求参数
  359. parameters = {
  360. "channel_id": request.channel_id,
  361. "enable_itn": request.enable_itn
  362. }
  363. if request.language:
  364. parameters["language"] = request.language
  365. if request.context:
  366. parameters["corpus"] = {"text": request.context}
  367. payload = {
  368. "model": request.model,
  369. "input": {"file_urls": [request.file_url]},
  370. "parameters": parameters
  371. }
  372. # 发送请求
  373. response = requests.post(url, headers=headers, json=payload, timeout=30)
  374. if response.status_code != 200:
  375. error_data = response.json() if response.text else {}
  376. error_msg = error_data.get("message", f"HTTP {response.status_code}")
  377. logger.error(f"异步转写提交失败: {error_msg}")
  378. raise HTTPException(status_code=502, detail=f"提交转写任务失败: {error_msg}")
  379. data = response.json()
  380. # 检查响应
  381. if "output" not in data:
  382. raise HTTPException(status_code=502, detail="提交转写任务失败,响应格式错误")
  383. output = data["output"]
  384. task_id = output.get("task_id")
  385. task_status = output.get("task_status", "PENDING")
  386. if not task_id:
  387. raise HTTPException(status_code=502, detail="提交转写任务失败,未返回task_id")
  388. # 保存任务记录到数据库
  389. asr_task = ASRTask(
  390. user_id=self.user_id,
  391. task_id=task_id,
  392. model=request.model,
  393. file_url=request.file_url,
  394. status=task_status
  395. )
  396. self.db.add(asr_task)
  397. self.db.commit()
  398. return TaskResponse(
  399. task_id=task_id,
  400. task_status=task_status,
  401. submit_time=output.get("submit_time"),
  402. scheduled_time=output.get("scheduled_time")
  403. )
  404. except HTTPException:
  405. raise
  406. except requests.exceptions.Timeout:
  407. raise HTTPException(status_code=504, detail="提交转写任务超时")
  408. except Exception as e:
  409. logger.error(f"异步转写提交失败: {type(e).__name__}: {str(e)}")
  410. raise HTTPException(status_code=502, detail=f"提交转写任务失败: {str(e)}")
  411. async def get_task_status(self, task_id: str) -> TaskResponse:
  412. """
  413. 查询转写任务状态
  414. Args:
  415. task_id: 任务ID
  416. Returns:
  417. 任务响应对象
  418. Raises:
  419. HTTPException: 查询失败或任务不存在
  420. """
  421. # 首先检查本地数据库中是否存在该任务且属于当前用户
  422. local_task = self.db.query(ASRTask).filter(
  423. ASRTask.task_id == task_id,
  424. ASRTask.user_id == self.user_id
  425. ).first()
  426. if not local_task:
  427. raise HTTPException(status_code=404, detail="任务不存在")
  428. try:
  429. url = f"{self.DASHSCOPE_BASE_URL}/tasks/{task_id}"
  430. headers = {
  431. "Authorization": f"Bearer {self.api_key}",
  432. "X-DashScope-Async": "enable"
  433. }
  434. # 发送请求
  435. response = requests.get(url, headers=headers, timeout=30)
  436. if response.status_code == 404:
  437. raise HTTPException(status_code=404, detail="任务不存在")
  438. if response.status_code != 200:
  439. error_data = response.json() if response.text else {}
  440. error_msg = error_data.get("message", f"HTTP {response.status_code}")
  441. logger.error(f"查询任务状态失败: {error_msg}")
  442. raise HTTPException(status_code=502, detail=f"查询任务状态失败: {error_msg}")
  443. data = response.json()
  444. # 检查响应
  445. if "output" not in data:
  446. raise HTTPException(status_code=502, detail="查询任务状态失败,响应格式错误")
  447. output = data["output"]
  448. task_status = output.get("task_status", "UNKNOWN")
  449. logger.info(f"[ASR task] task_id={task_id}, status={task_status}, output_keys={list(output.keys())}")
  450. # 更新本地数据库记录
  451. local_task.status = task_status
  452. local_task.updated_at = datetime.utcnow()
  453. # 解析结果(paraformer系列返回 output.results,通用接口返回 output.result)
  454. result = None
  455. result_data = output.get("result") or (output["results"][0] if output.get("results") else None)
  456. if result_data:
  457. transcripts = []
  458. transcription_url = result_data.get("transcription_url")
  459. logger.info(f"[ASR task] output.result keys={list(result_data.keys())}, transcription_url={transcription_url}")
  460. # 优先使用直接返回的transcripts
  461. if result_data.get("transcripts"):
  462. for transcript in result_data["transcripts"]:
  463. sentences = []
  464. if transcript.get("sentences"):
  465. for sentence in transcript["sentences"]:
  466. sentences.append(TranscriptSentence(
  467. begin_time=sentence.get("begin_time", 0),
  468. end_time=sentence.get("end_time", 0),
  469. text=sentence.get("text", ""),
  470. sentence_id=sentence.get("sentence_id", 0),
  471. language=sentence.get("language"),
  472. emotion=sentence.get("emotion")
  473. ))
  474. transcripts.append(TranscriptChannel(
  475. channel_id=transcript.get("channel_id", 0),
  476. text=transcript.get("text", ""),
  477. sentences=sentences
  478. ))
  479. # 如果没有直接返回transcripts,但有transcription_url,则下载并解析
  480. elif transcription_url and task_status == "SUCCEEDED":
  481. try:
  482. transcripts, file_duration = self._download_and_parse_transcription(transcription_url)
  483. logger.info(f"成功下载并解析转写结果文件: {transcription_url}, duration={file_duration}s")
  484. # 用文件里解析到的时长更新 local_task
  485. if file_duration > 0:
  486. local_task.duration = file_duration
  487. except Exception as e:
  488. logger.error(f"下载或解析转写结果文件失败: {str(e)}")
  489. result = TranscribeResult(
  490. transcription_url=transcription_url,
  491. transcripts=transcripts
  492. )
  493. # 更新本地记录的结果
  494. if transcripts:
  495. local_task.result_text = transcripts[0].text
  496. if transcription_url:
  497. local_task.result_url = transcription_url
  498. # 解析使用统计
  499. usage = None
  500. if data.get("usage"):
  501. usage_data = data["usage"]
  502. seconds = usage_data.get("seconds", 0) or usage_data.get("duration", 0)
  503. usage = TaskUsage(seconds=seconds)
  504. if seconds > 0:
  505. local_task.duration = seconds
  506. # 任务完成时记录费用(API调用免费)
  507. if task_status == "SUCCEEDED" and not local_task.bill:
  508. local_task.bill = Decimal("0")
  509. self.db.commit()
  510. # 提取失败原因(FAILED 时 output 里有 code/message 字段)
  511. error_message = None
  512. if task_status == "FAILED":
  513. error_message = output.get("message") or output.get("code")
  514. # 也尝试从 result_data 里取子任务错误
  515. if not error_message and result_data:
  516. error_message = result_data.get("message") or result_data.get("code")
  517. if error_message:
  518. logger.info(f"[ASR task] FAILED reason: {error_message}")
  519. return TaskResponse(
  520. task_id=task_id,
  521. task_status=task_status,
  522. submit_time=output.get("submit_time"),
  523. scheduled_time=output.get("scheduled_time"),
  524. end_time=output.get("end_time"),
  525. result=result,
  526. usage=usage,
  527. error_message=error_message
  528. )
  529. except HTTPException:
  530. raise
  531. except requests.exceptions.Timeout:
  532. raise HTTPException(status_code=504, detail="查询任务状态超时")
  533. except Exception as e:
  534. logger.error(f"查询任务状态失败: {type(e).__name__}: {str(e)}")
  535. raise HTTPException(status_code=502, detail=f"查询任务状态失败: {str(e)}")
  536. def _download_and_parse_transcription(self, transcription_url: str):
  537. """
  538. 下载并解析转写结果JSON文件
  539. Returns:
  540. (transcripts: List[TranscriptChannel], duration_seconds: int)
  541. """
  542. try:
  543. response = requests.get(transcription_url, timeout=30)
  544. response.raise_for_status()
  545. transcription_data = response.json()
  546. logger.info(f"[ASR async] transcription_data keys={list(transcription_data.keys())}, transcripts[0]={str((transcription_data.get('transcripts') or [{}])[0])[:200]}")
  547. transcripts = []
  548. duration_seconds = 0
  549. for transcript in (transcription_data.get("transcripts") or []):
  550. sentences = []
  551. for sentence in (transcript.get("sentences") or []):
  552. sentences.append(TranscriptSentence(
  553. begin_time=sentence.get("begin_time", 0),
  554. end_time=sentence.get("end_time", 0),
  555. text=sentence.get("text", ""),
  556. sentence_id=sentence.get("sentence_id", 0),
  557. language=sentence.get("language"),
  558. emotion=sentence.get("emotion")
  559. ))
  560. transcripts.append(TranscriptChannel(
  561. channel_id=transcript.get("channel_id", 0),
  562. text=transcript.get("text", ""),
  563. sentences=sentences
  564. ))
  565. # 提取时长(毫秒 → 秒)
  566. dur_ms = (
  567. transcript.get("content_duration_in_milliseconds")
  568. or transcript.get("duration")
  569. or 0
  570. )
  571. if dur_ms:
  572. duration_seconds = max(duration_seconds, int(round(dur_ms / 1000)))
  573. return transcripts, duration_seconds
  574. except requests.exceptions.RequestException as e:
  575. logger.error(f"下载转写结果文件失败: {str(e)}")
  576. raise HTTPException(status_code=502, detail=f"下载转写结果文件失败: {str(e)}")
  577. except json.JSONDecodeError as e:
  578. logger.error(f"解析转写结果JSON失败: {str(e)}")
  579. raise HTTPException(status_code=502, detail=f"解析转写结果JSON失败: {str(e)}")
  580. except Exception as e:
  581. logger.error(f"处理转写结果文件失败: {type(e).__name__}: {str(e)}")
  582. raise HTTPException(status_code=502, detail=f"处理转写结果文件失败: {str(e)}")
  583. def get_asr_models(self) -> List[ASRModelResponse]:
  584. """获取ASR模型列表(从数据库动态查询 STT 分类模型)"""
  585. from app.models.model import ModelNew, ModelCategory
  586. models = self.db.query(ModelNew).filter(
  587. ModelNew.categories.any(int(ModelCategory.STT)),
  588. ModelNew.is_api_enabled == True,
  589. ModelNew.is_show_enabled == True,
  590. ).all()
  591. result = []
  592. for i, m in enumerate(models):
  593. code = m.model_code.lower()
  594. # realtime 模型只支持流式输入,不支持文件 URL,不放入列表
  595. if 'realtime' in code:
  596. continue
  597. # 含 filetrans / async / trans 关键词的是异步转写
  598. # paraformer / fun-asr 系列走 Transcription 异步接口,也标为 async
  599. ASYNC_PREFIXES = ("paraformer", "fun-asr")
  600. call_type = (
  601. "async"
  602. if any(code.startswith(p) for p in ASYNC_PREFIXES)
  603. or any(k in code for k in ("filetrans", "async", "trans"))
  604. else "sync"
  605. )
  606. features = []
  607. if m.features and isinstance(m.features, dict):
  608. features = [k for k, v in m.features.items() if v]
  609. elif m.features and isinstance(m.features, list):
  610. features = m.features
  611. result.append(ASRModelResponse(
  612. id=m.id,
  613. title=m.model_code,
  614. name=m.display_name or m.model_code,
  615. description=m.custom_description or m.description or "",
  616. call_type=call_type,
  617. features=features,
  618. ))
  619. return result