| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717 |
- """
- ASR语音识别服务
- 提供语音识别的业务逻辑处理,集成阿里云百炼平台DashScope
- 需求: 2.1, 2.2, 2.3, 2.4, 2.5, 2.6, 2.7
- 支持: 同步识别、异步转写、任务状态查询
- """
- import json
- import logging
- import os
- from datetime import datetime
- from typing import List, Optional
- from decimal import Decimal
- import dashscope
- import requests
- from sqlalchemy.orm import Session
- from fastapi import HTTPException
- from app.models.audio import ASRTask, ASRRecognition
- from app.schemas.audio_schema import (
- ASRRequest, ASRResponse, ASRUsage,
- TranscribeRequest, TaskResponse, TranscribeResult,
- TranscriptChannel, TranscriptSentence, TaskUsage,
- ASRModelResponse
- )
- from app.services.oss_service import get_oss_service
- logger = logging.getLogger(__name__)
class ASRService:
    """Speech-recognition (ASR) service built on Alibaba Cloud DashScope.

    Offers three operations on behalf of a single user:

    * ``recognize``       - recognition that returns the text in the same call
    * ``transcribe``      - submission of an asynchronous transcription task
    * ``get_task_status`` - polling of a previously submitted task

    Results are recorded through the SQLAlchemy session given to the
    constructor.
    """

    # Static catalogue of ASR models.
    ASR_MODELS = [
        {
            "id": 1,
            "title": "qwen3-asr-flash",
            "name": "通义千问3-ASR-Flash",
            "description": "快速识别,支持上下文增强",
            "call_type": "sync",
            "features": ["上下文增强", "情感识别", "多语种"]
        },
        {
            "id": 2,
            "title": "qwen-audio-asr",
            "name": "通义千问Audio ASR",
            "description": "通用语音识别",
            "call_type": "sync",
            "features": ["通用识别", "多语种"]
        },
        {
            "id": 3,
            "title": "qwen3-asr-flash-filetrans",
            "name": "通义千问3-ASR-Flash-Filetrans",
            "description": "长音频转写,支持多音轨",
            "call_type": "async",
            "features": ["长音频", "多音轨", "时间戳"]
        }
    ]

    # Model codes accepted for synchronous recognition.
    VALID_SYNC_MODELS = ["qwen3-asr-flash", "qwen-audio-asr"]

    # Model codes accepted for asynchronous transcription.
    VALID_ASYNC_MODELS = ["qwen3-asr-flash-filetrans"]

    # Base URL of the DashScope REST API.
    DASHSCOPE_BASE_URL = "https://dashscope.aliyuncs.com/api/v1"

    # Model-code prefixes that recognize() routes to the Transcription API.
    _TRANSCRIPTION_MODEL_PREFIXES = ('paraformer', 'fun-asr', 'qwen3-asr-flash-realtime')
    # Model-code prefixes / substrings that get_asr_models() labels "async".
    _ASYNC_MODEL_PREFIXES = ("paraformer", "fun-asr")
    _ASYNC_MODEL_KEYWORDS = ("filetrans", "async", "trans")
    # Lifetime of signed OSS URLs, and timeout of outbound HTTP calls (seconds).
    _SIGNED_URL_TTL_SECONDS = 3600
    _HTTP_TIMEOUT_SECONDS = 30

    def __init__(self, db: Session, user_id: str, api_key: Optional[str] = None):
        """Create a service bound to one user.

        Args:
            db: Database session used for validation queries and for
                persisting recognition / task records.
            user_id: ID of the user the records are stored under.
            api_key: DashScope API key; falls back to the
                ``DASHSCOPE_API_KEY`` environment variable when omitted.
        """
        self.db = db
        self.user_id = user_id
        self.api_key = api_key or os.getenv("DASHSCOPE_API_KEY")
        # NOTE(review): this writes process-wide SDK state, so the key of the
        # most recently constructed service becomes the global default. Every
        # SDK/REST call in this class passes the key explicitly; the
        # assignment is kept only for backward compatibility.
        dashscope.api_key = self.api_key

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------

    def _validate_sync_request(self, request: ASRRequest) -> None:
        """Validate a synchronous recognition request.

        Raises:
            HTTPException: 400 when no API-enabled model with the requested
                code exists, or when neither ``audio_url`` nor
                ``audio_base64`` is supplied.
        """
        from app.models.model import ModelNew, ModelCategory
        from sqlalchemy import cast
        from sqlalchemy.dialects.postgresql import ARRAY, INTEGER

        # Preferred match: an API-enabled model tagged with the STT category.
        valid = self.db.query(ModelNew).filter(
            ModelNew.model_code == request.model,
            ModelNew.categories.contains(cast([int(ModelCategory.STT)], ARRAY(INTEGER))),
            ModelNew.is_api_enabled == True,  # noqa: E712 - SQL expression
        ).first()
        if not valid:
            # Lenient fallback: any API-enabled model with that code is allowed.
            valid = self.db.query(ModelNew).filter(
                ModelNew.model_code == request.model,
                ModelNew.is_api_enabled == True,  # noqa: E712 - SQL expression
            ).first()
        if not valid:
            raise HTTPException(status_code=400, detail=f"无效的语音识别模型: {request.model}")
        if not request.audio_url and not request.audio_base64:
            raise HTTPException(status_code=400, detail="必须提供audio_url或audio_base64之一")

    def _validate_async_request(self, request: TranscribeRequest) -> None:
        """Validate an asynchronous transcription request.

        Raises:
            HTTPException: 400 when no API-enabled model with the requested
                code exists, or when ``file_url`` is missing.
        """
        from app.models.model import ModelNew

        valid = self.db.query(ModelNew).filter(
            ModelNew.model_code == request.model,
            ModelNew.is_api_enabled == True,  # noqa: E712 - SQL expression
        ).first()
        if not valid:
            raise HTTPException(status_code=400, detail=f"无效的异步转写模型: {request.model}")
        if not request.file_url:
            raise HTTPException(status_code=400, detail="必须提供file_url")

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _get_field(obj, key, default=None):
        """Read ``key`` from a dict-like object, or as an attribute otherwise."""
        if isinstance(obj, dict):
            return obj.get(key, default)
        return getattr(obj, key, default)

    @classmethod
    def _text_tokens(cls, usage, details_key: str) -> int:
        """Return ``usage[details_key]["text_tokens"]``, or 0 when unavailable."""
        details = cls._get_field(usage, details_key)
        if isinstance(details, dict):
            return details.get("text_tokens", 0) or 0
        return 0

    @classmethod
    def _usage_seconds(cls, usage) -> int:
        """Return the audio duration reported in ``usage``.

        Uses ``seconds`` when it is non-zero; otherwise derives a value from
        ``audio_tokens`` by integer division by 16 (the ratio the original
        implementation used - presumably 16 audio tokens per second; TODO
        confirm against the provider's billing documentation).
        """
        seconds = cls._get_field(usage, "seconds", 0) or 0
        if not seconds:
            audio_tokens = cls._get_field(usage, "audio_tokens", 0) or 0
            if audio_tokens > 0:
                seconds = audio_tokens // 16
        return seconds

    @staticmethod
    def _guess_audio_extension(audio_bytes: bytes) -> str:
        """Infer a file extension from the leading magic bytes (default mp3)."""
        if audio_bytes[:3] == b'ID3' or audio_bytes[:2] == b'\xff\xfb':
            return 'mp3'
        if audio_bytes[:4] == b'RIFF':
            return 'wav'
        if audio_bytes[:4] == b'OggS':
            return 'ogg'
        if audio_bytes[:4] == b'fLaC':
            return 'flac'
        return 'mp3'

    @classmethod
    def _classify_call_type(cls, code: str) -> str:
        """Return "async" or "sync" for a lower-cased model code."""
        if code.startswith(cls._ASYNC_MODEL_PREFIXES):
            return "async"
        if any(keyword in code for keyword in cls._ASYNC_MODEL_KEYWORDS):
            return "async"
        return "sync"

    @classmethod
    def _signed_https_url(cls, oss, url: str) -> str:
        """Sign the object addressed by ``url``'s path and force ``https``.

        ``oss`` must expose ``bucket.sign_url``; the object key is the
        URL-decoded path of ``url`` without its leading slash.
        """
        from urllib.parse import unquote, urlparse

        object_key = unquote(urlparse(url).path.lstrip('/'))
        signed = oss.bucket.sign_url('GET', object_key, cls._SIGNED_URL_TTL_SECONDS, slash_safe=True)
        return signed.replace('http://', 'https://', 1)

    @staticmethod
    def _extract_error_message(response) -> str:
        """Return the ``message`` of a JSON error body, else ``HTTP <status>``.

        Never raises on an empty or non-JSON body, so the caller can always
        report the upstream failure.
        """
        fallback = f"HTTP {response.status_code}"
        if not response.text:
            return fallback
        try:
            payload = response.json()
        except ValueError:
            return fallback
        if isinstance(payload, dict):
            return payload.get("message", fallback)
        return fallback

    @staticmethod
    def _build_channel(transcript: dict) -> TranscriptChannel:
        """Convert one raw transcript dict into a ``TranscriptChannel``."""
        sentences = [
            TranscriptSentence(
                begin_time=sentence.get("begin_time", 0),
                end_time=sentence.get("end_time", 0),
                text=sentence.get("text", ""),
                sentence_id=sentence.get("sentence_id", 0),
                language=sentence.get("language"),
                emotion=sentence.get("emotion"),
            )
            for sentence in (transcript.get("sentences") or [])
        ]
        return TranscriptChannel(
            channel_id=transcript.get("channel_id", 0),
            text=transcript.get("text", ""),
            sentences=sentences,
        )

    # ------------------------------------------------------------------
    # Synchronous recognition
    # ------------------------------------------------------------------

    async def recognize(self, request: ASRRequest) -> ASRResponse:
        """Recognize speech and return the text in the same call.

        Models whose code starts with one of
        ``_TRANSCRIPTION_MODEL_PREFIXES`` are served by submitting a
        Transcription task and waiting for it; all other models go through
        ``MultiModalConversation``. The outcome is stored as an
        ``ASRRecognition`` row before the response is returned.

        NOTE(review): the SDK/HTTP calls made here are blocking although the
        method is ``async``; consider off-loading them to a worker thread.

        Args:
            request: Recognition request (model, audio source, options).

        Returns:
            The recognized text with language, emotion, duration and usage.

        Raises:
            HTTPException: 400 for invalid input or silent audio, 500 when
                uploading audio or saving the record fails, 502 when the
                upstream service fails.
        """
        self._validate_sync_request(request)
        try:
            audio_url = self._resolve_audio_url(request)

            if request.model.lower().startswith(self._TRANSCRIPTION_MODEL_PREFIXES):
                outcome = self._recognize_via_transcription(request, audio_url)
            else:
                outcome = self._recognize_via_multimodal(request, audio_url)
            text, language, emotion, seconds, input_tokens, output_tokens = outcome

            self._save_recognition(
                request, text, language, emotion, seconds, input_tokens, output_tokens
            )

            return ASRResponse(
                text=text,
                language=language,
                emotion=emotion,
                duration=seconds,
                usage=ASRUsage(
                    input_tokens=input_tokens,
                    output_tokens=output_tokens,
                    seconds=seconds
                )
            )

        except HTTPException:
            raise
        except Exception as e:
            # Map third-party failures onto HTTP errors.
            err_str = str(e).lower()
            logger.error(f"ASR识别失败: {type(e).__name__}: {str(e)}")
            if 'silent' in err_str:
                raise HTTPException(status_code=400, detail="未检测到有效语音或音频被判定为静音,请检查麦克风并重新录制(建议 ≥5 秒清晰朗读)")
            raise HTTPException(status_code=502, detail="语音识别失败:服务暂时不可用,请稍后重试")

    def _resolve_audio_url(self, request: ASRRequest) -> Optional[str]:
        """Return the URL to recognize: ``audio_url``, else uploaded base64."""
        if request.audio_url:
            return request.audio_url
        if request.audio_base64:
            return self._upload_base64_audio(request.audio_base64)
        return None

    def _upload_base64_audio(self, audio_base64: str) -> str:
        """Decode base64 audio, upload it to OSS and return a signed URL.

        Raises:
            HTTPException: 400 when the payload is not valid base64, 500 when
                the upload or the signing fails.
        """
        import base64

        try:
            audio_bytes = base64.b64decode(audio_base64)
        except ValueError as decode_err:  # binascii.Error is a ValueError
            logger.error(f"音频Base64解码失败: {decode_err}")
            raise HTTPException(status_code=400, detail="audio_base64不是有效的Base64编码数据") from decode_err

        try:
            from app.services.oss_service import OSSService

            ext = self._guess_audio_extension(audio_bytes)
            oss = OSSService()
            uploaded_url = oss.upload_file(audio_bytes, prefix="asr/temp", original_filename=f"audio.{ext}")
            # Signed https URL with an unencoded path, so the ASR backend can
            # fetch the object.
            signed_url = self._signed_https_url(oss, uploaded_url)
            logger.info(f"ASR音频签名URL: {signed_url[:80]}...")
            return signed_url
        except Exception as oss_err:
            logger.error(f"OSS上传失败: {oss_err}")
            raise HTTPException(status_code=500, detail=f"音频上传失败: {oss_err}")

    def _recognize_via_transcription(self, request: ASRRequest, audio_url: Optional[str]):
        """Recognize through the Transcription API (submit, then wait).

        Returns:
            ``(text, language, emotion, seconds, input_tokens, output_tokens)``;
            emotion is always ``None`` and both token counts are 0 here.
        """
        if not audio_url:
            raise HTTPException(status_code=400, detail="无有效音频")

        # The Transcription API needs a fetchable URL, so try to sign it.
        # NOTE(review): the URL is signed against our OSS bucket whatever its
        # host is; for a URL that does not live in that bucket this yields a
        # link to a different object - confirm whether a host check is needed.
        try:
            from app.services.oss_service import OSSService

            audio_url = self._signed_https_url(OSSService(), audio_url)
            logger.info(f"Transcription API 签名URL: {audio_url[:80]}...")
        except Exception as sign_err:
            logger.warning(f"生成签名URL失败,使用原始URL: {sign_err}")

        task_response = dashscope.audio.asr.Transcription.async_call(
            api_key=self.api_key,
            model=request.model,
            file_urls=[audio_url],
        )
        if task_response.status_code != 200:
            error_msg = getattr(task_response, 'message', '未知错误')
            logger.error(f"ASR识别失败: {task_response.status_code} - {error_msg}")
            raise HTTPException(status_code=502, detail=f"语音识别失败: {error_msg}")

        # Block until the transcription task has finished.
        trans_response = dashscope.audio.asr.Transcription.wait(
            task=task_response.output.task_id,
            api_key=self.api_key,
        )
        if trans_response.status_code != 200:
            error_msg = getattr(trans_response, 'message', '未知错误')
            logger.error(f"ASR转写等待失败: {trans_response.status_code} - {error_msg}")
            raise HTTPException(status_code=502, detail=f"语音识别失败: {error_msg}")

        text = ''
        seconds = 0
        language = request.language or 'unknown'
        # Best effort: a result that cannot be parsed yields empty text
        # instead of an error.
        try:
            from urllib import request as urllib_request

            results = trans_response.output.get('results', [])
            trans_url = results[0].get('transcription_url') if results else None
            if trans_url:
                with urllib_request.urlopen(trans_url, timeout=self._HTTP_TIMEOUT_SECONDS) as fh:
                    trans_data = json.loads(fh.read().decode('utf-8'))
                logger.info(f"[ASR] trans_data keys={list(trans_data.keys())}, transcripts[0]={str((trans_data.get('transcripts') or [{}])[0])[:300]}")
                transcripts = trans_data.get('transcripts', [])
                if transcripts:
                    first = transcripts[0]
                    text = first.get('text', '')
                    # Duration is read from content_duration_in_milliseconds
                    # (or "duration") and converted to whole seconds.
                    duration_ms = first.get('content_duration_in_milliseconds', 0) or first.get('duration', 0)
                    if not duration_ms:
                        channel_info = first.get('channel_info', [])
                        if channel_info:
                            duration_ms = (
                                channel_info[0].get('content_duration_in_milliseconds', 0)
                                or channel_info[0].get('duration', 0)
                            )
                    seconds = int(round(duration_ms / 1000)) if duration_ms else 0
                    # Use the detected language only when none was requested.
                    if not request.language:
                        lang_info = first.get('language', '')
                        if lang_info:
                            language = lang_info
        except Exception as parse_err:
            logger.warning(f"解析转写结果失败: {parse_err}")

        return text, language, None, seconds, 0, 0

    def _recognize_via_multimodal(self, request: ASRRequest, audio_url: Optional[str]):
        """Recognize through ``MultiModalConversation``.

        Returns:
            ``(text, language, emotion, seconds, input_tokens, output_tokens)``.
        """
        from dashscope import MultiModalConversation

        audio_content = [{"audio": audio_url}] if audio_url else []
        messages = [
            {"role": "user", "content": audio_content}
        ]

        asr_options = {}
        if request.language:
            asr_options["language"] = request.language
        if request.enable_itn:
            asr_options["enable_itn"] = True
        if request.context:
            asr_options["corpus"] = {"text": request.context}

        call_kwargs = {
            "api_key": self.api_key,
            "model": request.model,
            "messages": messages,
            "result_format": "message"
        }
        if asr_options:
            call_kwargs["asr_options"] = asr_options

        response = MultiModalConversation.call(**call_kwargs)
        if response.status_code != 200:
            error_msg = getattr(response, 'message', '未知错误')
            logger.error(f"ASR识别失败: {response.status_code} - {error_msg}")
            if 'silent' in str(error_msg).lower():
                raise HTTPException(status_code=400, detail="未检测到有效语音或音频被判定为静音")
            raise HTTPException(status_code=502, detail=f"语音识别失败: {error_msg}")

        output = response.output
        if not output or not output.choices:
            raise HTTPException(status_code=400, detail="未检测到有效识别结果")
        message = output.choices[0].message

        # The first content item that carries text wins.
        text = ""
        for item in (self._get_field(message, "content") or []):
            if isinstance(item, dict) and "text" in item:
                text = item["text"]
                break
            if isinstance(item, str):
                text = item
                break

        # Language / emotion come from the first dict annotation, if any.
        language = "unknown"
        emotion = None
        for annotation in (self._get_field(message, "annotations") or []):
            if isinstance(annotation, dict):
                language = annotation.get("language", "unknown")
                emotion = annotation.get("emotion")
                break

        usage = response.usage if response.usage else {}
        input_tokens = self._text_tokens(usage, "input_tokens_details")
        output_tokens = self._text_tokens(usage, "output_tokens_details")
        seconds = self._usage_seconds(usage)

        return text, language, emotion, seconds, input_tokens, output_tokens

    def _save_recognition(self, request: ASRRequest, text, language, emotion,
                          seconds, input_tokens, output_tokens) -> None:
        """Persist one recognition result; roll back and raise 500 on failure."""
        # API calls are free of charge, so the bill is always zero.
        bill = Decimal("0")
        try:
            asr_recognition = ASRRecognition(
                user_id=self.user_id,
                model=request.model,
                audio_url=request.audio_url,
                # Only a prefix of long inputs is stored.
                audio_base64=request.audio_base64[:1000] if request.audio_base64 else None,
                language=request.language,
                enable_itn=request.enable_itn,
                context=request.context[:5000] if request.context else None,
                result_text=text,
                detected_language=language,
                emotion=emotion,
                duration=seconds,
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                bill=bill
            )
            self.db.add(asr_recognition)
            self.db.commit()
            self.db.refresh(asr_recognition)
        except Exception as e:
            logger.error(f"保存识别记录失败: {type(e).__name__}: {str(e)}")
            self.db.rollback()
            raise HTTPException(status_code=500, detail="保存识别记录失败")

    # ------------------------------------------------------------------
    # Asynchronous transcription
    # ------------------------------------------------------------------

    async def transcribe(self, request: TranscribeRequest) -> TaskResponse:
        """Submit an asynchronous transcription task.

        Posts the task to the DashScope REST API and stores an ``ASRTask``
        row holding the returned task ID.

        Args:
            request: Transcription request (model, file URL, options).

        Returns:
            The task ID and its initial status.

        Raises:
            HTTPException: 400 for an invalid request, 502 when submission
                fails or the reply is malformed, 504 on timeout.
        """
        self._validate_async_request(request)
        try:
            url = f"{self.DASHSCOPE_BASE_URL}/services/audio/asr/transcription"

            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
                "X-DashScope-Async": "enable"
            }

            parameters = {
                "channel_id": request.channel_id,
                "enable_itn": request.enable_itn
            }
            if request.language:
                parameters["language"] = request.language
            if request.context:
                parameters["corpus"] = {"text": request.context}

            payload = {
                "model": request.model,
                "input": {"file_urls": [request.file_url]},
                "parameters": parameters
            }

            response = requests.post(
                url, headers=headers, json=payload, timeout=self._HTTP_TIMEOUT_SECONDS
            )

            if response.status_code != 200:
                error_msg = self._extract_error_message(response)
                logger.error(f"异步转写提交失败: {error_msg}")
                raise HTTPException(status_code=502, detail=f"提交转写任务失败: {error_msg}")

            data = response.json()
            if "output" not in data:
                raise HTTPException(status_code=502, detail="提交转写任务失败,响应格式错误")

            output = data["output"]
            task_id = output.get("task_id")
            task_status = output.get("task_status", "PENDING")

            if not task_id:
                raise HTTPException(status_code=502, detail="提交转写任务失败,未返回task_id")

            # Record the task locally so that it can be polled later.
            asr_task = ASRTask(
                user_id=self.user_id,
                task_id=task_id,
                model=request.model,
                file_url=request.file_url,
                status=task_status
            )
            self.db.add(asr_task)
            self.db.commit()

            return TaskResponse(
                task_id=task_id,
                task_status=task_status,
                submit_time=output.get("submit_time"),
                scheduled_time=output.get("scheduled_time")
            )

        except HTTPException:
            raise
        except requests.exceptions.Timeout:
            raise HTTPException(status_code=504, detail="提交转写任务超时")
        except Exception as e:
            logger.error(f"异步转写提交失败: {type(e).__name__}: {str(e)}")
            # Do not leave the session in a failed state (e.g. after a
            # commit error).
            self.db.rollback()
            raise HTTPException(status_code=502, detail=f"提交转写任务失败: {str(e)}")

    async def get_task_status(self, task_id: str) -> TaskResponse:
        """Query a transcription task and refresh its local record.

        Only tasks stored for the current user can be queried. Status,
        result text / URL, duration and bill of the local ``ASRTask`` row are
        updated from the upstream reply.

        Args:
            task_id: ID of the task to query.

        Returns:
            Current task status, plus result, usage and failure reason when
            the upstream reply contains them.

        Raises:
            HTTPException: 404 when the task is unknown (locally or
                upstream), 502 when the query fails, 504 on timeout.
        """
        # The task must exist locally and belong to the current user.
        local_task = self.db.query(ASRTask).filter(
            ASRTask.task_id == task_id,
            ASRTask.user_id == self.user_id
        ).first()

        if not local_task:
            raise HTTPException(status_code=404, detail="任务不存在")

        try:
            url = f"{self.DASHSCOPE_BASE_URL}/tasks/{task_id}"

            headers = {
                "Authorization": f"Bearer {self.api_key}",
                "X-DashScope-Async": "enable"
            }

            response = requests.get(url, headers=headers, timeout=self._HTTP_TIMEOUT_SECONDS)

            if response.status_code == 404:
                raise HTTPException(status_code=404, detail="任务不存在")

            if response.status_code != 200:
                error_msg = self._extract_error_message(response)
                logger.error(f"查询任务状态失败: {error_msg}")
                raise HTTPException(status_code=502, detail=f"查询任务状态失败: {error_msg}")

            data = response.json()
            if "output" not in data:
                raise HTTPException(status_code=502, detail="查询任务状态失败,响应格式错误")

            output = data["output"]
            task_status = output.get("task_status", "UNKNOWN")
            logger.info(f"[ASR task] task_id={task_id}, status={task_status}, output_keys={list(output.keys())}")

            local_task.status = task_status
            local_task.updated_at = datetime.utcnow()

            # The reply carries either output.result or a list output.results;
            # only the first entry of the list is used.
            result = None
            result_data = output.get("result") or (output["results"][0] if output.get("results") else None)
            if result_data:
                result = self._collect_task_result(local_task, result_data, task_status)

            usage = None
            if data.get("usage"):
                usage_data = data["usage"]
                seconds = usage_data.get("seconds", 0) or usage_data.get("duration", 0) or 0
                usage = TaskUsage(seconds=seconds)
                if seconds > 0:
                    local_task.duration = seconds

            # Record the bill once the task has succeeded (API calls are free).
            if task_status == "SUCCEEDED" and not local_task.bill:
                local_task.bill = Decimal("0")

            self.db.commit()

            # Failure reason: the output's message/code first, then those of
            # the result entry.
            error_message = None
            if task_status == "FAILED":
                error_message = output.get("message") or output.get("code")
                if not error_message and result_data:
                    error_message = result_data.get("message") or result_data.get("code")
                if error_message:
                    logger.info(f"[ASR task] FAILED reason: {error_message}")

            return TaskResponse(
                task_id=task_id,
                task_status=task_status,
                submit_time=output.get("submit_time"),
                scheduled_time=output.get("scheduled_time"),
                end_time=output.get("end_time"),
                result=result,
                usage=usage,
                error_message=error_message
            )

        except HTTPException:
            raise
        except requests.exceptions.Timeout:
            raise HTTPException(status_code=504, detail="查询任务状态超时")
        except Exception as e:
            logger.error(f"查询任务状态失败: {type(e).__name__}: {str(e)}")
            # Discard partially applied updates of the local record.
            self.db.rollback()
            raise HTTPException(status_code=502, detail=f"查询任务状态失败: {str(e)}")

    def _collect_task_result(self, local_task, result_data: dict, task_status: str) -> TranscribeResult:
        """Build the ``TranscribeResult`` and mirror it onto ``local_task``.

        Inline ``transcripts`` are preferred; otherwise, for a SUCCEEDED
        task, the file behind ``transcription_url`` is downloaded. A failed
        download is logged and leaves the transcript list empty.
        """
        transcripts = []
        transcription_url = result_data.get("transcription_url")
        logger.info(f"[ASR task] output.result keys={list(result_data.keys())}, transcription_url={transcription_url}")

        if result_data.get("transcripts"):
            transcripts = [self._build_channel(item) for item in result_data["transcripts"]]
        elif transcription_url and task_status == "SUCCEEDED":
            try:
                transcripts, file_duration = self._download_and_parse_transcription(transcription_url)
                logger.info(f"成功下载并解析转写结果文件: {transcription_url}, duration={file_duration}s")
                # Prefer the duration found inside the result file.
                if file_duration > 0:
                    local_task.duration = file_duration
            except Exception as e:
                logger.error(f"下载或解析转写结果文件失败: {str(e)}")

        if transcripts:
            local_task.result_text = transcripts[0].text
        if transcription_url:
            local_task.result_url = transcription_url

        return TranscribeResult(
            transcription_url=transcription_url,
            transcripts=transcripts
        )

    def _download_and_parse_transcription(self, transcription_url: str):
        """Download and parse a transcription result JSON file.

        Download, JSON decoding and conversion are handled as separate
        stages so that each failure gets its own message (the JSON error
        raised by ``response.json()`` may also be a ``RequestException``).

        Returns:
            ``(transcripts, duration_seconds)`` - a list of
            ``TranscriptChannel`` and the longest channel duration, converted
            from milliseconds to whole seconds (0 when not reported).

        Raises:
            HTTPException: 502 when download, decoding or conversion fails.
        """
        try:
            try:
                response = requests.get(transcription_url, timeout=self._HTTP_TIMEOUT_SECONDS)
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                logger.error(f"下载转写结果文件失败: {str(e)}")
                raise HTTPException(status_code=502, detail=f"下载转写结果文件失败: {str(e)}")

            try:
                transcription_data = response.json()
            except ValueError as e:
                logger.error(f"解析转写结果JSON失败: {str(e)}")
                raise HTTPException(status_code=502, detail=f"解析转写结果JSON失败: {str(e)}")

            logger.info(f"[ASR async] transcription_data keys={list(transcription_data.keys())}, transcripts[0]={str((transcription_data.get('transcripts') or [{}])[0])[:200]}")

            transcripts = []
            duration_seconds = 0
            for transcript in (transcription_data.get("transcripts") or []):
                transcripts.append(self._build_channel(transcript))
                dur_ms = (
                    transcript.get("content_duration_in_milliseconds")
                    or transcript.get("duration")
                    or 0
                )
                if dur_ms:
                    duration_seconds = max(duration_seconds, int(round(dur_ms / 1000)))
            return transcripts, duration_seconds
        except HTTPException:
            raise
        except Exception as e:
            logger.error(f"处理转写结果文件失败: {type(e).__name__}: {str(e)}")
            raise HTTPException(status_code=502, detail=f"处理转写结果文件失败: {str(e)}")

    # ------------------------------------------------------------------
    # Model catalogue
    # ------------------------------------------------------------------

    def get_asr_models(self) -> List[ASRModelResponse]:
        """List ASR models from the database (STT category, enabled, shown).

        Models whose code contains "realtime" are skipped. Each remaining
        model is labelled "async" when its code starts with one of
        ``_ASYNC_MODEL_PREFIXES`` or contains one of
        ``_ASYNC_MODEL_KEYWORDS``, and "sync" otherwise.
        """
        from app.models.model import ModelNew, ModelCategory

        models = self.db.query(ModelNew).filter(
            ModelNew.categories.any(int(ModelCategory.STT)),
            ModelNew.is_api_enabled == True,  # noqa: E712 - SQL expression
            ModelNew.is_show_enabled == True,  # noqa: E712 - SQL expression
        ).all()

        result = []
        for m in models:
            code = m.model_code.lower()
            # Realtime models are left out of the list.
            if 'realtime' in code:
                continue

            features = []
            if m.features and isinstance(m.features, dict):
                features = [k for k, v in m.features.items() if v]
            elif m.features and isinstance(m.features, list):
                features = m.features

            result.append(ASRModelResponse(
                id=m.id,
                title=m.model_code,
                name=m.display_name or m.model_code,
                description=m.custom_description or m.description or "",
                call_type=self._classify_call_type(code),
                features=features,
            ))
        return result
|