# voice_clone_service.py
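"""
Voice cloning service.

Business logic for voice cloning, integrated with DashScope on the
Alibaba Cloud Bailian platform.

Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9
Supports: creating a voice, listing voices, querying voice details,
updating a voice and deleting a voice.
"""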
import logging
import os
from datetime import datetime
from decimal import Decimal
from typing import Any, List, Optional, Tuple

from fastapi import HTTPException, UploadFile
from sqlalchemy.orm import Session

from app.models.audio import VoiceClone
from app.schemas.audio_schema import (
    VoiceCreateRequest,
    VoiceListResponse,
    VoiceResponse,
)
from app.services.oss_service import get_oss_service
  19. logger = logging.getLogger(__name__)
  20. class VoiceCloneService:
  21. """声音复刻服务类"""
  22. # 支持的音频格式
  23. ALLOWED_AUDIO_TYPES = [
  24. "audio/wav",
  25. "audio/x-wav",
  26. "audio/mpeg",
  27. "audio/mp3",
  28. "audio/m4a",
  29. "audio/x-m4a",
  30. "audio/mp4"
  31. ]
  32. # 支持的文件扩展名
  33. ALLOWED_EXTENSIONS = [".wav", ".mp3", ".m4a"]
  34. # 最大文件大小(10MB)
  35. MAX_FILE_SIZE = 10 * 1024 * 1024
  36. # 有效的目标模型
  37. # VALID_TARGET_MODELS 已移除,改为动态查库验证
  38. def __init__(self, db: Session, user_id: str, api_key: str = None):
  39. """
  40. 初始化声音复刻服务
  41. Args:
  42. db: 数据库会话
  43. user_id: 用户ID
  44. api_key: 用户的API密钥(从用户数据动态加载)
  45. """
  46. self.db = db
  47. self.user_id = user_id
  48. self.api_key = api_key or os.getenv("DASHSCOPE_API_KEY")
  49. self.oss_service = get_oss_service()
  50. self._voice_service = None
  51. @property
  52. def voice_service(self):
  53. """延迟初始化VoiceEnrollmentService"""
  54. if self._voice_service is None:
  55. import dashscope
  56. from dashscope.audio.tts_v2 import VoiceEnrollmentService
  57. dashscope.api_key = self.api_key
  58. self._voice_service = VoiceEnrollmentService()
  59. return self._voice_service
  60. def validate_audio_file(self, file: UploadFile) -> None:
  61. """
  62. 验证音频文件格式和大小
  63. Args:
  64. file: 上传的文件对象
  65. Raises:
  66. HTTPException: 文件验证失败
  67. """
  68. # 检查文件是否存在
  69. if not file or not file.filename:
  70. raise HTTPException(status_code=400, detail="未提供音频文件")
  71. # 检查文件扩展名
  72. filename_lower = file.filename.lower()
  73. ext = None
  74. for allowed_ext in self.ALLOWED_EXTENSIONS:
  75. if filename_lower.endswith(allowed_ext):
  76. ext = allowed_ext
  77. break
  78. if not ext:
  79. raise HTTPException(
  80. status_code=400,
  81. detail=f"不支持的音频格式,仅支持: {', '.join(self.ALLOWED_EXTENSIONS)}"
  82. )
  83. # 检查Content-Type(如果提供)
  84. if file.content_type and file.content_type not in self.ALLOWED_AUDIO_TYPES:
  85. # 某些情况下content_type可能不准确,所以只记录警告
  86. logger.warning(f"音频文件Content-Type不匹配: {file.content_type}")
  87. # 检查文件大小
  88. file.file.seek(0, 2) # 移动到文件末尾
  89. file_size = file.file.tell()
  90. file.file.seek(0) # 重置到文件开头
  91. if file_size > self.MAX_FILE_SIZE:
  92. raise HTTPException(
  93. status_code=400,
  94. detail=f"文件大小超过限制(最大10MB),当前大小: {file_size / 1024 / 1024:.2f}MB"
  95. )
  96. if file_size == 0:
  97. raise HTTPException(status_code=400, detail="文件为空")
  98. async def create_voice(
  99. self,
  100. request: VoiceCreateRequest,
  101. file: UploadFile = None
  102. ) -> VoiceResponse:
  103. """
  104. 创建复刻音色
  105. Args:
  106. request: 创建音色请求
  107. file: 上传的音频文件(与audio_url二选一)
  108. Returns:
  109. 音色响应对象
  110. Raises:
  111. HTTPException: 创建失败
  112. """
  113. # 动态查库验证目标模型
  114. from app.models.model import ModelNew, ModelCategory
  115. valid = self.db.query(ModelNew).filter(
  116. ModelNew.model_code == request.target_model,
  117. ModelNew.categories.any(int(ModelCategory.TTS)),
  118. ModelNew.is_api_enabled == True,
  119. ).first()
  120. if not valid:
  121. raise HTTPException(status_code=400, detail=f"无效的目标模型: {request.target_model}")
  122. # 确定音频URL
  123. audio_url = request.audio_url
  124. local_audio_url = None # 本地OSS存储的URL
  125. if file:
  126. # 验证并上传文件
  127. self.validate_audio_file(file)
  128. # 读取文件内容
  129. file_content = await file.read()
  130. # 上传到OSS
  131. local_audio_url = self.oss_service.upload_file(
  132. file_content,
  133. prefix="audio/voice",
  134. original_filename=file.filename
  135. )
  136. audio_url = local_audio_url
  137. elif not audio_url:
  138. raise HTTPException(
  139. status_code=400,
  140. detail="必须提供音频文件或音频URL"
  141. )
  142. try:
  143. # 费用(API调用免费)
  144. bill = Decimal("0")
  145. # 调用DashScope API创建音色
  146. voice_id = self.voice_service.create_voice(
  147. target_model=request.target_model,
  148. prefix=request.prefix,
  149. url=audio_url,
  150. language_hints=request.language_hints
  151. )
  152. if not voice_id:
  153. raise HTTPException(status_code=502, detail="创建音色失败,未返回voice_id")
  154. # 保存到数据库
  155. voice_clone = VoiceClone(
  156. user_id=self.user_id,
  157. voice_id=voice_id,
  158. target_model=request.target_model,
  159. prefix=request.prefix,
  160. voice_name=request.voice_name,
  161. status="DEPLOYING",
  162. bill=bill,
  163. audio_url=local_audio_url or audio_url
  164. )
  165. self.db.add(voice_clone)
  166. self.db.commit()
  167. self.db.refresh(voice_clone)
  168. return VoiceResponse(
  169. voice_id=voice_id,
  170. status="DEPLOYING",
  171. target_model=request.target_model,
  172. voice_name=request.voice_name
  173. )
  174. except HTTPException:
  175. raise
  176. except Exception as e:
  177. # 尝试识别第三方错误类型并返回更精确的提示
  178. err_str = str(e).lower()
  179. logger.error(f"创建音色失败: {type(e).__name__}: {str(e)}")
  180. if 'audio.audiosilenterror' in err_str or 'silent audio' in err_str or 'silent' in err_str:
  181. # DashScope 返回静音错误
  182. raise HTTPException(status_code=400, detail="音频被判定为静音或无有效语音,请检查麦克风并重新录制(建议 ≥5 秒清晰朗读)")
  183. if 'invalid' in err_str and 'format' in err_str:
  184. raise HTTPException(status_code=400, detail="音频格式不受支持或文件损坏,请上传 WAV/MP3/M4A 格式的清晰录音")
  185. # 默认返回较友好的不可用提示,避免将第三方内部信息直接暴露给用户
  186. raise HTTPException(status_code=502, detail="创建音色失败:服务暂时不可用,请稍后重试或检查音频质量")
  187. async def list_voices(
  188. self,
  189. prefix: str = None,
  190. page: int = 0,
  191. page_size: int = 10,
  192. model: Optional[str] = None
  193. ) -> VoiceListResponse:
  194. """
  195. 查询用户音色列表
  196. Args:
  197. prefix: 按前缀筛选
  198. page: 页码(从0开始)
  199. page_size: 每页数量
  200. model: 按目标模型筛选(如:cosyvoice-v3-flash、cosyvoice-v3-plus)
  201. Returns:
  202. 音色列表响应
  203. Raises:
  204. HTTPException: 查询失败
  205. """
  206. try:
  207. # 先从本地数据库查询当前用户的音色,支持按模型筛选
  208. query = self.db.query(VoiceClone).filter(
  209. VoiceClone.user_id == self.user_id
  210. )
  211. # 按模型筛选
  212. if model:
  213. query = query.filter(VoiceClone.target_model == model)
  214. # 按前缀筛选
  215. if prefix:
  216. query = query.filter(VoiceClone.prefix.like(f"{prefix}%"))
  217. # 获取用户音色ID集合
  218. user_voices = query.all()
  219. user_voice_ids = {v.voice_id for v in user_voices}
  220. user_voice_map = {v.voice_id: v for v in user_voices}
  221. if not user_voice_ids:
  222. return VoiceListResponse(total=0, voices=[])
  223. # 调用DashScope API获取音色列表
  224. # 注意:由于需要按模型筛选,我们先从数据库筛选,然后调用API获取状态
  225. # 为了确保获取到所有相关音色,我们获取较大的页面
  226. result = self.voice_service.list_voices(
  227. prefix=prefix,
  228. page_index=0, # 从第一页开始
  229. page_size=100 # 获取足够多的数据以便筛选
  230. )
  231. # 解析结果
  232. voices = []
  233. if result:
  234. # result可能是列表或包含voices字段的对象
  235. if isinstance(result, list):
  236. voice_list = result
  237. elif hasattr(result, 'voices'):
  238. voice_list = result.voices
  239. elif isinstance(result, dict):
  240. voice_list = result.get('voices', result.get('data', []))
  241. else:
  242. voice_list = []
  243. # 过滤出属于当前用户且匹配筛选条件的音色
  244. for v in voice_list:
  245. voice_id = v.get('voice_id') if isinstance(v, dict) else getattr(v, 'voice_id', None)
  246. # 只返回属于当前用户的音色
  247. if voice_id and voice_id in user_voice_ids:
  248. # 如果指定了模型筛选,检查是否匹配
  249. if model:
  250. local_voice = user_voice_map.get(voice_id)
  251. if not local_voice or local_voice.target_model != model:
  252. continue
  253. status = v.get('status') if isinstance(v, dict) else getattr(v, 'status', 'UNKNOWN')
  254. gmt_create = v.get('gmt_create') if isinstance(v, dict) else getattr(v, 'gmt_create', None)
  255. gmt_modified = v.get('gmt_modified') if isinstance(v, dict) else getattr(v, 'gmt_modified', None)
  256. # 从本地数据库获取target_model和voice_name
  257. local_voice = user_voice_map.get(voice_id)
  258. target_model = local_voice.target_model if local_voice else None
  259. voice_name = local_voice.voice_name if local_voice else None
  260. voices.append(VoiceResponse(
  261. voice_id=voice_id,
  262. status=status,
  263. target_model=target_model,
  264. voice_name=voice_name,
  265. gmt_create=gmt_create,
  266. gmt_modified=gmt_modified
  267. ))
  268. # 同步更新本地数据库状态
  269. if local_voice and local_voice.status != status:
  270. local_voice.status = status
  271. local_voice.updated_at = datetime.utcnow()
  272. self.db.commit()
  273. # 应用分页
  274. total = len(voices)
  275. start_idx = page * page_size
  276. end_idx = start_idx + page_size
  277. paginated_voices = voices[start_idx:end_idx]
  278. return VoiceListResponse(
  279. total=total,
  280. voices=paginated_voices
  281. )
  282. except HTTPException:
  283. raise
  284. except Exception as e:
  285. logger.error(f"查询音色列表失败: {type(e).__name__}: {str(e)}")
  286. raise HTTPException(status_code=502, detail=f"查询音色列表失败: {str(e)}")
  287. async def query_voice(self, voice_id: str) -> VoiceResponse:
  288. """
  289. 查询指定音色详情
  290. Args:
  291. voice_id: 音色ID
  292. Returns:
  293. 音色响应对象
  294. Raises:
  295. HTTPException: 查询失败或音色不存在
  296. """
  297. # 验证权限:检查音色是否属于当前用户
  298. local_voice = self.db.query(VoiceClone).filter(
  299. VoiceClone.voice_id == voice_id,
  300. VoiceClone.user_id == self.user_id
  301. ).first()
  302. if not local_voice:
  303. raise HTTPException(status_code=404, detail="音色不存在")
  304. try:
  305. # 调用DashScope API查询音色详情
  306. result = self.voice_service.query_voice(voice_id=voice_id)
  307. if not result:
  308. raise HTTPException(status_code=404, detail="音色不存在")
  309. # 解析结果
  310. if isinstance(result, dict):
  311. status = result.get('status', 'UNKNOWN')
  312. target_model = result.get('target_model')
  313. resource_link = result.get('resource_link')
  314. gmt_create = result.get('gmt_create')
  315. gmt_modified = result.get('gmt_modified')
  316. else:
  317. status = getattr(result, 'status', 'UNKNOWN')
  318. target_model = getattr(result, 'target_model', None)
  319. resource_link = getattr(result, 'resource_link', None)
  320. gmt_create = getattr(result, 'gmt_create', None)
  321. gmt_modified = getattr(result, 'gmt_modified', None)
  322. # 更新本地数据库状态
  323. if local_voice.status != status:
  324. local_voice.status = status
  325. local_voice.updated_at = datetime.utcnow()
  326. self.db.commit()
  327. return VoiceResponse(
  328. voice_id=voice_id,
  329. status=status,
  330. target_model=target_model or local_voice.target_model,
  331. voice_name=local_voice.voice_name,
  332. resource_link=resource_link,
  333. gmt_create=gmt_create,
  334. gmt_modified=gmt_modified
  335. )
  336. except HTTPException:
  337. raise
  338. except Exception as e:
  339. logger.error(f"查询音色详情失败: {type(e).__name__}: {str(e)}")
  340. raise HTTPException(status_code=502, detail=f"查询音色详情失败: {str(e)}")
  341. async def update_voice(
  342. self,
  343. voice_id: str,
  344. file: UploadFile = None,
  345. audio_url: str = None
  346. ) -> VoiceResponse:
  347. """
  348. 更新音色(使用新的音频文件)
  349. Args:
  350. voice_id: 音色ID
  351. file: 新的音频文件(与audio_url二选一)
  352. audio_url: 新的音频URL(与file二选一)
  353. Returns:
  354. 音色响应对象
  355. Raises:
  356. HTTPException: 更新失败或音色不存在
  357. """
  358. # 验证权限:检查音色是否属于当前用户
  359. local_voice = self.db.query(VoiceClone).filter(
  360. VoiceClone.voice_id == voice_id,
  361. VoiceClone.user_id == self.user_id
  362. ).first()
  363. if not local_voice:
  364. raise HTTPException(status_code=404, detail="音色不存在")
  365. # 确定新的音频URL
  366. new_audio_url = audio_url
  367. if file:
  368. # 验证并上传文件
  369. self.validate_audio_file(file)
  370. # 读取文件内容
  371. file_content = await file.read()
  372. # 上传到OSS
  373. new_audio_url = self.oss_service.upload_file(
  374. file_content,
  375. prefix="audio/voice",
  376. original_filename=file.filename
  377. )
  378. elif not audio_url:
  379. raise HTTPException(
  380. status_code=400,
  381. detail="必须提供新的音频文件或音频URL"
  382. )
  383. try:
  384. # 调用DashScope API更新音色
  385. result = self.voice_service.update_voice(
  386. voice_id=voice_id,
  387. url=new_audio_url
  388. )
  389. # 更新本地数据库
  390. local_voice.audio_url = new_audio_url
  391. local_voice.status = "DEPLOYING" # 更新后需要重新审核
  392. local_voice.updated_at = datetime.utcnow()
  393. self.db.commit()
  394. return VoiceResponse(
  395. voice_id=voice_id,
  396. status="DEPLOYING"
  397. )
  398. except HTTPException:
  399. raise
  400. except Exception as e:
  401. logger.error(f"更新音色失败: {type(e).__name__}: {str(e)}")
  402. raise HTTPException(status_code=502, detail=f"更新音色失败: {str(e)}")
  403. async def delete_voice(self, voice_id: str) -> None:
  404. """
  405. 删除音色
  406. Args:
  407. voice_id: 音色ID
  408. Raises:
  409. HTTPException: 删除失败或音色不存在
  410. """
  411. # 验证权限:检查音色是否属于当前用户
  412. local_voice = self.db.query(VoiceClone).filter(
  413. VoiceClone.voice_id == voice_id,
  414. VoiceClone.user_id == self.user_id
  415. ).first()
  416. if not local_voice:
  417. raise HTTPException(status_code=404, detail="音色不存在")
  418. try:
  419. # 调用DashScope API删除音色
  420. self.voice_service.delete_voice(voice_id=voice_id)
  421. # 删除本地数据库记录
  422. self.db.delete(local_voice)
  423. self.db.commit()
  424. except HTTPException:
  425. raise
  426. except Exception as e:
  427. logger.error(f"删除音色失败: {type(e).__name__}: {str(e)}")
  428. raise HTTPException(status_code=502, detail=f"删除音色失败: {str(e)}")