| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518 |
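"""
Voice cloning service.

Business logic for voice cloning, integrated with the Alibaba Cloud Bailian
platform (DashScope).

Requirements: 3.1, 3.2, 3.3, 3.4, 3.5, 3.6, 3.7, 3.8, 3.9
Supports: creating a voice, listing voices, querying voice details,
updating a voice, and deleting a voice.
"""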
import logging
import os
from datetime import datetime
from decimal import Decimal
from typing import Any, List, Optional

from fastapi import HTTPException, UploadFile
from sqlalchemy.orm import Session

from app.models.audio import VoiceClone
from app.schemas.audio_schema import (
    VoiceCreateRequest,
    VoiceListResponse,
    VoiceResponse,
)
from app.services.oss_service import get_oss_service
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class VoiceCloneService:
    """Voice cloning business logic backed by the DashScope SDK.

    An instance is bound to one database session and one user.  Every
    operation on an existing voice first checks, in the local ``VoiceClone``
    table, that the voice belongs to that user before calling DashScope.
    """

    # MIME types accepted for uploaded audio.
    ALLOWED_AUDIO_TYPES = [
        "audio/wav",
        "audio/x-wav",
        "audio/mpeg",
        "audio/mp3",
        "audio/m4a",
        "audio/x-m4a",
        "audio/mp4",
    ]

    # File extensions accepted for uploaded audio.
    ALLOWED_EXTENSIONS = [".wav", ".mp3", ".m4a"]

    # Maximum upload size (10 MB).
    MAX_FILE_SIZE = 10 * 1024 * 1024

    # The former VALID_TARGET_MODELS constant was removed; target models are
    # validated against the database instead (see ``create_voice``).

    # Page size used when walking the remote voice listing, and a hard cap on
    # the number of pages so a misbehaving endpoint cannot loop forever.
    _REMOTE_PAGE_SIZE = 100
    _REMOTE_MAX_PAGES = 50

    def __init__(self, db: Session, user_id: str, api_key: Optional[str] = None):
        """Bind the service to a database session and a user.

        Args:
            db: Database session used for all local queries and writes.
            user_id: ID of the user the service acts for.
            api_key: The user's DashScope API key.  When omitted or empty,
                the ``DASHSCOPE_API_KEY`` environment variable is used.
        """
        self.db = db
        self.user_id = user_id
        self.api_key = api_key or os.getenv("DASHSCOPE_API_KEY")
        self.oss_service = get_oss_service()
        self._voice_service = None

    @property
    def voice_service(self):
        """Return the lazily created ``VoiceEnrollmentService`` client."""
        if self._voice_service is None:
            import dashscope
            from dashscope.audio.tts_v2 import VoiceEnrollmentService

            # Kept for backward compatibility with code that may rely on the
            # module-level key being set.
            dashscope.api_key = self.api_key
            # The module-level key is process-global, so concurrent requests
            # with different per-user keys could overwrite each other.
            # Passing the key to the client pins it to this instance.
            self._voice_service = VoiceEnrollmentService(api_key=self.api_key)
        return self._voice_service

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _field(obj: Any, name: str, default: Any = None) -> Any:
        """Read ``name`` from a dict or an attribute-style object."""
        if isinstance(obj, dict):
            return obj.get(name, default)
        return getattr(obj, name, default)

    @staticmethod
    def _extract_voice_list(result: Any) -> List[Any]:
        """Normalise a remote listing result into a plain list of entries."""
        if not result:
            return []
        if isinstance(result, list):
            return result
        if hasattr(result, "voices"):
            return list(result.voices or [])
        if isinstance(result, dict):
            return list(result.get("voices", result.get("data", [])) or [])
        return []

    def _rollback_quietly(self) -> None:
        """Roll back the session without masking the error being handled."""
        try:
            self.db.rollback()
        except Exception:
            logger.exception("Database rollback failed")

    def _get_owned_voice(self, voice_id: str) -> VoiceClone:
        """Return the local record for ``voice_id`` owned by the current user.

        Raises:
            HTTPException: 404 when no such record exists for this user.
        """
        local_voice = (
            self.db.query(VoiceClone)
            .filter(
                VoiceClone.voice_id == voice_id,
                VoiceClone.user_id == self.user_id,
            )
            .first()
        )
        if not local_voice:
            raise HTTPException(status_code=404, detail="音色不存在")
        return local_voice

    async def _resolve_audio_url(
        self,
        file: Optional[UploadFile],
        audio_url: Optional[str],
        missing_detail: str,
    ) -> str:
        """Return the audio URL to send to DashScope.

        An uploaded file takes precedence: it is validated, uploaded to OSS
        and the resulting URL is returned.  Otherwise ``audio_url`` is used.

        Raises:
            HTTPException: 400 when neither a file nor a URL was supplied, or
                when the uploaded file fails validation.
        """
        if file:
            self.validate_audio_file(file)
            content = await file.read()
            return self.oss_service.upload_file(
                content,
                prefix="audio/voice",
                original_filename=file.filename,
            )
        if not audio_url:
            raise HTTPException(status_code=400, detail=missing_detail)
        return audio_url

    def _fetch_remote_voices(self, prefix: Optional[str], wanted_ids: set) -> List[Any]:
        """Collect the remote listing entries whose voice_id is in ``wanted_ids``.

        Walks the remote listing page by page.  It stops as soon as every
        wanted ID has been found, a short or empty page is returned, a page
        brings no previously unseen ID, or the page cap is reached.
        """
        remaining = set(wanted_ids)
        seen_ids = set()
        matches: List[Any] = []

        for page_index in range(self._REMOTE_MAX_PAGES):
            result = self.voice_service.list_voices(
                prefix=prefix,
                page_index=page_index,
                page_size=self._REMOTE_PAGE_SIZE,
            )
            batch = self._extract_voice_list(result)

            new_ids = 0
            for entry in batch:
                voice_id = self._field(entry, "voice_id")
                if not voice_id or voice_id in seen_ids:
                    continue
                seen_ids.add(voice_id)
                new_ids += 1
                if voice_id in remaining:
                    remaining.discard(voice_id)
                    matches.append(entry)

            if not remaining or new_ids == 0 or len(batch) < self._REMOTE_PAGE_SIZE:
                break

        return matches

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def validate_audio_file(self, file: UploadFile) -> None:
        """Validate the extension and size of an uploaded audio file.

        A Content-Type outside ``ALLOWED_AUDIO_TYPES`` is only logged, because
        the reported type is not always accurate.  The underlying stream is
        rewound to the start after measuring its size.

        Args:
            file: The uploaded file object.

        Raises:
            HTTPException: 400 when the file is missing, has an unsupported
                extension, exceeds ``MAX_FILE_SIZE`` or is empty.
        """
        if not file or not file.filename:
            raise HTTPException(status_code=400, detail="未提供音频文件")

        if not file.filename.lower().endswith(tuple(self.ALLOWED_EXTENSIONS)):
            raise HTTPException(
                status_code=400,
                detail=f"不支持的音频格式,仅支持: {', '.join(self.ALLOWED_EXTENSIONS)}",
            )

        if file.content_type and file.content_type not in self.ALLOWED_AUDIO_TYPES:
            logger.warning("音频文件Content-Type不匹配: %s", file.content_type)

        # Measure the size by seeking to the end, then rewind for the reader.
        stream = file.file
        stream.seek(0, os.SEEK_END)
        file_size = stream.tell()
        stream.seek(0)

        if file_size > self.MAX_FILE_SIZE:
            raise HTTPException(
                status_code=400,
                detail=f"文件大小超过限制(最大10MB),当前大小: {file_size / 1024 / 1024:.2f}MB",
            )

        if file_size == 0:
            raise HTTPException(status_code=400, detail="文件为空")

    async def create_voice(
        self,
        request: VoiceCreateRequest,
        file: UploadFile = None,
    ) -> VoiceResponse:
        """Create a cloned voice and record it locally.

        Args:
            request: The creation request (target model, prefix, voice name,
                optional audio URL and language hints).
            file: Uploaded audio file; takes precedence over
                ``request.audio_url`` when both are given.

        Returns:
            The new voice, in ``DEPLOYING`` status.

        Raises:
            HTTPException: 400 for an invalid target model, missing or invalid
                audio, or audio rejected as silent or badly formatted; 502
                when DashScope or the database write fails.
        """
        # The target model must exist, be a TTS model and be API-enabled.
        from app.models.model import ModelNew, ModelCategory

        model_row = (
            self.db.query(ModelNew)
            .filter(
                ModelNew.model_code == request.target_model,
                ModelNew.categories.any(int(ModelCategory.TTS)),
                ModelNew.is_api_enabled == True,  # noqa: E712 - SQL expression
            )
            .first()
        )
        if not model_row:
            raise HTTPException(
                status_code=400,
                detail=f"无效的目标模型: {request.target_model}",
            )

        audio_url = await self._resolve_audio_url(
            file, request.audio_url, "必须提供音频文件或音频URL"
        )

        try:
            # The enrollment API call itself is free of charge.
            bill = Decimal("0")

            voice_id = self.voice_service.create_voice(
                target_model=request.target_model,
                prefix=request.prefix,
                url=audio_url,
                language_hints=request.language_hints,
            )
            if not voice_id:
                raise HTTPException(status_code=502, detail="创建音色失败,未返回voice_id")

            voice_clone = VoiceClone(
                user_id=self.user_id,
                voice_id=voice_id,
                target_model=request.target_model,
                prefix=request.prefix,
                voice_name=request.voice_name,
                status="DEPLOYING",
                bill=bill,
                audio_url=audio_url,
            )
            self.db.add(voice_clone)
            self.db.commit()
            self.db.refresh(voice_clone)

            return VoiceResponse(
                voice_id=voice_id,
                status="DEPLOYING",
                target_model=request.target_model,
                voice_name=request.voice_name,
            )

        except HTTPException:
            raise
        except Exception as e:
            logger.exception("创建音色失败: %s: %s", type(e).__name__, e)
            # Leave the session usable if the failure came from the database.
            self._rollback_quietly()

            err_str = str(e).lower()
            # 'silent' also covers 'Audio.AudioSilentError' and 'silent audio'.
            if "silent" in err_str:
                raise HTTPException(
                    status_code=400,
                    detail="音频被判定为静音或无有效语音,请检查麦克风并重新录制(建议 ≥5 秒清晰朗读)",
                ) from e
            if "invalid" in err_str and "format" in err_str:
                raise HTTPException(
                    status_code=400,
                    detail="音频格式不受支持或文件损坏,请上传 WAV/MP3/M4A 格式的清晰录音",
                ) from e
            # Generic message so third-party internals are not shown to users.
            raise HTTPException(
                status_code=502,
                detail="创建音色失败:服务暂时不可用,请稍后重试或检查音频质量",
            ) from e

    async def list_voices(
        self,
        prefix: Optional[str] = None,
        page: int = 0,
        page_size: int = 10,
        model: Optional[str] = None,
    ) -> VoiceListResponse:
        """List the current user's voices with their remote status.

        Local records decide ownership and supply ``target_model`` and
        ``voice_name``; the remote listing supplies status and timestamps.
        Local status is synchronised with the remote status as a side effect.
        Voices that exist locally but are absent from the remote listing are
        not returned.

        Args:
            prefix: Only voices whose prefix starts with this value.
            page: Zero-based page number; negative values are treated as 0.
            page_size: Items per page; negative values are treated as 0.
            model: Only voices created for this target model
                (e.g. cosyvoice-v3-flash, cosyvoice-v3-plus).

        Returns:
            The total number of matching voices and the requested page.

        Raises:
            HTTPException: 502 when the database or DashScope call fails.
        """
        try:
            query = self.db.query(VoiceClone).filter(
                VoiceClone.user_id == self.user_id
            )
            if model:
                query = query.filter(VoiceClone.target_model == model)
            if prefix:
                # autoescape keeps '%' and '_' in the prefix literal.
                query = query.filter(
                    VoiceClone.prefix.startswith(prefix, autoescape=True)
                )

            owned = {row.voice_id: row for row in query.all()}
            if not owned:
                return VoiceListResponse(total=0, voices=[])

            voices = []
            for remote in self._fetch_remote_voices(prefix, set(owned)):
                voice_id = self._field(remote, "voice_id")
                local_voice = owned[voice_id]
                status = self._field(remote, "status", "UNKNOWN")

                voices.append(
                    VoiceResponse(
                        voice_id=voice_id,
                        status=status,
                        target_model=local_voice.target_model,
                        voice_name=local_voice.voice_name,
                        gmt_create=self._field(remote, "gmt_create"),
                        gmt_modified=self._field(remote, "gmt_modified"),
                    )
                )

                # Keep the locally stored status in sync with the remote one.
                if local_voice.status != status:
                    local_voice.status = status
                    # NOTE(review): naive UTC timestamp kept as in the
                    # original; confirm the column type before switching to
                    # an aware datetime.
                    local_voice.updated_at = datetime.utcnow()

            self.db.commit()

            safe_page = max(page, 0)
            safe_size = max(page_size, 0)
            start_idx = safe_page * safe_size
            return VoiceListResponse(
                total=len(voices),
                voices=voices[start_idx:start_idx + safe_size],
            )

        except HTTPException:
            raise
        except Exception as e:
            logger.exception("查询音色列表失败: %s: %s", type(e).__name__, e)
            self._rollback_quietly()
            # NOTE(review): the detail echoes the raw exception text to the
            # client; kept for compatibility, consider a generic message.
            raise HTTPException(
                status_code=502, detail=f"查询音色列表失败: {str(e)}"
            ) from e

    async def query_voice(self, voice_id: str) -> VoiceResponse:
        """Return the details of one voice owned by the current user.

        The local status is updated when it differs from the remote status.

        Args:
            voice_id: ID of the voice to query.

        Returns:
            The voice details; ``target_model`` falls back to the local
            record when the remote result does not carry one.

        Raises:
            HTTPException: 404 when the voice is not owned by the user or the
                remote query returns nothing; 502 when the remote call or the
                database write fails.
        """
        local_voice = self._get_owned_voice(voice_id)

        try:
            result = self.voice_service.query_voice(voice_id=voice_id)
            if not result:
                raise HTTPException(status_code=404, detail="音色不存在")

            status = self._field(result, "status", "UNKNOWN")
            target_model = self._field(result, "target_model")
            resource_link = self._field(result, "resource_link")
            gmt_create = self._field(result, "gmt_create")
            gmt_modified = self._field(result, "gmt_modified")

            if local_voice.status != status:
                local_voice.status = status
                local_voice.updated_at = datetime.utcnow()
                self.db.commit()

            return VoiceResponse(
                voice_id=voice_id,
                status=status,
                target_model=target_model or local_voice.target_model,
                voice_name=local_voice.voice_name,
                resource_link=resource_link,
                gmt_create=gmt_create,
                gmt_modified=gmt_modified,
            )

        except HTTPException:
            raise
        except Exception as e:
            logger.exception("查询音色详情失败: %s: %s", type(e).__name__, e)
            self._rollback_quietly()
            # NOTE(review): raw exception text is exposed to the client here.
            raise HTTPException(
                status_code=502, detail=f"查询音色详情失败: {str(e)}"
            ) from e

    async def update_voice(
        self,
        voice_id: str,
        file: UploadFile = None,
        audio_url: str = None,
    ) -> VoiceResponse:
        """Re-enroll an existing voice with new audio.

        Args:
            voice_id: ID of the voice to update.
            file: New audio file; takes precedence over ``audio_url``.
            audio_url: URL of the new audio, used when no file is given.

        Returns:
            The voice in ``DEPLOYING`` status (it is reset after an update),
            with the locally stored target model and voice name.

        Raises:
            HTTPException: 404 when the voice is not owned by the user; 400
                when no valid audio is supplied; 502 when the remote call or
                the database write fails.
        """
        local_voice = self._get_owned_voice(voice_id)
        new_audio_url = await self._resolve_audio_url(
            file, audio_url, "必须提供新的音频文件或音频URL"
        )

        try:
            self.voice_service.update_voice(voice_id=voice_id, url=new_audio_url)

            # Read these before commit so no reload is needed afterwards.
            target_model = local_voice.target_model
            voice_name = local_voice.voice_name

            local_voice.audio_url = new_audio_url
            local_voice.status = "DEPLOYING"
            local_voice.updated_at = datetime.utcnow()
            self.db.commit()

            return VoiceResponse(
                voice_id=voice_id,
                status="DEPLOYING",
                target_model=target_model,
                voice_name=voice_name,
            )

        except HTTPException:
            raise
        except Exception as e:
            logger.exception("更新音色失败: %s: %s", type(e).__name__, e)
            self._rollback_quietly()
            # NOTE(review): raw exception text is exposed to the client here.
            raise HTTPException(
                status_code=502, detail=f"更新音色失败: {str(e)}"
            ) from e

    async def delete_voice(self, voice_id: str) -> None:
        """Delete a voice remotely and remove its local record.

        The remote deletion runs first; the local record is only removed
        when the remote call did not raise.

        Args:
            voice_id: ID of the voice to delete.

        Raises:
            HTTPException: 404 when the voice is not owned by the user; 502
                when the remote call or the database write fails.
        """
        local_voice = self._get_owned_voice(voice_id)

        try:
            self.voice_service.delete_voice(voice_id=voice_id)
            self.db.delete(local_voice)
            self.db.commit()

        except HTTPException:
            raise
        except Exception as e:
            logger.exception("删除音色失败: %s: %s", type(e).__name__, e)
            self._rollback_quietly()
            # NOTE(review): raw exception text is exposed to the client here.
            raise HTTPException(
                status_code=502, detail=f"删除音色失败: {str(e)}"
            ) from e
|