audio_v2.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315
  1. """
  2. AI语音V2版本数据传输对象定义
  3. 定义语音合成、语音识别、声音克隆和长文本转音频的V2版本API请求和响应数据结构
  4. 采用统一异步架构,所有任务通过task_id追踪状态
  5. """
from datetime import datetime
from decimal import Decimal
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, ConfigDict, Field, field_validator
  10. # ==================== 通用Schema ====================
  11. class TaskStatus(BaseModel):
  12. """任务状态枚举"""
  13. PENDING: str = "PENDING"
  14. PROCESSING: str = "PROCESSING"
  15. SUCCEEDED: str = "SUCCEEDED"
  16. FAILED: str = "FAILED"
  17. class TaskStatusResponse(BaseModel):
  18. """任务状态查询响应"""
  19. task_id: str = Field(..., description="任务ID")
  20. status: str = Field(..., description="任务状态:PENDING/PROCESSING/SUCCEEDED/FAILED")
  21. progress: Optional[int] = Field(default=None, description="进度百分比(0-100),仅长文本任务有效")
  22. error_message: Optional[str] = Field(default=None, description="错误信息(失败时)")
  23. created_at: datetime = Field(..., description="创建时间")
  24. updated_at: datetime = Field(..., description="更新时间")
  25. completed_at: Optional[datetime] = Field(default=None, description="完成时间")
  26. # ==================== 语音识别V2 Schema ====================
  27. class ASRRecognitionV2CreateRequest(BaseModel):
  28. """创建语音识别任务请求(V2异步)"""
  29. model: str = Field(..., description="ASR模型名称")
  30. file_url: str = Field(..., description="音频文件URL,必须公网可访问")
  31. @field_validator('file_url')
  32. @classmethod
  33. def validate_file_url(cls, v):
  34. if not v or not v.strip():
  35. raise ValueError('音频文件URL不能为空')
  36. if not v.startswith(('http://', 'https://')):
  37. raise ValueError('音频文件URL必须是有效的HTTP/HTTPS地址')
  38. return v
  39. class ASRRecognitionV2Response(BaseModel):
  40. """语音识别任务响应(V2)"""
  41. id: int = Field(..., description="记录ID")
  42. task_id: str = Field(..., description="DashScope任务ID")
  43. model: str = Field(..., description="ASR模型名称")
  44. file_url: str = Field(..., description="音频文件URL")
  45. status: str = Field(..., description="任务状态")
  46. result_text: Optional[str] = Field(default=None, description="识别结果文本")
  47. result_url: Optional[str] = Field(default=None, description="识别结果文件URL(长文本)")
  48. duration: Optional[int] = Field(default=None, description="音频时长(秒)")
  49. bill: Decimal = Field(default=Decimal('0'), description="费用(元)")
  50. error_message: Optional[str] = Field(default=None, description="错误信息")
  51. created_at: datetime = Field(..., description="创建时间")
  52. updated_at: datetime = Field(..., description="更新时间")
  53. completed_at: Optional[datetime] = Field(default=None, description="完成时间")
  54. class Config:
  55. from_attributes = True
  56. class ASRRecognitionV2ListResponse(BaseModel):
  57. """语音识别任务列表响应"""
  58. total: int = Field(..., description="总记录数")
  59. items: List[ASRRecognitionV2Response] = Field(default=[], description="任务列表")
  60. # ==================== 语音合成V2 Schema ====================
  61. class AudioSynthesisV2CreateRequest(BaseModel):
  62. """创建语音合成任务请求(V2异步)"""
  63. model: str = Field(..., description="TTS模型名称")
  64. voice: str = Field(..., description="音色ID")
  65. text: str = Field(..., description="合成文本内容")
  66. format: str = Field(default="mp3", description="音频格式:mp3/wav/pcm")
  67. custom_name: Optional[str] = Field(default=None, max_length=200, description="用户自定义名称")
  68. @field_validator('text')
  69. @classmethod
  70. def validate_text(cls, v):
  71. if not v or not v.strip():
  72. raise ValueError('合成文本不能为空')
  73. if len(v) > 500:
  74. raise ValueError('短文本合成不支持超过500字符,请使用长文本转音频功能')
  75. return v
  76. @field_validator('format')
  77. @classmethod
  78. def validate_format(cls, v):
  79. allowed_formats = ['mp3', 'wav', 'pcm']
  80. if v not in allowed_formats:
  81. raise ValueError(f'不支持的音频格式,允许的格式: {allowed_formats}')
  82. return v
  83. class AudioSynthesisV2Response(BaseModel):
  84. """语音合成任务响应(V2)"""
  85. id: int = Field(..., description="记录ID")
  86. task_id: str = Field(..., description="DashScope任务ID")
  87. model: str = Field(..., description="TTS模型名称")
  88. voice: str = Field(..., description="音色ID")
  89. text: str = Field(..., description="合成文本内容")
  90. audio_url: Optional[str] = Field(default=None, description="音频文件OSS地址")
  91. status: str = Field(..., description="任务状态")
  92. duration: Optional[Decimal] = Field(default=None, description="音频时长(秒)")
  93. format: str = Field(default="mp3", description="音频格式")
  94. characters: Optional[int] = Field(default=None, description="文本字符数")
  95. bill: Decimal = Field(default=Decimal('0'), description="费用(元)")
  96. custom_name: Optional[str] = Field(default=None, description="用户自定义名称")
  97. error_message: Optional[str] = Field(default=None, description="错误信息")
  98. created_at: datetime = Field(..., description="创建时间")
  99. updated_at: datetime = Field(..., description="更新时间")
  100. completed_at: Optional[datetime] = Field(default=None, description="完成时间")
  101. class Config:
  102. from_attributes = True
  103. class AudioSynthesisV2ListResponse(BaseModel):
  104. """语音合成任务列表响应"""
  105. total: int = Field(..., description="总记录数")
  106. items: List[AudioSynthesisV2Response] = Field(default=[], description="任务列表")
  107. # ==================== 声音克隆V2 Schema ====================
  108. class VoiceCloneV2CreateRequest(BaseModel):
  109. """创建声音克隆任务请求(V2异步)"""
  110. target_model: str = Field(..., description="目标TTS模型")
  111. prefix: str = Field(..., max_length=20, description="音色前缀名称")
  112. voice_name: Optional[str] = Field(default=None, max_length=50, description="音色名称(用户输入)")
  113. audio_url: str = Field(..., description="原始音频文件OSS地址")
  114. @field_validator('prefix')
  115. @classmethod
  116. def validate_prefix(cls, v):
  117. import re
  118. if not re.match(r'^[a-zA-Z0-9_]+$', v):
  119. raise ValueError('前缀仅允许数字、字母和下划线')
  120. return v
  121. @field_validator('target_model')
  122. @classmethod
  123. def validate_target_model(cls, v):
  124. allowed_models = ['cosyvoice-v3-plus', 'cosyvoice-v3-flash', 'cosyvoice-v2']
  125. if v not in allowed_models:
  126. raise ValueError(f'不支持的目标模型,允许的模型: {allowed_models}')
  127. return v
  128. class VoiceCloneV2Response(BaseModel):
  129. """声音克隆任务响应(V2)"""
  130. id: int = Field(..., description="记录ID")
  131. task_id: str = Field(..., description="DashScope任务ID")
  132. voice_id: Optional[str] = Field(default=None, description="生成的音色ID(完成后)")
  133. target_model: str = Field(..., description="目标TTS模型")
  134. prefix: str = Field(..., description="音色前缀名称")
  135. voice_name: Optional[str] = Field(default=None, description="音色名称")
  136. audio_url: Optional[str] = Field(default=None, description="原始音频文件OSS地址")
  137. status: str = Field(..., description="任务状态")
  138. bill: Decimal = Field(default=Decimal('0'), description="费用(元)")
  139. error_message: Optional[str] = Field(default=None, description="错误信息")
  140. created_at: datetime = Field(..., description="创建时间")
  141. updated_at: datetime = Field(..., description="更新时间")
  142. completed_at: Optional[datetime] = Field(default=None, description="完成时间")
  143. class Config:
  144. from_attributes = True
  145. class VoiceCloneV2ListResponse(BaseModel):
  146. """声音克隆任务列表响应"""
  147. total: int = Field(..., description="总记录数")
  148. items: List[VoiceCloneV2Response] = Field(default=[], description="任务列表")
  149. # ==================== 长文本转音频 Schema ====================
  150. class SegmentInfo(BaseModel):
  151. """分段信息"""
  152. index: int = Field(..., description="分段索引(从1开始)")
  153. text: str = Field(..., description="分段文本内容")
  154. task_id: Optional[str] = Field(default=None, description="DashScope任务ID")
  155. audio_url: Optional[str] = Field(default=None, description="分段音频URL")
  156. duration: Optional[float] = Field(default=None, description="分段时长(秒)")
  157. status: str = Field(default="PENDING", description="分段状态")
  158. class LongTextAudioCreateRequest(BaseModel):
  159. """创建长文本转音频任务请求"""
  160. model: str = Field(..., description="TTS模型")
  161. voice: str = Field(..., description="音色ID")
  162. text: str = Field(..., description="原始长文本")
  163. format: str = Field(default="mp3", description="音频格式")
  164. custom_name: Optional[str] = Field(default=None, max_length=200, description="用户自定义名称")
  165. @field_validator('text')
  166. @classmethod
  167. def validate_text(cls, v):
  168. if not v or not v.strip():
  169. raise ValueError('文本不能为空')
  170. if len(v) <= 500:
  171. raise ValueError('文本长度不超过500字符,请使用普通语音合成功能')
  172. return v
  173. @field_validator('format')
  174. @classmethod
  175. def validate_format(cls, v):
  176. allowed_formats = ['mp3', 'wav', 'pcm']
  177. if v not in allowed_formats:
  178. raise ValueError(f'不支持的音频格式,允许的格式: {allowed_formats}')
  179. return v
  180. class LongTextAudioResponse(BaseModel):
  181. """长文本转音频任务响应"""
  182. id: int = Field(..., description="记录ID")
  183. task_id: str = Field(..., description="任务ID(本地生成UUID)")
  184. model: str = Field(..., description="TTS模型")
  185. voice: str = Field(..., description="音色ID")
  186. text: str = Field(..., description="原始长文本")
  187. text_length: int = Field(..., description="文本总长度")
  188. segment_count: int = Field(default=0, description="分段数量")
  189. segments: List[SegmentInfo] = Field(default=[], description="分段信息列表")
  190. audio_url: Optional[str] = Field(default=None, description="最终拼接音频URL")
  191. status: str = Field(..., description="任务状态")
  192. progress: int = Field(default=0, description="进度百分比(0-100)")
  193. duration: Optional[Decimal] = Field(default=None, description="总时长(秒)")
  194. format: str = Field(default="mp3", description="音频格式")
  195. bill: Decimal = Field(default=Decimal('0'), description="费用(元)")
  196. custom_name: Optional[str] = Field(default=None, description="用户自定义名称")
  197. error_message: Optional[str] = Field(default=None, description="错误信息")
  198. created_at: datetime = Field(..., description="创建时间")
  199. updated_at: datetime = Field(..., description="更新时间")
  200. completed_at: Optional[datetime] = Field(default=None, description="完成时间")
  201. class Config:
  202. from_attributes = True
  203. class LongTextAudioListResponse(BaseModel):
  204. """长文本转音频任务列表响应"""
  205. total: int = Field(..., description="总记录数")
  206. items: List[LongTextAudioResponse] = Field(default=[], description="任务列表")
  207. # ==================== 通用查询参数 ====================
  208. class TaskListQueryParams(BaseModel):
  209. """任务列表查询参数"""
  210. status: Optional[str] = Field(default=None, description="按状态筛选:PENDING/PROCESSING/SUCCEEDED/FAILED")
  211. page: int = Field(default=1, ge=1, description="页码(从1开始)")
  212. page_size: int = Field(default=20, ge=1, le=100, description="每页数量(1-100)")
  213. order_by: str = Field(default="created_at", description="排序字段")
  214. order: str = Field(default="desc", description="排序方向:asc/desc")
  215. @field_validator('status')
  216. @classmethod
  217. def validate_status(cls, v):
  218. if v is not None:
  219. allowed_status = ['PENDING', 'PROCESSING', 'SUCCEEDED', 'FAILED']
  220. if v not in allowed_status:
  221. raise ValueError(f'不支持的状态值,允许的状态: {allowed_status}')
  222. return v
  223. @field_validator('order')
  224. @classmethod
  225. def validate_order(cls, v):
  226. if v not in ['asc', 'desc']:
  227. raise ValueError('排序方向只能是 asc 或 desc')
  228. return v
  229. # ==================== 批量操作 Schema ====================
  230. class BatchDeleteRequest(BaseModel):
  231. """批量删除请求"""
  232. task_ids: List[str] = Field(..., min_length=1, max_length=100, description="任务ID列表(最多100个)")
  233. class BatchDeleteResponse(BaseModel):
  234. """批量删除响应"""
  235. success_count: int = Field(..., description="成功删除数量")
  236. failed_count: int = Field(..., description="失败数量")
  237. failed_tasks: List[Dict[str, str]] = Field(default=[], description="失败的任务列表")
  238. # ==================== 统计信息 Schema ====================
  239. class TaskStatistics(BaseModel):
  240. """任务统计信息"""
  241. total: int = Field(..., description="总任务数")
  242. pending: int = Field(..., description="待处理任务数")
  243. processing: int = Field(..., description="处理中任务数")
  244. succeeded: int = Field(..., description="成功任务数")
  245. failed: int = Field(..., description="失败任务数")
  246. total_bill: Decimal = Field(..., description="总费用(元)")
  247. total_duration: Optional[Decimal] = Field(default=None, description="总时长(秒)")
  248. class UserAudioStatisticsResponse(BaseModel):
  249. """用户语音统计响应"""
  250. asr_stats: TaskStatistics = Field(..., description="语音识别统计")
  251. tts_stats: TaskStatistics = Field(..., description="语音合成统计")
  252. voice_clone_stats: TaskStatistics = Field(..., description="声音克隆统计")
  253. long_text_stats: TaskStatistics = Field(..., description="长文本转音频统计")