| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315 |
- """
- AI语音V2版本数据传输对象定义
- 定义语音合成、语音识别、声音克隆和长文本转音频的V2版本API请求和响应数据结构
- 采用统一异步架构,所有任务通过task_id追踪状态
- """
import re
from datetime import datetime
from decimal import Decimal
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, ConfigDict, Field, field_validator
- # ==================== 通用Schema ====================
class TaskStatus(BaseModel):
    """Task status names, declared as string fields with fixed defaults.

    NOTE(review): this is a pydantic model, not an ``enum.Enum``; the values
    are presumably read from an instance (``TaskStatus().PENDING``) --
    confirm against callers before converting it to a real enum.
    """

    PENDING: str = Field(default="PENDING")
    PROCESSING: str = Field(default="PROCESSING")
    SUCCEEDED: str = Field(default="SUCCEEDED")
    FAILED: str = Field(default="FAILED")
class TaskStatusResponse(BaseModel):
    """Response returned when querying the status of a task.

    ``progress``, ``error_message`` and ``completed_at`` are optional and
    default to ``None``; all other fields are required.
    """

    # Identification and current state.
    task_id: str = Field(..., description="任务ID")
    status: str = Field(..., description="任务状态:PENDING/PROCESSING/SUCCEEDED/FAILED")

    # Optional details; per the field descriptions, progress is only
    # meaningful for long-text tasks and error_message only on failure.
    progress: Optional[int] = Field(None, description="进度百分比(0-100),仅长文本任务有效")
    error_message: Optional[str] = Field(None, description="错误信息(失败时)")

    # Timestamps.
    created_at: datetime = Field(..., description="创建时间")
    updated_at: datetime = Field(..., description="更新时间")
    completed_at: Optional[datetime] = Field(None, description="完成时间")
- # ==================== 语音识别V2 Schema ====================
class ASRRecognitionV2CreateRequest(BaseModel):
    """Request body for creating an asynchronous (V2) speech-recognition task.

    Attributes:
        model: Name of the ASR model.
        file_url: HTTP/HTTPS URL of the audio file. Surrounding whitespace is
            stripped during validation.
    """

    model: str = Field(..., description="ASR模型名称")
    file_url: str = Field(..., description="音频文件URL,必须公网可访问")

    @field_validator('file_url')
    @classmethod
    def validate_file_url(cls, v: str) -> str:
        """Validate the audio URL and return it without surrounding whitespace.

        Args:
            v: The submitted URL string.

        Returns:
            The URL with leading/trailing whitespace removed.

        Raises:
            ValueError: If the URL is blank, or does not start with
                ``http://`` or ``https://``.
        """
        # Normalize first so the scheme check and the returned value both use
        # the same trimmed string; previously a URL with stray leading
        # whitespace was rejected and one with trailing whitespace leaked
        # through unchanged.
        url = v.strip()
        if not url:
            raise ValueError('音频文件URL不能为空')
        if not url.startswith(('http://', 'https://')):
            raise ValueError('音频文件URL必须是有效的HTTP/HTTPS地址')
        return url
class ASRRecognitionV2Response(BaseModel):
    """Speech-recognition task record returned by the V2 API.

    The model can be populated from attribute-bearing objects because
    ``from_attributes`` is enabled.
    """

    # Pydantic v2 configuration; replaces the deprecated nested ``class Config``.
    model_config = ConfigDict(from_attributes=True)

    id: int = Field(..., description="记录ID")
    task_id: str = Field(..., description="DashScope任务ID")
    model: str = Field(..., description="ASR模型名称")
    file_url: str = Field(..., description="音频文件URL")
    status: str = Field(..., description="任务状态")

    # Result fields stay None until a value is supplied.
    result_text: Optional[str] = Field(default=None, description="识别结果文本")
    result_url: Optional[str] = Field(default=None, description="识别结果文件URL(长文本)")
    duration: Optional[int] = Field(default=None, description="音频时长(秒)")

    # Cost defaults to zero.
    bill: Decimal = Field(default=Decimal('0'), description="费用(元)")
    error_message: Optional[str] = Field(default=None, description="错误信息")

    created_at: datetime = Field(..., description="创建时间")
    updated_at: datetime = Field(..., description="更新时间")
    completed_at: Optional[datetime] = Field(default=None, description="完成时间")
class ASRRecognitionV2ListResponse(BaseModel):
    """List of speech-recognition tasks together with the total record count."""

    total: int = Field(..., description="总记录数")
    # Defaults to an empty list when no items are supplied.
    items: List[ASRRecognitionV2Response] = Field([], description="任务列表")
- # ==================== 语音合成V2 Schema ====================
class AudioSynthesisV2CreateRequest(BaseModel):
    """Request body for creating an asynchronous (V2) speech-synthesis task.

    ``text`` must be non-blank and at most 500 characters; ``format`` must be
    one of ``mp3``, ``wav`` or ``pcm`` and defaults to ``mp3``.
    """

    model: str = Field(..., description="TTS模型名称")
    voice: str = Field(..., description="音色ID")
    text: str = Field(..., description="合成文本内容")
    format: str = Field("mp3", description="音频格式:mp3/wav/pcm")
    custom_name: Optional[str] = Field(None, max_length=200, description="用户自定义名称")

    @field_validator('text')
    @classmethod
    def validate_text(cls, v):
        """Return ``v`` unchanged if it is non-blank and within 500 characters.

        Raises:
            ValueError: If the text is empty/whitespace-only, or longer than
                500 characters.
        """
        has_content = bool(v) and bool(v.strip())
        if not has_content:
            raise ValueError('合成文本不能为空')

        too_long = len(v) > 500
        if too_long:
            raise ValueError('短文本合成不支持超过500字符,请使用长文本转音频功能')
        return v

    @field_validator('format')
    @classmethod
    def validate_format(cls, v):
        """Return ``v`` unchanged if it names a supported audio format.

        Raises:
            ValueError: If ``v`` is not ``mp3``, ``wav`` or ``pcm``.
        """
        # Kept as a list: its repr is embedded in the error message.
        supported = ['mp3', 'wav', 'pcm']
        if v in supported:
            return v
        raise ValueError(f'不支持的音频格式,允许的格式: {supported}')
class AudioSynthesisV2Response(BaseModel):
    """Speech-synthesis task record returned by the V2 API.

    The model can be populated from attribute-bearing objects because
    ``from_attributes`` is enabled.
    """

    # Pydantic v2 configuration; replaces the deprecated nested ``class Config``.
    model_config = ConfigDict(from_attributes=True)

    id: int = Field(..., description="记录ID")
    task_id: str = Field(..., description="DashScope任务ID")
    model: str = Field(..., description="TTS模型名称")
    voice: str = Field(..., description="音色ID")
    text: str = Field(..., description="合成文本内容")
    audio_url: Optional[str] = Field(default=None, description="音频文件OSS地址")
    status: str = Field(..., description="任务状态")
    duration: Optional[Decimal] = Field(default=None, description="音频时长(秒)")
    format: str = Field(default="mp3", description="音频格式")
    characters: Optional[int] = Field(default=None, description="文本字符数")

    # Cost defaults to zero.
    bill: Decimal = Field(default=Decimal('0'), description="费用(元)")
    custom_name: Optional[str] = Field(default=None, description="用户自定义名称")
    error_message: Optional[str] = Field(default=None, description="错误信息")

    created_at: datetime = Field(..., description="创建时间")
    updated_at: datetime = Field(..., description="更新时间")
    completed_at: Optional[datetime] = Field(default=None, description="完成时间")
class AudioSynthesisV2ListResponse(BaseModel):
    """List of speech-synthesis tasks together with the total record count."""

    total: int = Field(..., description="总记录数")
    # Defaults to an empty list when no items are supplied.
    items: List[AudioSynthesisV2Response] = Field([], description="任务列表")
- # ==================== 声音克隆V2 Schema ====================
class VoiceCloneV2CreateRequest(BaseModel):
    """Request body for creating an asynchronous (V2) voice-clone task.

    Attributes:
        target_model: Target TTS model; must be one of the models listed in
            ``validate_target_model``.
        prefix: Voice prefix, at most 20 characters, restricted to ASCII
            letters, digits and underscores.
        voice_name: Optional user-supplied voice name, at most 50 characters.
        audio_url: Address of the source audio file.
    """

    target_model: str = Field(..., description="目标TTS模型")
    prefix: str = Field(..., max_length=20, description="音色前缀名称")
    voice_name: Optional[str] = Field(default=None, max_length=50, description="音色名称(用户输入)")
    audio_url: str = Field(..., description="原始音频文件OSS地址")

    @field_validator('prefix')
    @classmethod
    def validate_prefix(cls, v: str) -> str:
        """Return ``v`` unchanged if it contains only ``[a-zA-Z0-9_]`` characters.

        Raises:
            ValueError: If ``v`` is empty or contains any other character.
        """
        # fullmatch instead of match with ``^...$``: ``$`` also matches just
        # before a trailing newline, so "abc\n" used to pass validation.
        if not re.fullmatch(r'[a-zA-Z0-9_]+', v):
            raise ValueError('前缀仅允许数字、字母和下划线')
        return v

    @field_validator('target_model')
    @classmethod
    def validate_target_model(cls, v: str) -> str:
        """Return ``v`` unchanged if it names a supported target model.

        Raises:
            ValueError: If ``v`` is not in the allowed model list.
        """
        # Kept as a list: its repr is embedded in the error message.
        allowed_models = ['cosyvoice-v3-plus', 'cosyvoice-v3-flash', 'cosyvoice-v2']
        if v not in allowed_models:
            raise ValueError(f'不支持的目标模型,允许的模型: {allowed_models}')
        return v
class VoiceCloneV2Response(BaseModel):
    """Voice-clone task record returned by the V2 API.

    The model can be populated from attribute-bearing objects because
    ``from_attributes`` is enabled.
    """

    # Pydantic v2 configuration; replaces the deprecated nested ``class Config``.
    model_config = ConfigDict(from_attributes=True)

    id: int = Field(..., description="记录ID")
    task_id: str = Field(..., description="DashScope任务ID")
    # Per the field description, the generated voice ID is available after completion.
    voice_id: Optional[str] = Field(default=None, description="生成的音色ID(完成后)")
    target_model: str = Field(..., description="目标TTS模型")
    prefix: str = Field(..., description="音色前缀名称")
    voice_name: Optional[str] = Field(default=None, description="音色名称")
    audio_url: Optional[str] = Field(default=None, description="原始音频文件OSS地址")
    status: str = Field(..., description="任务状态")

    # Cost defaults to zero.
    bill: Decimal = Field(default=Decimal('0'), description="费用(元)")
    error_message: Optional[str] = Field(default=None, description="错误信息")

    created_at: datetime = Field(..., description="创建时间")
    updated_at: datetime = Field(..., description="更新时间")
    completed_at: Optional[datetime] = Field(default=None, description="完成时间")
class VoiceCloneV2ListResponse(BaseModel):
    """List of voice-clone tasks together with the total record count."""

    total: int = Field(..., description="总记录数")
    # Defaults to an empty list when no items are supplied.
    items: List[VoiceCloneV2Response] = Field([], description="任务列表")
- # ==================== 长文本转音频 Schema ====================
class SegmentInfo(BaseModel):
    """One segment of a long text, with its synthesis state.

    ``index`` and ``text`` are required; ``status`` defaults to ``"PENDING"``
    and the remaining fields default to ``None``.
    """

    # Position (1-based per the field description) and content of the segment.
    index: int = Field(..., description="分段索引(从1开始)")
    text: str = Field(..., description="分段文本内容")

    # Per-segment synthesis details.
    task_id: Optional[str] = Field(None, description="DashScope任务ID")
    audio_url: Optional[str] = Field(None, description="分段音频URL")
    duration: Optional[float] = Field(None, description="分段时长(秒)")
    status: str = Field("PENDING", description="分段状态")
class LongTextAudioCreateRequest(BaseModel):
    """Request body for creating a long-text-to-audio task.

    ``text`` must be non-blank and longer than 500 characters; ``format``
    must be one of ``mp3``, ``wav`` or ``pcm`` and defaults to ``mp3``.
    """

    model: str = Field(..., description="TTS模型")
    voice: str = Field(..., description="音色ID")
    text: str = Field(..., description="原始长文本")
    format: str = Field("mp3", description="音频格式")
    custom_name: Optional[str] = Field(None, max_length=200, description="用户自定义名称")

    @field_validator('text')
    @classmethod
    def validate_text(cls, v):
        """Return ``v`` unchanged if it is non-blank and exceeds 500 characters.

        Raises:
            ValueError: If the text is empty/whitespace-only, or is 500
                characters or shorter.
        """
        if not (v and v.strip()):
            raise ValueError('文本不能为空')

        # Texts of 500 characters or fewer are rejected here.
        fits_short_synthesis = len(v) <= 500
        if fits_short_synthesis:
            raise ValueError('文本长度不超过500字符,请使用普通语音合成功能')
        return v

    @field_validator('format')
    @classmethod
    def validate_format(cls, v):
        """Return ``v`` unchanged if it names a supported audio format.

        Raises:
            ValueError: If ``v`` is not ``mp3``, ``wav`` or ``pcm``.
        """
        # Kept as a list: its repr is embedded in the error message.
        supported = ['mp3', 'wav', 'pcm']
        if v in supported:
            return v
        raise ValueError(f'不支持的音频格式,允许的格式: {supported}')
class LongTextAudioResponse(BaseModel):
    """Long-text-to-audio task record, including its per-segment details.

    The model can be populated from attribute-bearing objects because
    ``from_attributes`` is enabled.
    """

    # Pydantic v2 configuration; replaces the deprecated nested ``class Config``.
    model_config = ConfigDict(from_attributes=True)

    id: int = Field(..., description="记录ID")
    # Per the field description, this ID is a locally generated UUID.
    task_id: str = Field(..., description="任务ID(本地生成UUID)")
    model: str = Field(..., description="TTS模型")
    voice: str = Field(..., description="音色ID")
    text: str = Field(..., description="原始长文本")
    text_length: int = Field(..., description="文本总长度")

    # Segmentation: count defaults to 0 and the list defaults to empty.
    segment_count: int = Field(default=0, description="分段数量")
    segments: List[SegmentInfo] = Field(default=[], description="分段信息列表")

    audio_url: Optional[str] = Field(default=None, description="最终拼接音频URL")
    status: str = Field(..., description="任务状态")
    progress: int = Field(default=0, description="进度百分比(0-100)")
    duration: Optional[Decimal] = Field(default=None, description="总时长(秒)")
    format: str = Field(default="mp3", description="音频格式")

    # Cost defaults to zero.
    bill: Decimal = Field(default=Decimal('0'), description="费用(元)")
    custom_name: Optional[str] = Field(default=None, description="用户自定义名称")
    error_message: Optional[str] = Field(default=None, description="错误信息")

    created_at: datetime = Field(..., description="创建时间")
    updated_at: datetime = Field(..., description="更新时间")
    completed_at: Optional[datetime] = Field(default=None, description="完成时间")
class LongTextAudioListResponse(BaseModel):
    """List of long-text-to-audio tasks together with the total record count."""

    total: int = Field(..., description="总记录数")
    # Defaults to an empty list when no items are supplied.
    items: List[LongTextAudioResponse] = Field([], description="任务列表")
- # ==================== 通用查询参数 ====================
class TaskListQueryParams(BaseModel):
    """Filtering, paging and sorting parameters for task list queries.

    ``page`` starts at 1, ``page_size`` is limited to 1-100, ``status`` (when
    given) must be a known status name and ``order`` must be ``asc`` or
    ``desc``.
    """

    status: Optional[str] = Field(None, description="按状态筛选:PENDING/PROCESSING/SUCCEEDED/FAILED")
    page: int = Field(1, ge=1, description="页码(从1开始)")
    page_size: int = Field(20, ge=1, le=100, description="每页数量(1-100)")
    # NOTE(review): order_by is not validated in this model; presumably the
    # caller restricts it to known columns -- confirm.
    order_by: str = Field("created_at", description="排序字段")
    order: str = Field("desc", description="排序方向:asc/desc")

    @field_validator('status')
    @classmethod
    def validate_status(cls, v):
        """Return ``v`` unchanged if it is ``None`` or a known status name.

        Raises:
            ValueError: If ``v`` is a string outside the allowed statuses.
        """
        if v is None:
            return v

        # Kept as a list: its repr is embedded in the error message.
        known_statuses = ['PENDING', 'PROCESSING', 'SUCCEEDED', 'FAILED']
        if v in known_statuses:
            return v
        raise ValueError(f'不支持的状态值,允许的状态: {known_statuses}')

    @field_validator('order')
    @classmethod
    def validate_order(cls, v):
        """Return ``v`` unchanged if it is ``asc`` or ``desc``.

        Raises:
            ValueError: For any other value.
        """
        if v in ['asc', 'desc']:
            return v
        raise ValueError('排序方向只能是 asc 或 desc')
- # ==================== 批量操作 Schema ====================
class BatchDeleteRequest(BaseModel):
    """Request body for deleting several tasks at once."""

    # The list must contain between 1 and 100 task IDs.
    task_ids: List[str] = Field(
        ...,
        min_length=1,
        max_length=100,
        description="任务ID列表(最多100个)",
    )
class BatchDeleteResponse(BaseModel):
    """Outcome of a batch delete: success/failure counts and the failed tasks."""

    success_count: int = Field(..., description="成功删除数量")
    failed_count: int = Field(..., description="失败数量")
    # Each entry is a str-to-str mapping; the list defaults to empty.
    failed_tasks: List[Dict[str, str]] = Field([], description="失败的任务列表")
- # ==================== 统计信息 Schema ====================
class TaskStatistics(BaseModel):
    """Aggregate task counts per status, plus total cost and duration.

    All fields are required except ``total_duration``, which defaults to
    ``None``.
    """

    # Task counts, overall and per status.
    total: int = Field(..., description="总任务数")
    pending: int = Field(..., description="待处理任务数")
    processing: int = Field(..., description="处理中任务数")
    succeeded: int = Field(..., description="成功任务数")
    failed: int = Field(..., description="失败任务数")

    # Aggregated cost and duration.
    total_bill: Decimal = Field(..., description="总费用(元)")
    total_duration: Optional[Decimal] = Field(None, description="总时长(秒)")
class UserAudioStatisticsResponse(BaseModel):
    """Per-user audio statistics, one ``TaskStatistics`` block per task type.

    Covers speech recognition, speech synthesis, voice cloning and
    long-text-to-audio; all four blocks are required.
    """

    asr_stats: TaskStatistics = Field(description="语音识别统计")
    tts_stats: TaskStatistics = Field(description="语音合成统计")
    voice_clone_stats: TaskStatistics = Field(description="声音克隆统计")
    long_text_stats: TaskStatistics = Field(description="长文本转音频统计")
|