| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637 |
- """
- AI语音模块ORM定义
- 定义AI语音系统的数据库表结构,包含语音合成、声音复刻、语音识别和系统音色
- """
- from datetime import datetime
- from sqlalchemy import Column, Integer, String, Text, DateTime, Numeric, Boolean, ForeignKey, Index
- from sqlalchemy.dialects.postgresql import JSONB
- from sqlalchemy.sql import func
- from app.database import Base
class AudioSynthesis(Base):
    """ORM model for text-to-speech synthesis records.

    Maps the ``audio_synthesis`` table in the ``aigcspace`` schema. A row
    keeps the synthesized text, the model and voice used, the location of
    the produced audio, billing data and the moderation (review) fields.
    """

    __tablename__ = "audio_synthesis"

    # --- identity -----------------------------------------------------
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
        comment="主键ID",
    )
    # Owner of the record; the FK cascades deletes from aigcspace.users.
    user_id = Column(
        String(50),
        ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
        nullable=False,
        comment="用户ID",
    )

    # --- synthesis request --------------------------------------------
    model = Column(String(100), nullable=False, comment="TTS模型名称")
    voice = Column(String(100), nullable=False, comment="音色ID")
    text = Column(Text, nullable=False, comment="合成文本内容")

    # --- synthesis output ---------------------------------------------
    # Address of the audio file in object storage (OSS).
    audio_url = Column(String(500), nullable=False, comment="音频文件OSS地址")
    # Length of the audio in seconds, two decimal places.
    duration = Column(Numeric(10, 2), comment="音频时长(秒)")
    # Container/encoding name, e.g. mp3 / wav / pcm.
    format = Column(String(20), comment="音频格式")
    # Number of characters in the input text.
    characters = Column(Integer, comment="文本字符数")
    # Cost in yuan, four decimal places.
    bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
    # Optional display name chosen by the user.
    custom_name = Column(String(200), comment="用户自定义名称")
    # When the task finished.
    completed_at = Column(DateTime, comment="任务完成时间")

    # --- moderation ---------------------------------------------------
    review_status = Column(
        String(20),
        default="pending",
        comment="审核状态: pending/approved/rejected",
    )
    reviewed_by = Column(Integer, comment="审核人ID")
    reviewed_at = Column(DateTime, comment="审核时间")
    reject_reason = Column(String(500), comment="拒绝原因")

    # --- bookkeeping --------------------------------------------------
    # Filled in by the database at insert time.
    created_at = Column(DateTime, server_default=func.now(), comment="创建时间")

    # Secondary indexes plus table-level options (schema and table comment).
    __table_args__ = (
        Index("idx_audio_synthesis_user_id", "user_id"),
        Index("idx_audio_synthesis_created_at", "created_at"),
        {"schema": "aigcspace", "comment": "语音合成记录表"},
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, model, voice and user."""
        return (
            f"<AudioSynthesis(id={self.id}, model='{self.model}', "
            f"voice='{self.voice}', user_id='{self.user_id}')>"
        )
class VoiceClone(Base):
    """ORM model for user-created cloned voices.

    Maps the ``voice_clone`` table in the ``aigcspace`` schema. A row keeps
    the voice ID returned by DashScope, its deployment status, billing data
    and the source audio the voice was cloned from.
    """

    __tablename__ = "voice_clone"

    # --- identity -----------------------------------------------------
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
        comment="主键ID",
    )
    # Owner of the voice; the FK cascades deletes from aigcspace.users.
    user_id = Column(
        String(50),
        ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
        nullable=False,
        comment="用户ID",
    )

    # --- voice description --------------------------------------------
    # Voice ID as returned by DashScope; must be unique across the table.
    voice_id = Column(
        String(200),
        unique=True,
        nullable=False,
        comment="DashScope返回的音色ID",
    )
    # TTS model the cloned voice targets.
    target_model = Column(String(100), nullable=False, comment="目标TTS模型")
    # Prefix used when naming the voice.
    prefix = Column(String(20), nullable=False, comment="音色前缀名称")
    # Human-readable name entered by the user.
    voice_name = Column(String(50), comment="音色名称(用户输入的中文名称)")
    # Documented values are DEPLOYING / DEPLOYED / FAILED; the schema itself
    # does not enforce them.
    status = Column(String(20), default="DEPLOYING", comment="音色状态")
    # Cost in yuan, four decimal places.
    bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
    # Address of the original audio sample in object storage (OSS).
    audio_url = Column(String(500), comment="原始音频文件OSS地址")

    # --- bookkeeping --------------------------------------------------
    # Filled in by the database at insert time.
    created_at = Column(DateTime, server_default=func.now(), comment="创建时间")
    # Starts at insert time and is refreshed on every ORM-issued UPDATE.
    updated_at = Column(
        DateTime,
        server_default=func.now(),
        onupdate=func.now(),
        comment="更新时间",
    )

    # Secondary indexes plus table-level options (schema and table comment).
    # NOTE(review): voice_id is already declared unique=True, so the explicit
    # index on it may be redundant — confirm before dropping it.
    __table_args__ = (
        Index("idx_voice_clone_user_id", "user_id"),
        Index("idx_voice_clone_voice_id", "voice_id"),
        Index("idx_voice_clone_status", "status"),
        Index("idx_voice_clone_created_at", "created_at"),
        {"schema": "aigcspace", "comment": "声音复刻音色表"},
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, voice_id, status and user."""
        return (
            f"<VoiceClone(id={self.id}, voice_id='{self.voice_id}', "
            f"status='{self.status}', user_id='{self.user_id}')>"
        )
class ASRTask(Base):
    """ORM model for asynchronous speech-recognition tasks.

    Maps the ``asr_task`` table in the ``aigcspace`` schema. A row keeps the
    DashScope task ID, the task status and the recognition result.
    """

    __tablename__ = "asr_task"

    # --- identity -----------------------------------------------------
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
        comment="主键ID",
    )
    # Owner of the task; the FK cascades deletes from aigcspace.users.
    user_id = Column(
        String(50),
        ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
        nullable=False,
        comment="用户ID",
    )
    # Task ID as returned by DashScope; must be unique across the table.
    task_id = Column(
        String(100),
        unique=True,
        nullable=False,
        comment="DashScope返回的任务ID",
    )

    # --- request ------------------------------------------------------
    model = Column(String(100), nullable=False, comment="ASR模型名称")
    # Stored as unbounded text rather than a fixed-length string.
    file_url = Column(Text, nullable=False, comment="音频文件URL")

    # --- state and result ---------------------------------------------
    # Documented values are PENDING / RUNNING / SUCCEEDED / FAILED; the schema
    # itself does not enforce them.
    status = Column(String(20), default="PENDING", comment="任务状态")
    result_text = Column(Text, comment="识别结果文本")
    result_url = Column(Text, comment="识别结果文件URL")
    # Length of the audio in whole seconds.
    duration = Column(Integer, comment="音频时长(秒)")
    # Cost in yuan, four decimal places.
    bill = Column(Numeric(10, 4), default=0, comment="费用(元)")

    # --- bookkeeping --------------------------------------------------
    # Filled in by the database at insert time.
    created_at = Column(DateTime, server_default=func.now(), comment="创建时间")
    # Starts at insert time and is refreshed on every ORM-issued UPDATE.
    updated_at = Column(
        DateTime,
        server_default=func.now(),
        onupdate=func.now(),
        comment="更新时间",
    )

    # Secondary indexes plus table-level options (schema and table comment).
    # NOTE(review): task_id is already declared unique=True, so the explicit
    # index on it may be redundant — confirm before dropping it.
    __table_args__ = (
        Index("idx_asr_task_user_id", "user_id"),
        Index("idx_asr_task_task_id", "task_id"),
        Index("idx_asr_task_status", "status"),
        Index("idx_asr_task_created_at", "created_at"),
        {"schema": "aigcspace", "comment": "语音识别任务表"},
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, task_id, status and user."""
        return (
            f"<ASRTask(id={self.id}, task_id='{self.task_id}', "
            f"status='{self.status}', user_id='{self.user_id}')>"
        )
class ASRRecognition(Base):
    """ORM model for synchronous speech-recognition history.

    Maps the ``asr_recognition`` table in the ``aigcspace`` schema. A row
    keeps the audio source, the recognition parameters, the recognized text
    with detected language and emotion, token usage and billing data.
    """

    __tablename__ = "asr_recognition"

    # --- identity -----------------------------------------------------
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
        comment="主键ID",
    )
    # Owner of the record; the FK cascades deletes from aigcspace.users.
    user_id = Column(
        String(50),
        ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
        nullable=False,
        comment="用户ID",
    )
    model = Column(String(100), nullable=False, comment="ASR模型名称")

    # --- audio source -------------------------------------------------
    # The original comment describes these two as alternatives (one of the
    # two is supplied); both columns are nullable and nothing here enforces it.
    audio_url = Column(String(500), comment="音频文件URL")
    audio_base64 = Column(Text, comment="Base64编码的音频数据")

    # --- recognition parameters ---------------------------------------
    language = Column(String(20), comment="指定语种(zh/en/ja/ko等)")
    # Inverse text normalization switch, off by default.
    enable_itn = Column(Boolean, default=False, comment="是否启用逆文本标准化")
    context = Column(Text, comment="上下文提示")

    # --- recognition result -------------------------------------------
    result_text = Column(Text, nullable=False, comment="识别结果文本")
    detected_language = Column(String(20), comment="检测到的语言")
    emotion = Column(String(20), comment="情感类型")
    # Length of the audio in whole seconds.
    duration = Column(Integer, comment="音频时长(秒)")

    # --- usage and billing --------------------------------------------
    input_tokens = Column(Integer, default=0, comment="输入Token数")
    output_tokens = Column(Integer, default=0, comment="输出Token数")
    # Cost in yuan, four decimal places.
    bill = Column(Numeric(10, 4), default=0, comment="费用(元)")

    # --- bookkeeping --------------------------------------------------
    # Filled in by the database at insert time.
    created_at = Column(DateTime, server_default=func.now(), comment="创建时间")

    # Secondary indexes plus table-level options (schema and table comment).
    __table_args__ = (
        Index("idx_asr_recognition_user_id", "user_id"),
        Index("idx_asr_recognition_created_at", "created_at"),
        Index("idx_asr_recognition_model", "model"),
        {"schema": "aigcspace", "comment": "同步语音识别记录表"},
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, model and user."""
        return (
            f"<ASRRecognition(id={self.id}, model='{self.model}', "
            f"user_id='{self.user_id}')>"
        )
class SystemVoice(Base):
    """ORM model for the built-in (system preset) TTS voices.

    Maps the ``system_voice`` table in the ``aigcspace`` schema. A row
    describes one preset voice: its traits, the languages and TTS models it
    supports, and a set of capability flags.
    """

    __tablename__ = "system_voice"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
    # Voice identifier; must be unique across the table.
    voice_id = Column(String(100), unique=True, nullable=False, comment="音色ID")
    # Display name of the voice.
    name = Column(String(50), nullable=False, comment="音色名称")
    # Free-text description of the voice's character.
    trait = Column(String(100), comment="音色特质描述")
    # Age bracket of the voice.
    age = Column(String(20), comment="年龄段")
    # Usage scenario the voice is filed under.
    category = Column(String(50), comment="场景分类")

    # JSONB lists. The default is the ``list`` callable rather than a ``[]``
    # literal: a literal would be one single list object reused for every
    # insert, so mutating it in place anywhere would leak into later rows.
    # A callable default is invoked per insert and yields a fresh list.
    languages = Column(JSONB, default=list, comment="支持的语言列表")
    models = Column(JSONB, default=list, comment="支持的TTS模型列表")

    # Capability flags.
    ssml_support = Column(Boolean, default=False, comment="是否支持SSML")
    instruct_support = Column(Boolean, default=False, comment="是否支持指令控制")
    timestamp_support = Column(Boolean, default=False, comment="是否支持时间戳")
    # Whether the voice is enabled; enabled by default.
    is_active = Column(Boolean, default=True, comment="是否启用")

    # Filled in by the database at insert time.
    created_at = Column(
        DateTime,
        server_default=func.now(),
        comment="创建时间",
    )

    # Secondary indexes plus table-level options (schema and table comment).
    # NOTE(review): voice_id is already declared unique=True, so the explicit
    # index on it may be redundant — confirm before dropping it.
    __table_args__ = (
        Index("idx_system_voice_voice_id", "voice_id"),
        Index("idx_system_voice_category", "category"),
        Index("idx_system_voice_is_active", "is_active"),
        {"schema": "aigcspace", "comment": "系统预置音色表"},
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, voice_id and name."""
        return f"<SystemVoice(id={self.id}, voice_id='{self.voice_id}', name='{self.name}')>"
- # ============================================
- # V2 版本模型(异步统一架构)
- # ============================================
class ASRRecognitionV2(Base):
    """ORM model for speech-recognition tasks, V2 (asynchronous mode).

    Maps the ``asr_recognition_v2`` table in the ``aigcspace`` schema. Each
    recognition job is a row identified by ``task_id``, with its status,
    result, billing data and error message stored alongside.
    """

    __tablename__ = "asr_recognition_v2"

    # --- identity -----------------------------------------------------
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
        comment="主键ID",
    )
    # Owner of the task; the FK cascades deletes from aigcspace.users.
    user_id = Column(
        String(50),
        ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
        nullable=False,
        comment="用户ID",
    )
    # DashScope task ID; must be unique across the table.
    task_id = Column(
        String(100),
        unique=True,
        nullable=False,
        comment="DashScope任务ID",
    )

    # --- request ------------------------------------------------------
    model = Column(String(100), nullable=False, comment="ASR模型名称")
    file_url = Column(String(500), nullable=False, comment="音频文件URL")

    # --- state and result ---------------------------------------------
    # New rows start as PENDING.
    status = Column(
        String(20),
        default="PENDING",
        comment="任务状态(PENDING/PROCESSING/SUCCEEDED/FAILED)",
    )
    result_text = Column(Text, comment="识别结果文本")
    result_url = Column(String(500), comment="识别结果文件URL(长文本)")
    # Length of the audio in whole seconds.
    duration = Column(Integer, comment="音频时长(秒)")
    # Cost in yuan, four decimal places.
    bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
    # Populated when the task fails.
    error_message = Column(Text, comment="错误信息(失败时)")

    # --- timestamps ---------------------------------------------------
    # created_at is set by the database on insert; updated_at also starts
    # there and is refreshed on every ORM-issued UPDATE.
    created_at = Column(
        DateTime,
        server_default=func.now(),
        comment="创建时间",
    )
    updated_at = Column(
        DateTime,
        server_default=func.now(),
        onupdate=func.now(),
        comment="更新时间",
    )
    completed_at = Column(DateTime, comment="完成时间")

    # Secondary indexes plus table-level options (schema and table comment).
    __table_args__ = (
        Index("idx_asr_recognition_v2_user_id", "user_id"),
        Index("idx_asr_recognition_v2_task_id", "task_id"),
        Index("idx_asr_recognition_v2_status", "status"),
        Index("idx_asr_recognition_v2_created_at", "created_at"),
        {"schema": "aigcspace", "comment": "语音识别任务表V2(异步模式)"},
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, task_id and status."""
        return (
            f"<ASRRecognitionV2(id={self.id}, task_id='{self.task_id}', "
            f"status='{self.status}')>"
        )
class AudioSynthesisV2(Base):
    """ORM model for speech-synthesis tasks, V2 (asynchronous mode).

    Maps the ``audio_synthesis_v2`` table in the ``aigcspace`` schema. Each
    synthesis job is a row identified by ``task_id``, with its status,
    output audio, billing data and error message stored alongside.
    """

    __tablename__ = "audio_synthesis_v2"

    # --- identity -----------------------------------------------------
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
        comment="主键ID",
    )
    # Owner of the task; the FK cascades deletes from aigcspace.users.
    user_id = Column(
        String(50),
        ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
        nullable=False,
        comment="用户ID",
    )
    # DashScope task ID; must be unique across the table.
    task_id = Column(
        String(100),
        unique=True,
        nullable=False,
        comment="DashScope任务ID",
    )

    # --- request ------------------------------------------------------
    model = Column(String(100), nullable=False, comment="TTS模型名称")
    voice = Column(String(100), nullable=False, comment="音色ID")
    text = Column(Text, nullable=False, comment="合成文本内容")

    # --- state and output ---------------------------------------------
    # Nullable: a row can exist before any audio has been produced.
    audio_url = Column(String(500), comment="音频文件OSS地址")
    # New rows start as PENDING.
    status = Column(
        String(20),
        default="PENDING",
        comment="任务状态(PENDING/PROCESSING/SUCCEEDED/FAILED)",
    )
    # Length of the audio in seconds, two decimal places.
    duration = Column(Numeric(10, 2), comment="音频时长(秒)")
    # Audio format name (e.g. mp3 / wav / pcm); defaults to mp3.
    format = Column(String(20), default="mp3", comment="音频格式")
    # Number of characters in the input text.
    characters = Column(Integer, comment="文本字符数")
    # Cost in yuan, four decimal places.
    bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
    # Optional display name chosen by the user.
    custom_name = Column(String(200), comment="用户自定义名称")
    # Populated when the task fails.
    error_message = Column(Text, comment="错误信息(失败时)")

    # --- timestamps ---------------------------------------------------
    # created_at is set by the database on insert; updated_at also starts
    # there and is refreshed on every ORM-issued UPDATE.
    created_at = Column(
        DateTime,
        server_default=func.now(),
        comment="创建时间",
    )
    updated_at = Column(
        DateTime,
        server_default=func.now(),
        onupdate=func.now(),
        comment="更新时间",
    )
    completed_at = Column(DateTime, comment="完成时间")

    # Secondary indexes plus table-level options (schema and table comment).
    __table_args__ = (
        Index("idx_audio_synthesis_v2_user_id", "user_id"),
        Index("idx_audio_synthesis_v2_task_id", "task_id"),
        Index("idx_audio_synthesis_v2_status", "status"),
        Index("idx_audio_synthesis_v2_created_at", "created_at"),
        {"schema": "aigcspace", "comment": "语音合成任务表V2(异步模式)"},
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, task_id and status."""
        return (
            f"<AudioSynthesisV2(id={self.id}, task_id='{self.task_id}', "
            f"status='{self.status}')>"
        )
class VoiceCloneV2(Base):
    """ORM model for voice-cloning tasks, V2 (asynchronous mode).

    Maps the ``voice_clone_v2`` table in the ``aigcspace`` schema. Each
    cloning job is a row identified by ``task_id``; the resulting
    ``voice_id`` is nullable and is documented as available only once the
    job has completed.
    """

    __tablename__ = "voice_clone_v2"

    # --- identity -----------------------------------------------------
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
        comment="主键ID",
    )
    # Owner of the task; the FK cascades deletes from aigcspace.users.
    user_id = Column(
        String(50),
        ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
        nullable=False,
        comment="用户ID",
    )
    # DashScope task ID; must be unique across the table.
    task_id = Column(
        String(100),
        unique=True,
        nullable=False,
        comment="DashScope任务ID",
    )

    # --- voice description --------------------------------------------
    # Unlike V1, voice_id is neither unique nor required here.
    voice_id = Column(String(200), comment="生成的音色ID(完成后)")
    # TTS model the cloned voice targets.
    target_model = Column(String(100), nullable=False, comment="目标TTS模型")
    # Prefix used when naming the voice.
    prefix = Column(String(20), nullable=False, comment="音色前缀名称")
    # Human-readable name entered by the user.
    voice_name = Column(String(50), comment="音色名称(用户输入)")
    # Address of the original audio sample in object storage (OSS).
    audio_url = Column(String(500), comment="原始音频文件OSS地址")

    # --- state --------------------------------------------------------
    # New rows start as PENDING.
    status = Column(
        String(20),
        default="PENDING",
        comment="任务状态(PENDING/PROCESSING/SUCCEEDED/FAILED)",
    )
    # Cost in yuan, four decimal places.
    bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
    # Populated when the task fails.
    error_message = Column(Text, comment="错误信息(失败时)")

    # --- timestamps ---------------------------------------------------
    # created_at is set by the database on insert; updated_at also starts
    # there and is refreshed on every ORM-issued UPDATE.
    created_at = Column(
        DateTime,
        server_default=func.now(),
        comment="创建时间",
    )
    updated_at = Column(
        DateTime,
        server_default=func.now(),
        onupdate=func.now(),
        comment="更新时间",
    )
    completed_at = Column(DateTime, comment="完成时间")

    # Secondary indexes plus table-level options (schema and table comment).
    __table_args__ = (
        Index("idx_voice_clone_v2_user_id", "user_id"),
        Index("idx_voice_clone_v2_task_id", "task_id"),
        Index("idx_voice_clone_v2_voice_id", "voice_id"),
        Index("idx_voice_clone_v2_status", "status"),
        Index("idx_voice_clone_v2_created_at", "created_at"),
        {"schema": "aigcspace", "comment": "声音克隆任务表V2(异步模式)"},
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, task_id and status."""
        return (
            f"<VoiceCloneV2(id={self.id}, task_id='{self.task_id}', "
            f"status='{self.status}')>"
        )
class LongTextAudio(Base):
    """ORM model for long-text-to-audio tasks (asynchronous mode).

    Maps the ``long_text_audio`` table in the ``aigcspace`` schema. A long
    text is synthesized in segments that are later joined; per-segment
    information is kept in the ``segments`` JSONB column, and the final
    joined audio is referenced by ``audio_url``.
    """

    __tablename__ = "long_text_audio"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
    # Owner of the task; the FK cascades deletes from aigcspace.users.
    user_id = Column(
        String(50),
        ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
        nullable=False,
        comment="用户ID",
    )
    # Task ID, documented as a locally generated UUID; unique across the table.
    task_id = Column(String(100), unique=True, nullable=False, comment="任务ID(本地生成UUID)")
    # TTS model used for synthesis.
    model = Column(String(100), nullable=False, comment="TTS模型")
    # Voice used for synthesis.
    voice = Column(String(100), nullable=False, comment="音色ID")
    # The full original text.
    text = Column(Text, nullable=False, comment="原始长文本")
    # Total length of the text.
    text_length = Column(Integer, nullable=False, comment="文本总长度")
    # Number of segments the text was split into.
    segment_count = Column(Integer, default=0, comment="分段数量")

    # Per-segment information as a JSONB array. Documented element format:
    # [{"index": 1, "text": "...", "task_id": "...", "audio_url": "...", "duration": 10.5, "status": "SUCCEEDED"}]
    # The default is the ``list`` callable rather than a ``[]`` literal: a
    # literal would be one single list object reused for every insert, so an
    # in-place append of segment data could leak into later rows. A callable
    # default is invoked per insert and yields a fresh list.
    segments = Column(JSONB, default=list, comment="分段信息(JSONB数组)")

    # URL of the final joined audio.
    audio_url = Column(String(500), comment="最终拼接音频URL")
    # New rows start as PENDING.
    status = Column(String(20), default='PENDING', comment="任务状态(PENDING/PROCESSING/SUCCEEDED/FAILED)")
    # Progress percentage; the 0-100 range is documented, not enforced.
    progress = Column(Integer, default=0, comment="进度百分比(0-100)")
    # Total audio length in seconds, two decimal places.
    duration = Column(Numeric(10, 2), comment="总时长(秒)")
    # Audio format name; defaults to mp3.
    format = Column(String(20), default='mp3', comment="音频格式")
    # Cost in yuan, four decimal places.
    bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
    # Optional display name chosen by the user.
    custom_name = Column(String(200), comment="用户自定义名称")
    # Populated when the task fails.
    error_message = Column(Text, comment="错误信息(失败时)")

    # created_at is set by the database on insert; updated_at also starts
    # there and is refreshed on every ORM-issued UPDATE.
    created_at = Column(DateTime, server_default=func.now(), comment="创建时间")
    updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now(), comment="更新时间")
    completed_at = Column(DateTime, comment="完成时间")

    # Secondary indexes plus table-level options (schema and table comment).
    __table_args__ = (
        Index('idx_long_text_audio_user_id', 'user_id'),
        Index('idx_long_text_audio_task_id', 'task_id'),
        Index('idx_long_text_audio_status', 'status'),
        Index('idx_long_text_audio_created_at', 'created_at'),
        {'schema': 'aigcspace', 'comment': '长文本转音频任务表(异步模式)'},
    )

    def __repr__(self) -> str:
        """Return a short debug representation with id, task_id, status and progress."""
        return f"<LongTextAudio(id={self.id}, task_id='{self.task_id}', status='{self.status}', progress={self.progress}%)>"
|