"""
ORM definitions for the AI audio module.

Defines the database tables of the AI audio system: speech synthesis,
voice cloning, speech recognition and system preset voices.
"""
  5. from datetime import datetime
  6. from sqlalchemy import Column, Integer, String, Text, DateTime, Numeric, Boolean, ForeignKey, Index
  7. from sqlalchemy.dialects.postgresql import JSONB
  8. from sqlalchemy.sql import func
  9. from app.database import Base
  10. class AudioSynthesis(Base):
  11. """
  12. 语音合成记录ORM类
  13. 存储用户的TTS语音合成记录,包括文本、音色、音频URL和费用信息
  14. """
  15. __tablename__ = "audio_synthesis"
  16. # 主键
  17. id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
  18. # 用户ID(外键关联users表)
  19. user_id = Column(
  20. String(50),
  21. ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
  22. nullable=False,
  23. comment="用户ID"
  24. )
  25. # TTS模型名称
  26. model = Column(String(100), nullable=False, comment="TTS模型名称")
  27. # 音色ID
  28. voice = Column(String(100), nullable=False, comment="音色ID")
  29. # 合成文本内容
  30. text = Column(Text, nullable=False, comment="合成文本内容")
  31. # 音频文件OSS地址
  32. audio_url = Column(String(500), nullable=False, comment="音频文件OSS地址")
  33. # 音频时长(秒)
  34. duration = Column(Numeric(10, 2), comment="音频时长(秒)")
  35. # 音频格式(mp3/wav/pcm)
  36. format = Column(String(20), comment="音频格式")
  37. # 文本字符数
  38. characters = Column(Integer, comment="文本字符数")
  39. # 费用(元)
  40. bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
  41. # 用户自定义名称
  42. custom_name = Column(String(200), comment="用户自定义名称")
  43. # 完成时间
  44. completed_at = Column(DateTime, comment="任务完成时间")
  45. # 审核状态
  46. review_status = Column(String(20), default="pending", comment="审核状态: pending/approved/rejected")
  47. reviewed_by = Column(Integer, comment="审核人ID")
  48. reviewed_at = Column(DateTime, comment="审核时间")
  49. reject_reason = Column(String(500), comment="拒绝原因")
  50. # 创建时间
  51. created_at = Column(
  52. DateTime,
  53. server_default=func.now(),
  54. comment="创建时间"
  55. )
  56. # 表级配置
  57. __table_args__ = (
  58. Index('idx_audio_synthesis_user_id', 'user_id'),
  59. Index('idx_audio_synthesis_created_at', 'created_at'),
  60. {'schema': 'aigcspace', 'comment': '语音合成记录表'}
  61. )
  62. def __repr__(self):
  63. return f"<AudioSynthesis(id={self.id}, model='{self.model}', voice='{self.voice}', user_id='{self.user_id}')>"
  64. class VoiceClone(Base):
  65. """
  66. 声音复刻音色ORM类
  67. 存储用户创建的复刻音色信息,包括音色ID、状态和原始音频
  68. """
  69. __tablename__ = "voice_clone"
  70. # 主键
  71. id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
  72. # 用户ID(外键关联users表)
  73. user_id = Column(
  74. String(50),
  75. ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
  76. nullable=False,
  77. comment="用户ID"
  78. )
  79. # DashScope返回的音色ID
  80. voice_id = Column(String(200), unique=True, nullable=False, comment="DashScope返回的音色ID")
  81. # 目标TTS模型
  82. target_model = Column(String(100), nullable=False, comment="目标TTS模型")
  83. # 音色前缀名称
  84. prefix = Column(String(20), nullable=False, comment="音色前缀名称")
  85. # 音色名称(用户输入的中文名称)
  86. voice_name = Column(String(50), comment="音色名称(用户输入的中文名称)")
  87. # 音色状态(DEPLOYING/DEPLOYED/FAILED)
  88. status = Column(String(20), default='DEPLOYING', comment="音色状态")
  89. # 费用(元)
  90. bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
  91. # 原始音频文件OSS地址
  92. audio_url = Column(String(500), comment="原始音频文件OSS地址")
  93. # 创建时间
  94. created_at = Column(
  95. DateTime,
  96. server_default=func.now(),
  97. comment="创建时间"
  98. )
  99. # 更新时间
  100. updated_at = Column(
  101. DateTime,
  102. server_default=func.now(),
  103. onupdate=func.now(),
  104. comment="更新时间"
  105. )
  106. # 表级配置
  107. __table_args__ = (
  108. Index('idx_voice_clone_user_id', 'user_id'),
  109. Index('idx_voice_clone_voice_id', 'voice_id'),
  110. Index('idx_voice_clone_status', 'status'),
  111. Index('idx_voice_clone_created_at', 'created_at'),
  112. {'schema': 'aigcspace', 'comment': '声音复刻音色表'}
  113. )
  114. def __repr__(self):
  115. return f"<VoiceClone(id={self.id}, voice_id='{self.voice_id}', status='{self.status}', user_id='{self.user_id}')>"
  116. class ASRTask(Base):
  117. """
  118. 语音识别任务ORM类
  119. 存储异步语音识别任务信息,包括任务状态和识别结果
  120. """
  121. __tablename__ = "asr_task"
  122. # 主键
  123. id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
  124. # 用户ID(外键关联users表)
  125. user_id = Column(
  126. String(50),
  127. ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
  128. nullable=False,
  129. comment="用户ID"
  130. )
  131. # DashScope返回的任务ID
  132. task_id = Column(String(100), unique=True, nullable=False, comment="DashScope返回的任务ID")
  133. # ASR模型名称
  134. model = Column(String(100), nullable=False, comment="ASR模型名称")
  135. # 音频文件URL
  136. file_url = Column(Text, nullable=False, comment="音频文件URL")
  137. # 任务状态(PENDING/RUNNING/SUCCEEDED/FAILED)
  138. status = Column(String(20), default='PENDING', comment="任务状态")
  139. # 识别结果文本
  140. result_text = Column(Text, comment="识别结果文本")
  141. # 识别结果文件URL
  142. result_url = Column(Text, comment="识别结果文件URL")
  143. # 音频时长(秒)
  144. duration = Column(Integer, comment="音频时长(秒)")
  145. # 费用(元)
  146. bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
  147. # 创建时间
  148. created_at = Column(
  149. DateTime,
  150. server_default=func.now(),
  151. comment="创建时间"
  152. )
  153. # 更新时间
  154. updated_at = Column(
  155. DateTime,
  156. server_default=func.now(),
  157. onupdate=func.now(),
  158. comment="更新时间"
  159. )
  160. # 表级配置
  161. __table_args__ = (
  162. Index('idx_asr_task_user_id', 'user_id'),
  163. Index('idx_asr_task_task_id', 'task_id'),
  164. Index('idx_asr_task_status', 'status'),
  165. Index('idx_asr_task_created_at', 'created_at'),
  166. {'schema': 'aigcspace', 'comment': '语音识别任务表'}
  167. )
  168. def __repr__(self):
  169. return f"<ASRTask(id={self.id}, task_id='{self.task_id}', status='{self.status}', user_id='{self.user_id}')>"
  170. class ASRRecognition(Base):
  171. """
  172. 同步语音识别记录ORM类
  173. 存储同步语音识别的历史记录,包括识别结果、语言、情感等信息
  174. """
  175. __tablename__ = "asr_recognition"
  176. # 主键
  177. id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
  178. # 用户ID(外键关联users表)
  179. user_id = Column(
  180. String(50),
  181. ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
  182. nullable=False,
  183. comment="用户ID"
  184. )
  185. # ASR模型名称
  186. model = Column(String(100), nullable=False, comment="ASR模型名称")
  187. # 音频来源(二选一)
  188. audio_url = Column(String(500), comment="音频文件URL")
  189. audio_base64 = Column(Text, comment="Base64编码的音频数据")
  190. # 识别参数
  191. language = Column(String(20), comment="指定语种(zh/en/ja/ko等)")
  192. enable_itn = Column(Boolean, default=False, comment="是否启用逆文本标准化")
  193. context = Column(Text, comment="上下文提示")
  194. # 识别结果
  195. result_text = Column(Text, nullable=False, comment="识别结果文本")
  196. detected_language = Column(String(20), comment="检测到的语言")
  197. emotion = Column(String(20), comment="情感类型")
  198. duration = Column(Integer, comment="音频时长(秒)")
  199. # 使用统计
  200. input_tokens = Column(Integer, default=0, comment="输入Token数")
  201. output_tokens = Column(Integer, default=0, comment="输出Token数")
  202. # 费用(元)
  203. bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
  204. # 创建时间
  205. created_at = Column(
  206. DateTime,
  207. server_default=func.now(),
  208. comment="创建时间"
  209. )
  210. # 表级配置
  211. __table_args__ = (
  212. Index('idx_asr_recognition_user_id', 'user_id'),
  213. Index('idx_asr_recognition_created_at', 'created_at'),
  214. Index('idx_asr_recognition_model', 'model'),
  215. {'schema': 'aigcspace', 'comment': '同步语音识别记录表'}
  216. )
  217. def __repr__(self):
  218. return f"<ASRRecognition(id={self.id}, model='{self.model}', user_id='{self.user_id}')>"
  219. class SystemVoice(Base):
  220. """
  221. 系统预置音色ORM类
  222. 存储系统预置的TTS音色信息,包括音色特质、支持的语言和模型
  223. """
  224. __tablename__ = "system_voice"
  225. # 主键
  226. id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
  227. # 音色ID
  228. voice_id = Column(String(100), unique=True, nullable=False, comment="音色ID")
  229. # 音色名称
  230. name = Column(String(50), nullable=False, comment="音色名称")
  231. # 音色特质描述
  232. trait = Column(String(100), comment="音色特质描述")
  233. # 年龄段(青年/中年/老年)
  234. age = Column(String(20), comment="年龄段")
  235. # 场景分类(通用/客服/新闻/有声书等)
  236. category = Column(String(50), comment="场景分类")
  237. # 支持的语言列表
  238. languages = Column(JSONB, default=[], comment="支持的语言列表")
  239. # 支持的TTS模型列表
  240. models = Column(JSONB, default=[], comment="支持的TTS模型列表")
  241. # 是否支持SSML
  242. ssml_support = Column(Boolean, default=False, comment="是否支持SSML")
  243. # 是否支持指令控制
  244. instruct_support = Column(Boolean, default=False, comment="是否支持指令控制")
  245. # 是否支持时间戳
  246. timestamp_support = Column(Boolean, default=False, comment="是否支持时间戳")
  247. # 是否启用
  248. is_active = Column(Boolean, default=True, comment="是否启用")
  249. # 创建时间
  250. created_at = Column(
  251. DateTime,
  252. server_default=func.now(),
  253. comment="创建时间"
  254. )
  255. # 表级配置
  256. __table_args__ = (
  257. Index('idx_system_voice_voice_id', 'voice_id'),
  258. Index('idx_system_voice_category', 'category'),
  259. Index('idx_system_voice_is_active', 'is_active'),
  260. {'schema': 'aigcspace', 'comment': '系统预置音色表'}
  261. )
  262. def __repr__(self):
  263. return f"<SystemVoice(id={self.id}, voice_id='{self.voice_id}', name='{self.name}')>"
# ============================================
# V2 models (unified asynchronous architecture)
# ============================================
  267. class ASRRecognitionV2(Base):
  268. """
  269. 语音识别任务ORM类V2(异步模式)
  270. 统一异步架构,所有识别任务通过task_id追踪状态
  271. """
  272. __tablename__ = "asr_recognition_v2"
  273. # 主键
  274. id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
  275. # 用户ID(外键关联users表)
  276. user_id = Column(
  277. String(50),
  278. ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
  279. nullable=False,
  280. comment="用户ID"
  281. )
  282. # DashScope任务ID
  283. task_id = Column(String(100), unique=True, nullable=False, comment="DashScope任务ID")
  284. # ASR模型名称
  285. model = Column(String(100), nullable=False, comment="ASR模型名称")
  286. # 音频文件URL
  287. file_url = Column(String(500), nullable=False, comment="音频文件URL")
  288. # 任务状态
  289. status = Column(String(20), default='PENDING', comment="任务状态(PENDING/PROCESSING/SUCCEEDED/FAILED)")
  290. # 识别结果
  291. result_text = Column(Text, comment="识别结果文本")
  292. result_url = Column(String(500), comment="识别结果文件URL(长文本)")
  293. # 音频时长(秒)
  294. duration = Column(Integer, comment="音频时长(秒)")
  295. # 费用(元)
  296. bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
  297. # 错误信息
  298. error_message = Column(Text, comment="错误信息(失败时)")
  299. # 时间戳
  300. created_at = Column(DateTime, server_default=func.now(), comment="创建时间")
  301. updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now(), comment="更新时间")
  302. completed_at = Column(DateTime, comment="完成时间")
  303. # 表级配置
  304. __table_args__ = (
  305. Index('idx_asr_recognition_v2_user_id', 'user_id'),
  306. Index('idx_asr_recognition_v2_task_id', 'task_id'),
  307. Index('idx_asr_recognition_v2_status', 'status'),
  308. Index('idx_asr_recognition_v2_created_at', 'created_at'),
  309. {'schema': 'aigcspace', 'comment': '语音识别任务表V2(异步模式)'}
  310. )
  311. def __repr__(self):
  312. return f"<ASRRecognitionV2(id={self.id}, task_id='{self.task_id}', status='{self.status}')>"
  313. class AudioSynthesisV2(Base):
  314. """
  315. 语音合成任务ORM类V2(异步模式)
  316. 统一异步架构,所有合成任务通过task_id追踪状态
  317. """
  318. __tablename__ = "audio_synthesis_v2"
  319. # 主键
  320. id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
  321. # 用户ID(外键关联users表)
  322. user_id = Column(
  323. String(50),
  324. ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
  325. nullable=False,
  326. comment="用户ID"
  327. )
  328. # DashScope任务ID
  329. task_id = Column(String(100), unique=True, nullable=False, comment="DashScope任务ID")
  330. # TTS模型名称
  331. model = Column(String(100), nullable=False, comment="TTS模型名称")
  332. # 音色ID
  333. voice = Column(String(100), nullable=False, comment="音色ID")
  334. # 合成文本内容
  335. text = Column(Text, nullable=False, comment="合成文本内容")
  336. # 音频文件OSS地址
  337. audio_url = Column(String(500), comment="音频文件OSS地址")
  338. # 任务状态
  339. status = Column(String(20), default='PENDING', comment="任务状态(PENDING/PROCESSING/SUCCEEDED/FAILED)")
  340. # 音频时长(秒)
  341. duration = Column(Numeric(10, 2), comment="音频时长(秒)")
  342. # 音频格式(mp3/wav/pcm)
  343. format = Column(String(20), default='mp3', comment="音频格式")
  344. # 文本字符数
  345. characters = Column(Integer, comment="文本字符数")
  346. # 费用(元)
  347. bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
  348. # 用户自定义名称
  349. custom_name = Column(String(200), comment="用户自定义名称")
  350. # 错误信息
  351. error_message = Column(Text, comment="错误信息(失败时)")
  352. # 时间戳
  353. created_at = Column(DateTime, server_default=func.now(), comment="创建时间")
  354. updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now(), comment="更新时间")
  355. completed_at = Column(DateTime, comment="完成时间")
  356. # 表级配置
  357. __table_args__ = (
  358. Index('idx_audio_synthesis_v2_user_id', 'user_id'),
  359. Index('idx_audio_synthesis_v2_task_id', 'task_id'),
  360. Index('idx_audio_synthesis_v2_status', 'status'),
  361. Index('idx_audio_synthesis_v2_created_at', 'created_at'),
  362. {'schema': 'aigcspace', 'comment': '语音合成任务表V2(异步模式)'}
  363. )
  364. def __repr__(self):
  365. return f"<AudioSynthesisV2(id={self.id}, task_id='{self.task_id}', status='{self.status}')>"
  366. class VoiceCloneV2(Base):
  367. """
  368. 声音克隆任务ORM类V2(异步模式)
  369. 统一异步架构,所有克隆任务通过task_id追踪状态
  370. """
  371. __tablename__ = "voice_clone_v2"
  372. # 主键
  373. id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
  374. # 用户ID(外键关联users表)
  375. user_id = Column(
  376. String(50),
  377. ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
  378. nullable=False,
  379. comment="用户ID"
  380. )
  381. # DashScope任务ID
  382. task_id = Column(String(100), unique=True, nullable=False, comment="DashScope任务ID")
  383. # 生成的音色ID(完成后才有)
  384. voice_id = Column(String(200), comment="生成的音色ID(完成后)")
  385. # 目标TTS模型
  386. target_model = Column(String(100), nullable=False, comment="目标TTS模型")
  387. # 音色前缀名称
  388. prefix = Column(String(20), nullable=False, comment="音色前缀名称")
  389. # 音色名称(用户输入的中文名称)
  390. voice_name = Column(String(50), comment="音色名称(用户输入)")
  391. # 原始音频文件OSS地址
  392. audio_url = Column(String(500), comment="原始音频文件OSS地址")
  393. # 任务状态
  394. status = Column(String(20), default='PENDING', comment="任务状态(PENDING/PROCESSING/SUCCEEDED/FAILED)")
  395. # 费用(元)
  396. bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
  397. # 错误信息
  398. error_message = Column(Text, comment="错误信息(失败时)")
  399. # 时间戳
  400. created_at = Column(DateTime, server_default=func.now(), comment="创建时间")
  401. updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now(), comment="更新时间")
  402. completed_at = Column(DateTime, comment="完成时间")
  403. # 表级配置
  404. __table_args__ = (
  405. Index('idx_voice_clone_v2_user_id', 'user_id'),
  406. Index('idx_voice_clone_v2_task_id', 'task_id'),
  407. Index('idx_voice_clone_v2_voice_id', 'voice_id'),
  408. Index('idx_voice_clone_v2_status', 'status'),
  409. Index('idx_voice_clone_v2_created_at', 'created_at'),
  410. {'schema': 'aigcspace', 'comment': '声音克隆任务表V2(异步模式)'}
  411. )
  412. def __repr__(self):
  413. return f"<VoiceCloneV2(id={self.id}, task_id='{self.task_id}', status='{self.status}')>"
  414. class LongTextAudio(Base):
  415. """
  416. 长文本转音频任务ORM类(异步模式)
  417. 支持长文本分段合成和拼接,通过segments字段存储分段信息
  418. """
  419. __tablename__ = "long_text_audio"
  420. # 主键
  421. id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
  422. # 用户ID(外键关联users表)
  423. user_id = Column(
  424. String(50),
  425. ForeignKey("aigcspace.users.id", ondelete="CASCADE"),
  426. nullable=False,
  427. comment="用户ID"
  428. )
  429. # 本地生成的任务ID
  430. task_id = Column(String(100), unique=True, nullable=False, comment="任务ID(本地生成UUID)")
  431. # TTS模型
  432. model = Column(String(100), nullable=False, comment="TTS模型")
  433. # 音色ID
  434. voice = Column(String(100), nullable=False, comment="音色ID")
  435. # 原始长文本
  436. text = Column(Text, nullable=False, comment="原始长文本")
  437. # 文本总长度
  438. text_length = Column(Integer, nullable=False, comment="文本总长度")
  439. # 分段数量
  440. segment_count = Column(Integer, default=0, comment="分段数量")
  441. # 分段信息(JSONB数组)
  442. # 格式: [{"index": 1, "text": "...", "task_id": "...", "audio_url": "...", "duration": 10.5, "status": "SUCCEEDED"}]
  443. segments = Column(JSONB, default=[], comment="分段信息(JSONB数组)")
  444. # 最终拼接音频URL
  445. audio_url = Column(String(500), comment="最终拼接音频URL")
  446. # 任务状态
  447. status = Column(String(20), default='PENDING', comment="任务状态(PENDING/PROCESSING/SUCCEEDED/FAILED)")
  448. # 进度百分比(0-100)
  449. progress = Column(Integer, default=0, comment="进度百分比(0-100)")
  450. # 总时长(秒)
  451. duration = Column(Numeric(10, 2), comment="总时长(秒)")
  452. # 音频格式
  453. format = Column(String(20), default='mp3', comment="音频格式")
  454. # 费用(元)
  455. bill = Column(Numeric(10, 4), default=0, comment="费用(元)")
  456. # 用户自定义名称
  457. custom_name = Column(String(200), comment="用户自定义名称")
  458. # 错误信息
  459. error_message = Column(Text, comment="错误信息(失败时)")
  460. # 时间戳
  461. created_at = Column(DateTime, server_default=func.now(), comment="创建时间")
  462. updated_at = Column(DateTime, server_default=func.now(), onupdate=func.now(), comment="更新时间")
  463. completed_at = Column(DateTime, comment="完成时间")
  464. # 表级配置
  465. __table_args__ = (
  466. Index('idx_long_text_audio_user_id', 'user_id'),
  467. Index('idx_long_text_audio_task_id', 'task_id'),
  468. Index('idx_long_text_audio_status', 'status'),
  469. Index('idx_long_text_audio_created_at', 'created_at'),
  470. {'schema': 'aigcspace', 'comment': '长文本转音频任务表(异步模式)'}
  471. )
  472. def __repr__(self):
  473. return f"<LongTextAudio(id={self.id}, task_id='{self.task_id}', status='{self.status}', progress={self.progress}%)>"