# openai_compat.py
"""
Data transfer object definitions for an OpenAI-compatible API.

Provides full support for the OpenAI Chat Completions and Models
interface specifications.
"""
  5. from typing import Any, Dict, List, Literal, Optional, Union
  6. from pydantic import BaseModel, Field
# ─────────────────────────────────────────────
# Message content (multimodal supported)
# ─────────────────────────────────────────────
  10. class ImageUrl(BaseModel):
  11. url: str = Field(..., description="图片 URL 或 base64 data URI")
  12. detail: Optional[Literal["auto", "low", "high"]] = Field("auto", description="图片细节级别")
  13. class ContentPartText(BaseModel):
  14. type: Literal["text"]
  15. text: str
  16. class ContentPartImage(BaseModel):
  17. type: Literal["image_url"]
  18. image_url: ImageUrl
  19. class ContentPartAudio(BaseModel):
  20. type: Literal["input_audio"]
  21. input_audio: Dict[str, str] = Field(..., description="音频数据,包含 data 和 format 字段")
  22. ContentPart = Union[ContentPartText, ContentPartImage, ContentPartAudio]
# ─────────────────────────────────────────────
# Tool / Function Calling
# ─────────────────────────────────────────────
  26. class FunctionDefinition(BaseModel):
  27. name: str = Field(..., description="函数名称")
  28. description: Optional[str] = Field(None, description="函数描述")
  29. parameters: Optional[Dict[str, Any]] = Field(None, description="JSON Schema 格式的参数定义")
  30. strict: Optional[bool] = Field(None, description="是否严格模式")
  31. class Tool(BaseModel):
  32. type: Literal["function"] = "function"
  33. function: FunctionDefinition
  34. class ToolChoiceFunction(BaseModel):
  35. name: str
  36. class ToolChoiceObject(BaseModel):
  37. type: Literal["function"] = "function"
  38. function: ToolChoiceFunction
  39. ToolChoice = Union[Literal["none", "auto", "required"], ToolChoiceObject]
# ─────────────────────────────────────────────
# Message objects (multiple roles + multimodal content)
# ─────────────────────────────────────────────
  43. class ToolCallFunction(BaseModel):
  44. name: str
  45. arguments: str # JSON 字符串
  46. class ToolCall(BaseModel):
  47. id: str
  48. type: Literal["function"] = "function"
  49. function: ToolCallFunction
  50. class Message(BaseModel):
  51. role: Literal["system", "user", "assistant", "tool"] = Field(..., description="消息角色")
  52. # content 支持纯文本或多模态内容数组,assistant 回复 tool_calls 时可为 null
  53. content: Optional[Union[str, List[ContentPart]]] = Field(None, description="消息内容")
  54. name: Optional[str] = Field(None, description="消息发送者名称")
  55. # assistant 角色专用
  56. tool_calls: Optional[List[ToolCall]] = Field(None, description="工具调用列表(assistant 角色)")
  57. # tool 角色专用
  58. tool_call_id: Optional[str] = Field(None, description="对应的工具调用 ID(tool 角色)")
# ─────────────────────────────────────────────
# Response format
# ─────────────────────────────────────────────
  62. class ResponseFormat(BaseModel):
  63. type: Literal["text", "json_object", "json_schema"] = Field("text", description="响应格式类型")
  64. json_schema: Optional[Dict[str, Any]] = Field(None, description="JSON Schema 定义(type=json_schema 时使用)")
# ─────────────────────────────────────────────
# Streaming options
# ─────────────────────────────────────────────
  68. class StreamOptions(BaseModel):
  69. include_usage: Optional[bool] = Field(False, description="流式结束时是否返回 usage 统计")
# ─────────────────────────────────────────────
# Chat Completions request
# ─────────────────────────────────────────────
  73. class ChatCompletionsRequest(BaseModel):
  74. """
  75. 完整的 Chat Completions 请求体
  76. 兼容 OpenAI /v1/chat/completions 接口规范
  77. """
  78. # 必填
  79. model: str = Field(..., description="模型名称", examples=["gpt-4", "qwen-max"])
  80. messages: List[Message] = Field(..., min_length=1, description="消息列表", examples=[[
  81. {"role": "system", "content": "You are a helpful assistant."},
  82. {"role": "user", "content": "Hello!"}
  83. ]])
  84. # 采样参数
  85. temperature: Optional[float] = Field(None, ge=0, le=2, description="采样温度,0-2", examples=[0.7])
  86. top_p: Optional[float] = Field(None, ge=0, le=1, description="核采样概率", examples=[0.9])
  87. n: Optional[int] = Field(None, ge=1, le=128, description="生成候选数量", examples=[1])
  88. # 输出控制
  89. max_tokens: Optional[int] = Field(None, gt=0, description="最大输出 token 数(旧版)", examples=[2000])
  90. max_completion_tokens: Optional[int] = Field(None, gt=0, description="最大输出 token 数(新版)")
  91. stop: Optional[Union[str, List[str]]] = Field(None, description="停止词,字符串或最多 4 个字符串的列表")
  92. presence_penalty: Optional[float] = Field(None, ge=-2, le=2, description="存在惩罚,-2 到 2")
  93. frequency_penalty: Optional[float] = Field(None, ge=-2, le=2, description="频率惩罚,-2 到 2")
  94. logit_bias: Optional[Dict[str, float]] = Field(None, description="token 偏置字典")
  95. logprobs: Optional[bool] = Field(None, description="是否返回 log 概率")
  96. top_logprobs: Optional[int] = Field(None, ge=0, le=20, description="返回最高概率的 token 数量")
  97. # 流式
  98. stream: bool = Field(False, description="是否流式输出", examples=[False])
  99. stream_options: Optional[StreamOptions] = Field(None, description="流式选项")
  100. # 工具调用
  101. tools: Optional[List[Tool]] = Field(None, description="可用工具列表")
  102. tool_choice: Optional[ToolChoice] = Field(None, description="工具选择策略")
  103. parallel_tool_calls: Optional[bool] = Field(None, description="是否允许并行工具调用")
  104. # 结构化输出
  105. response_format: Optional[ResponseFormat] = Field(None, description="响应格式")
  106. # 可复现性
  107. seed: Optional[int] = Field(None, description="随机种子,用于可复现输出")
  108. # 用户标识(用于追踪/审计)
  109. user: Optional[str] = Field(None, description="终端用户唯一标识")
  110. # 其他
  111. service_tier: Optional[str] = Field(None, description="服务等级")
  112. store: Optional[bool] = Field(None, description="是否存储对话")
  113. metadata: Optional[Dict[str, str]] = Field(None, description="自定义元数据")
# ─────────────────────────────────────────────
# Chat Completions response
# ─────────────────────────────────────────────
  117. class Usage(BaseModel):
  118. prompt_tokens: int
  119. completion_tokens: int
  120. total_tokens: int
  121. prompt_tokens_details: Optional[Dict[str, int]] = None
  122. completion_tokens_details: Optional[Dict[str, int]] = None
  123. class Choice(BaseModel):
  124. index: int
  125. message: Message
  126. finish_reason: Optional[str] = None
  127. logprobs: Optional[Any] = None
  128. class ChatCompletionsResponse(BaseModel):
  129. id: str
  130. object: str = "chat.completion"
  131. created: int
  132. model: str
  133. choices: List[Choice]
  134. usage: Optional[Usage] = None
  135. system_fingerprint: Optional[str] = None
  136. service_tier: Optional[str] = None
# ─────────────────────────────────────────────
# Streaming response
# ─────────────────────────────────────────────
  140. class StreamDelta(BaseModel):
  141. role: Optional[str] = None
  142. content: Optional[str] = None
  143. tool_calls: Optional[List[Any]] = None
  144. class StreamChoice(BaseModel):
  145. index: int
  146. delta: StreamDelta
  147. finish_reason: Optional[str] = None
  148. logprobs: Optional[Any] = None
  149. class ChatCompletionsStreamChunk(BaseModel):
  150. id: str
  151. object: str = "chat.completion.chunk"
  152. created: int
  153. model: str
  154. choices: List[StreamChoice]
  155. usage: Optional[Usage] = None # 仅在 stream_options.include_usage=true 的最后一条出现
  156. system_fingerprint: Optional[str] = None
# ─────────────────────────────────────────────
# Models list
# ─────────────────────────────────────────────
  160. class ModelInfo(BaseModel):
  161. id: str = Field(..., description="模型 ID")
  162. object: str = Field("model", description="对象类型")
  163. created: int = Field(..., description="创建时间戳")
  164. owned_by: str = Field(..., description="模型所有者")
  165. class ModelsListResponse(BaseModel):
  166. object: str = Field("list", description="对象类型")
  167. data: List[ModelInfo] = Field(..., description="模型列表")
  168. class EmbeddingsRequest(BaseModel):
  169. input: Union[str, List[str]] = Field(..., description="要嵌入的文本,可以是字符串或字符串数组")
  170. model: str = Field(..., description="模型ID,如 text-embedding-3-small")
  171. encoding_format: Optional[Literal["float", "base64"]] = Field("float", description="返回的格式")
  172. dimensions: Optional[int] = Field(None, description="向量维度")
  173. user: Optional[str] = Field(None, description="终端用户标识")
  174. class EmbeddingData(BaseModel):
  175. object: str = Field("embedding", description="对象类型")
  176. embedding: List[float] = Field(..., description="向量浮点数数组")
  177. index: int = Field(..., description="对应输入的索引")
  178. class EmbeddingsResponse(BaseModel):
  179. object: str = Field("list", description="对象类型")
  180. data: List[EmbeddingData] = Field(..., description="嵌入向量数据列表")
  181. model: str = Field(..., description="使用的模型")
  182. usage: Usage = Field(..., description="Token用量统计")
  183. class ImageGenerationRequest(BaseModel):
  184. """图像生成请求
  185. 兼容 /v1/images/generations 端点
  186. """
  187. prompt: str = Field(..., description="生成图像的文本提示词", max_length=4000)
  188. model: Optional[str] = Field("dall-e-3", description="使用的模型ID")
  189. n: Optional[int] = Field(1, description="生成的图像数量", ge=1, le=10)
  190. quality: Optional[Literal["standard", "hd"]] = Field("standard", description="图像质量")
  191. response_format: Optional[Literal["url", "b64_json"]] = Field("url", description="返回格式: url 或 base64")
  192. size: Optional[str] = Field("1024x1024", description="图像尺寸,如 256x256, 1024x1024")
  193. style: Optional[Literal["vivid", "natural"]] = Field("vivid", description="图像风格")
  194. user: Optional[str] = Field(None, description="终端用户的唯一标识符")
  195. class ImageData(BaseModel):
  196. """图像数据对象"""
  197. url: Optional[str] = Field(None, description="图像的网络URL")
  198. b64_json: Optional[str] = Field(None, description="图像的Base64编码字符串")
  199. revised_prompt: Optional[str] = Field(None, description="模型自动修改后的提示词")
  200. class ImageGenerationResponse(BaseModel):
  201. """图像生成响应
  202. 兼容 OpenAI 图像返回格式
  203. """
  204. created: int = Field(..., description="创建时间戳")
  205. data: List[ImageData] = Field(..., description="生成的图像数据列表")
  206. class AudioTranscriptionResponse(BaseModel):
  207. """语音转文字(STT)响应"""
  208. text: str = Field(..., description="识别或翻译出的文本内容")
  209. class AudioSpeechRequest(BaseModel):
  210. """文字转语音(TTS)请求"""
  211. model: str = Field(..., description="使用的模型ID,如 tts-1")
  212. input: str = Field(..., description="要转换为语音的文本", max_length=4096)
  213. voice: str = Field(..., description="发音人声音类型 (支持标准音色及扩展特征音色)")
  214. response_format: Optional[Literal["mp3", "opus", "aac", "flac", "wav", "pcm"]] = Field("mp3", description="音频格式")
  215. speed: Optional[float] = Field(1.0, ge=0.25, le=4.0, description="语速")
  216. class VideoGenerationRequest(BaseModel):
  217. """视频生成请求"""
  218. prompt: str = Field(..., description="生成视频的文本提示词")
  219. model: Optional[str] = Field("wan2.6-t2v", description="使用的模型ID")
  220. response_format: Optional[Literal["url", "b64_json"]] = Field("url", description="返回格式")
  221. size: Optional[str] = Field("1080P", description="视频分辨率,如 720P, 1080P")
  222. duration: Optional[int] = Field(5, description="视频时长(秒)")
  223. class VideoData(BaseModel):
  224. """视频数据对象"""
  225. url: Optional[str] = Field(None, description="视频的网络URL")
  226. content_type: Optional[str] = Field("video/mp4", description="视频MIME类型")
  227. class VideoGenerationResponse(BaseModel):
  228. """视频生成响应"""
  229. created: int = Field(..., description="创建时间戳")
  230. data: List[VideoData] = Field(..., description="生成的视频数据列表")
  231. class ImageEditsRequest(BaseModel):
  232. """图像编辑请求
  233. 兼容 /v1/images/edits 端点
  234. """
  235. image: str = Field(..., description="要编辑的原始图像 (base64 编码)")
  236. prompt: str = Field(..., description="对新图像的文本描述", max_length=4000)
  237. mask: Optional[str] = Field(None, description="可选的遮罩层图像 (base64 编码)")
  238. model: Optional[str] = Field("wan2.6-image", description="模型ID")
  239. n: Optional[int] = Field(1, description="生成数量", ge=1, le=10)
  240. size: Optional[str] = Field("1024x1024", description="图像尺寸")
  241. response_format: Optional[Literal["url", "b64_json"]] = Field("url", description="返回格式")
  242. user: Optional[str] = Field(None, description="终端用户标识")
  243. class AudioTranscriptionsRequest(BaseModel):
  244. """语音转文字请求
  245. 兼容 /v1/audio/transcriptions 端点
  246. """
  247. file: str = Field(..., description="要识别的音频文件 (base64 编码)")
  248. model: str = Field(..., description="模型名称")
  249. language: Optional[str] = Field(None, description="ISO-639-1 语言代码")
  250. response_format: Optional[str] = Field("json", description="返回格式")
  251. user: Optional[str] = Field(None, description="终端用户标识")
  252. class AudioTranslationsRequest(BaseModel):
  253. """语音翻译请求
  254. 兼容 /v1/audio/translations 端点
  255. """
  256. file: str = Field(..., description="要翻译的音频文件 (base64 编码)")
  257. model: str = Field(..., description="使用的语音识别模型")
  258. source_language: Optional[str] = Field(None, description="原语音语言代码")
  259. target_language: Optional[str] = Field("en", description="目标翻译语言代码")
  260. translation_model: Optional[str] = Field("qwen-max", description="执行翻译的文本大模型")
  261. prompt: Optional[str] = Field(None, description="可选的翻译提示词")
  262. user: Optional[str] = Field(None, description="终端用户标识")
  263. class VideoGenerationRequest(BaseModel):
  264. """视频生成请求"""
  265. prompt: str = Field(..., description="生成视频的文本提示词")
  266. model: Optional[str] = Field("wan2.6-t2v", description="使用的模型ID")
  267. response_format: Optional[Literal["url", "b64_json"]] = Field("url", description="返回格式")
  268. size: Optional[str] = Field("1280x720", description="视频尺寸,如 1280x720, 1920x1080, 720x1280")
  269. duration: Optional[int] = Field(5, description="视频时长(秒)")
  270. image: Optional[str] = Field(None, description="图生视频的参考图像 (base64 编码或URL)")
  271. class AudioSpeechResponse(BaseModel):
  272. """文字转语音响应
  273. 兼容 /v1/audio/speech 端点
  274. """
  275. audio: str = Field(..., description="生成的音频数据 (base64 编码)")
  276. content_type: str = Field(..., description="音频 MIME 类型")
# ─────────────────────────────────────────────
# Rerank
# ─────────────────────────────────────────────
  280. class RerankRequest(BaseModel):
  281. """重排序请求
  282. 兼容 OpenAI 风格的 rerank 接口
  283. """
  284. model: str = Field(..., description="模型ID,如 bge-reranker-v2-m3")
  285. query: str = Field(..., description="查询文本")
  286. documents: List[str] = Field(..., description="待排序的文档列表", min_length=1)
  287. top_n: Optional[int] = Field(None, description="返回前N个结果,默认返回全部", ge=1)
  288. return_documents: Optional[bool] = Field(True, description="是否返回文档内容")
  289. user: Optional[str] = Field(None, description="终端用户标识")
  290. class RerankResult(BaseModel):
  291. """单个重排序结果"""
  292. index: int = Field(..., description="文档在原始列表中的索引")
  293. relevance_score: float = Field(..., description="相关性分数")
  294. document: Optional[str] = Field(None, description="文档内容(如果 return_documents=true)")
  295. class RerankResponse(BaseModel):
  296. """重排序响应"""
  297. object: str = Field("list", description="对象类型")
  298. data: List[RerankResult] = Field(..., description="排序后的结果列表")
  299. model: str = Field(..., description="使用的模型")
  300. usage: Usage = Field(..., description="Token用量统计")