""" OpenAI 兼容 API 数据传输对象定义 完整支持 OpenAI Chat Completions 和 Models 接口规范 """ from typing import Any, Dict, List, Literal, Optional, Union from pydantic import BaseModel, Field # ───────────────────────────────────────────── # 消息内容(支持多模态) # ───────────────────────────────────────────── class ImageUrl(BaseModel): url: str = Field(..., description="图片 URL 或 base64 data URI") detail: Optional[Literal["auto", "low", "high"]] = Field("auto", description="图片细节级别") class ContentPartText(BaseModel): type: Literal["text"] text: str class ContentPartImage(BaseModel): type: Literal["image_url"] image_url: ImageUrl class ContentPartAudio(BaseModel): type: Literal["input_audio"] input_audio: Dict[str, str] = Field(..., description="音频数据,包含 data 和 format 字段") ContentPart = Union[ContentPartText, ContentPartImage, ContentPartAudio] # ───────────────────────────────────────────── # Tool / Function Calling # ───────────────────────────────────────────── class FunctionDefinition(BaseModel): name: str = Field(..., description="函数名称") description: Optional[str] = Field(None, description="函数描述") parameters: Optional[Dict[str, Any]] = Field(None, description="JSON Schema 格式的参数定义") strict: Optional[bool] = Field(None, description="是否严格模式") class Tool(BaseModel): type: Literal["function"] = "function" function: FunctionDefinition class ToolChoiceFunction(BaseModel): name: str class ToolChoiceObject(BaseModel): type: Literal["function"] = "function" function: ToolChoiceFunction ToolChoice = Union[Literal["none", "auto", "required"], ToolChoiceObject] # ───────────────────────────────────────────── # 消息对象(支持多角色 + 多模态内容) # ───────────────────────────────────────────── class ToolCallFunction(BaseModel): name: str arguments: str # JSON 字符串 class ToolCall(BaseModel): id: str type: Literal["function"] = "function" function: ToolCallFunction class Message(BaseModel): role: Literal["system", "user", "assistant", "tool"] = Field(..., description="消息角色") # content 支持纯文本或多模态内容数组,assistant 回复 tool_calls 时可为 null content: Optional[Union[str, List[ContentPart]]] = Field(None, description="消息内容") name: Optional[str] = Field(None, description="消息发送者名称") # assistant 角色专用 tool_calls: Optional[List[ToolCall]] = Field(None, description="工具调用列表(assistant 角色)") # tool 角色专用 tool_call_id: Optional[str] = Field(None, description="对应的工具调用 ID(tool 角色)") # ───────────────────────────────────────────── # 响应格式 # ───────────────────────────────────────────── class ResponseFormat(BaseModel): type: Literal["text", "json_object", "json_schema"] = Field("text", description="响应格式类型") json_schema: Optional[Dict[str, Any]] = Field(None, description="JSON Schema 定义(type=json_schema 时使用)") # ───────────────────────────────────────────── # 流式选项 # ───────────────────────────────────────────── class StreamOptions(BaseModel): include_usage: Optional[bool] = Field(False, description="流式结束时是否返回 usage 统计") # ───────────────────────────────────────────── # Chat Completions 请求 # ───────────────────────────────────────────── class ChatCompletionsRequest(BaseModel): """ 完整的 Chat Completions 请求体 兼容 OpenAI /v1/chat/completions 接口规范 """ # 必填 model: str = Field(..., description="模型名称", examples=["gpt-4", "qwen-max"]) messages: List[Message] = Field(..., min_length=1, description="消息列表", examples=[[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"} ]]) # 采样参数 temperature: Optional[float] = Field(None, ge=0, le=2, description="采样温度,0-2", examples=[0.7]) top_p: Optional[float] = Field(None, ge=0, le=1, description="核采样概率", examples=[0.9]) n: Optional[int] = Field(None, ge=1, le=128, description="生成候选数量", examples=[1]) # 输出控制 max_tokens: Optional[int] = Field(None, gt=0, description="最大输出 token 数(旧版)", examples=[2000]) max_completion_tokens: Optional[int] = Field(None, gt=0, description="最大输出 token 数(新版)") stop: Optional[Union[str, List[str]]] = Field(None, description="停止词,字符串或最多 4 个字符串的列表") presence_penalty: Optional[float] = Field(None, ge=-2, le=2, description="存在惩罚,-2 到 2") frequency_penalty: Optional[float] = Field(None, ge=-2, le=2, description="频率惩罚,-2 到 2") logit_bias: Optional[Dict[str, float]] = Field(None, description="token 偏置字典") logprobs: Optional[bool] = Field(None, description="是否返回 log 概率") top_logprobs: Optional[int] = Field(None, ge=0, le=20, description="返回最高概率的 token 数量") # 流式 stream: bool = Field(False, description="是否流式输出", examples=[False]) stream_options: Optional[StreamOptions] = Field(None, description="流式选项") # 工具调用 tools: Optional[List[Tool]] = Field(None, description="可用工具列表") tool_choice: Optional[ToolChoice] = Field(None, description="工具选择策略") parallel_tool_calls: Optional[bool] = Field(None, description="是否允许并行工具调用") # 结构化输出 response_format: Optional[ResponseFormat] = Field(None, description="响应格式") # 可复现性 seed: Optional[int] = Field(None, description="随机种子,用于可复现输出") # 用户标识(用于追踪/审计) user: Optional[str] = Field(None, description="终端用户唯一标识") # 其他 service_tier: Optional[str] = Field(None, description="服务等级") store: Optional[bool] = Field(None, description="是否存储对话") metadata: Optional[Dict[str, str]] = Field(None, description="自定义元数据") # ───────────────────────────────────────────── # Chat Completions 响应 # ───────────────────────────────────────────── class Usage(BaseModel): prompt_tokens: int completion_tokens: int total_tokens: int prompt_tokens_details: Optional[Dict[str, int]] = None completion_tokens_details: Optional[Dict[str, int]] = None class Choice(BaseModel): index: int message: Message finish_reason: Optional[str] = None logprobs: Optional[Any] = None class ChatCompletionsResponse(BaseModel): id: str object: str = "chat.completion" created: int model: str choices: List[Choice] usage: Optional[Usage] = None system_fingerprint: Optional[str] = None service_tier: Optional[str] = None # ───────────────────────────────────────────── # 流式响应 # ───────────────────────────────────────────── class StreamDelta(BaseModel): role: Optional[str] = None content: Optional[str] = None tool_calls: Optional[List[Any]] = None class StreamChoice(BaseModel): index: int delta: StreamDelta finish_reason: Optional[str] = None logprobs: Optional[Any] = None class ChatCompletionsStreamChunk(BaseModel): id: str object: str = "chat.completion.chunk" created: int model: str choices: List[StreamChoice] usage: Optional[Usage] = None # 仅在 stream_options.include_usage=true 的最后一条出现 system_fingerprint: Optional[str] = None # ───────────────────────────────────────────── # Models 列表 # ───────────────────────────────────────────── class ModelInfo(BaseModel): id: str = Field(..., description="模型 ID") object: str = Field("model", description="对象类型") created: int = Field(..., description="创建时间戳") owned_by: str = Field(..., description="模型所有者") class ModelsListResponse(BaseModel): object: str = Field("list", description="对象类型") data: List[ModelInfo] = Field(..., description="模型列表") class EmbeddingsRequest(BaseModel): input: Union[str, List[str]] = Field(..., description="要嵌入的文本,可以是字符串或字符串数组") model: str = Field(..., description="模型ID,如 text-embedding-3-small") encoding_format: Optional[Literal["float", "base64"]] = Field("float", description="返回的格式") dimensions: Optional[int] = Field(None, description="向量维度") user: Optional[str] = Field(None, description="终端用户标识") class EmbeddingData(BaseModel): object: str = Field("embedding", description="对象类型") embedding: List[float] = Field(..., description="向量浮点数数组") index: int = Field(..., description="对应输入的索引") class EmbeddingsResponse(BaseModel): object: str = Field("list", description="对象类型") data: List[EmbeddingData] = Field(..., description="嵌入向量数据列表") model: str = Field(..., description="使用的模型") usage: Usage = Field(..., description="Token用量统计") class ImageGenerationRequest(BaseModel): """图像生成请求 兼容 /v1/images/generations 端点 """ prompt: str = Field(..., description="生成图像的文本提示词", max_length=4000) model: Optional[str] = Field("dall-e-3", description="使用的模型ID") n: Optional[int] = Field(1, description="生成的图像数量", ge=1, le=10) quality: Optional[Literal["standard", "hd"]] = Field("standard", description="图像质量") response_format: Optional[Literal["url", "b64_json"]] = Field("url", description="返回格式: url 或 base64") size: Optional[str] = Field("1024x1024", description="图像尺寸,如 256x256, 1024x1024") style: Optional[Literal["vivid", "natural"]] = Field("vivid", description="图像风格") user: Optional[str] = Field(None, description="终端用户的唯一标识符") class ImageData(BaseModel): """图像数据对象""" url: Optional[str] = Field(None, description="图像的网络URL") b64_json: Optional[str] = Field(None, description="图像的Base64编码字符串") revised_prompt: Optional[str] = Field(None, description="模型自动修改后的提示词") class ImageGenerationResponse(BaseModel): """图像生成响应 兼容 OpenAI 图像返回格式 """ created: int = Field(..., description="创建时间戳") data: List[ImageData] = Field(..., description="生成的图像数据列表") class AudioTranscriptionResponse(BaseModel): """语音转文字(STT)响应""" text: str = Field(..., description="识别或翻译出的文本内容") class AudioSpeechRequest(BaseModel): """文字转语音(TTS)请求""" model: str = Field(..., description="使用的模型ID,如 tts-1") input: str = Field(..., description="要转换为语音的文本", max_length=4096) voice: str = Field(..., description="发音人声音类型 (支持标准音色及扩展特征音色)") response_format: Optional[Literal["mp3", "opus", "aac", "flac", "wav", "pcm"]] = Field("mp3", description="音频格式") speed: Optional[float] = Field(1.0, ge=0.25, le=4.0, description="语速") class VideoGenerationRequest(BaseModel): """视频生成请求""" prompt: str = Field(..., description="生成视频的文本提示词") model: Optional[str] = Field("wan2.6-t2v", description="使用的模型ID") response_format: Optional[Literal["url", "b64_json"]] = Field("url", description="返回格式") size: Optional[str] = Field("1080P", description="视频分辨率,如 720P, 1080P") duration: Optional[int] = Field(5, description="视频时长(秒)") class VideoData(BaseModel): """视频数据对象""" url: Optional[str] = Field(None, description="视频的网络URL") content_type: Optional[str] = Field("video/mp4", description="视频MIME类型") class VideoGenerationResponse(BaseModel): """视频生成响应""" created: int = Field(..., description="创建时间戳") data: List[VideoData] = Field(..., description="生成的视频数据列表") class ImageEditsRequest(BaseModel): """图像编辑请求 兼容 /v1/images/edits 端点 """ image: str = Field(..., description="要编辑的原始图像 (base64 编码)") prompt: str = Field(..., description="对新图像的文本描述", max_length=4000) mask: Optional[str] = Field(None, description="可选的遮罩层图像 (base64 编码)") model: Optional[str] = Field("wan2.6-image", description="模型ID") n: Optional[int] = Field(1, description="生成数量", ge=1, le=10) size: Optional[str] = Field("1024x1024", description="图像尺寸") response_format: Optional[Literal["url", "b64_json"]] = Field("url", description="返回格式") user: Optional[str] = Field(None, description="终端用户标识") class AudioTranscriptionsRequest(BaseModel): """语音转文字请求 兼容 /v1/audio/transcriptions 端点 """ file: str = Field(..., description="要识别的音频文件 (base64 编码)") model: str = Field(..., description="模型名称") language: Optional[str] = Field(None, description="ISO-639-1 语言代码") response_format: Optional[str] = Field("json", description="返回格式") user: Optional[str] = Field(None, description="终端用户标识") class AudioTranslationsRequest(BaseModel): """语音翻译请求 兼容 /v1/audio/translations 端点 """ file: str = Field(..., description="要翻译的音频文件 (base64 编码)") model: str = Field(..., description="使用的语音识别模型") source_language: Optional[str] = Field(None, description="原语音语言代码") target_language: Optional[str] = Field("en", description="目标翻译语言代码") translation_model: Optional[str] = Field("qwen-max", description="执行翻译的文本大模型") prompt: Optional[str] = Field(None, description="可选的翻译提示词") user: Optional[str] = Field(None, description="终端用户标识") class VideoGenerationRequest(BaseModel): """视频生成请求""" prompt: str = Field(..., description="生成视频的文本提示词") model: Optional[str] = Field("wan2.6-t2v", description="使用的模型ID") response_format: Optional[Literal["url", "b64_json"]] = Field("url", description="返回格式") size: Optional[str] = Field("1280x720", description="视频尺寸,如 1280x720, 1920x1080, 720x1280") duration: Optional[int] = Field(5, description="视频时长(秒)") image: Optional[str] = Field(None, description="图生视频的参考图像 (base64 编码或URL)") class AudioSpeechResponse(BaseModel): """文字转语音响应 兼容 /v1/audio/speech 端点 """ audio: str = Field(..., description="生成的音频数据 (base64 编码)") content_type: str = Field(..., description="音频 MIME 类型") # ───────────────────────────────────────────── # Rerank 重排序 # ───────────────────────────────────────────── class RerankRequest(BaseModel): """重排序请求 兼容 OpenAI 风格的 rerank 接口 """ model: str = Field(..., description="模型ID,如 bge-reranker-v2-m3") query: str = Field(..., description="查询文本") documents: List[str] = Field(..., description="待排序的文档列表", min_length=1) top_n: Optional[int] = Field(None, description="返回前N个结果,默认返回全部", ge=1) return_documents: Optional[bool] = Field(True, description="是否返回文档内容") user: Optional[str] = Field(None, description="终端用户标识") class RerankResult(BaseModel): """单个重排序结果""" index: int = Field(..., description="文档在原始列表中的索引") relevance_score: float = Field(..., description="相关性分数") document: Optional[str] = Field(None, description="文档内容(如果 return_documents=true)") class RerankResponse(BaseModel): """重排序响应""" object: str = Field("list", description="对象类型") data: List[RerankResult] = Field(..., description="排序后的结果列表") model: str = Field(..., description="使用的模型") usage: Usage = Field(..., description="Token用量统计")