{ "version": 14, "models": [ { "url": "https://bailian.console.aliyun.com/cn-beijing?spm=5176.29619931.J_XNqYbJaEnpB5_cCJf7e6D.1.770b10d7fVCktz&tab=model#/model-market/detail/cosyvoice-clone-v1", "model_name": "cosyvoice-clone-v1", "prices": { "语音合成": { "raw": "2.0", "unit": "元/每万字符", "price": 2.0, "currency": "CNY" } }, "model_info": { "features": { "cache存储": false, "前缀续写": false, "批量推理": false, "模型体验": false, "模型调优": false, "联网搜索": false, "结构化输出": false, "function calling": false }, "model_code": "cosyvoice-clone-v1", "description": "声音复刻Cosyvoice大模型,依托先进的大模型技术进行特征提取,从而完成声音的复刻,且无需训练过程。仅需提供时长较短的音频,即可迅速生成高度相似且听感自然的定制声音。", "display_tags": [ "语音合成" ], "input_modalities": [ "Text" ], "output_modalities": [ "Audio" ] }, "rate_limits": {}, "tool_prices": [], "scraped_at": "2026-04-07T04:02:34.714572Z" }, { "url": "https://bailian.console.aliyun.com/cn-beijing?spm=5176.29619931.J_XNqYbJaEnpB5_cCJf7e6D.1.770b10d7fVCktz&tab=model#/model-market/detail/cosyvoice-v3-flash", "model_name": "cosyvoice-v3-flash", "prices": { "语音合成": { "raw": "1", "unit": "元/每万字符", "price": 1.0, "currency": "CNY" } }, "model_info": { "features": { "cache存储": false, "前缀续写": false, "批量推理": false, "模型体验": false, "模型调优": false, "联网搜索": false, "结构化输出": false, "function calling": false }, "model_code": "cosyvoice-v3-flash", "description": "合成能力:CosyVoice-v3-Flash是通义实验室CosyVoice系列最新版高性能的语音合成大模型,较之前版本在自然度、音质、韵律、情感表现力上有更好的表现。该模型支持文本至语音的实时流式合成。克隆能力:CosyVoice-v3-Flash也是通义实验室CosyVoice系列最新版的语音克隆大模型,较之前版本提升了发音准确性、音色相似度,并且增加了更多小语种支持(德、西、法、意、俄)。仅需提供5-20s的参考音频,即可迅速生成高度相似且听感自然的定制声音。", "display_tags": [ "语音合成" ], "input_modalities": [ "Text" ], "output_modalities": [ "Audio" ] }, "rate_limits": { "RPM": "180", "上下文长度": null, "最大输入长度": null, "最大输出长度": null }, "tool_prices": [], "scraped_at": "2026-04-07T04:03:10.816050Z" }, { "url": "https://bailian.console.aliyun.com/cn-beijing?spm=5176.29619931.J_XNqYbJaEnpB5_cCJf7e6D.1.770b10d7fVCktz&tab=model#/model-market/detail/cosyvoice-v3-plus", "model_name": "cosyvoice-v3-plus", "prices": { "语音合成": { "raw": "2.0", "unit": "元/每万字符", "price": 2.0, "currency": "CNY" } }, "model_info": { "features": { "cache存储": false, "前缀续写": false, "批量推理": false, "模型体验": false, "模型调优": false, "联网搜索": false, "结构化输出": false, "function calling": false }, "model_code": "cosyvoice-v3-plus", "description": "克隆能力:CosyVoice-v3-plus是通义实验室CosyVoice系列最新版的语音克隆大模型,具有更好的音质和复刻相似度,适用于更专业的场景。仅需提供5-20s的参考音频,即可迅速生成高度相似且听感自然的定制声音。合成能力:CosyVoice-v3-plus是通义实验室CosyVoice系列最新版的语音合成大模型,具有更好的音质和表现力,适用于更专业的场景。该模型支持文本至语音的实时流式合成。", "display_tags": [ "语音合成" ], "input_modalities": [ "Text" ], "output_modalities": [ "Audio" ] }, "rate_limits": { "RPM": "180", "上下文长度": null, "最大输入长度": null, "最大输出长度": null }, "tool_prices": [], "scraped_at": "2026-04-07T04:02:54.837174Z" }, { "url": "https://bailian.console.aliyun.com/cn-beijing?spm=5176.29619931.J_XNqYbJaEnpB5_cCJf7e6D.1.770b10d7fVCktz&tab=model#/model-market/detail/fun-asr-realtime", "model_name": "fun-asr-realtime", "prices": { "语音识别": { "raw": "0.00033", "unit": "元/每秒", "price": 0.00033, "currency": "CNY" } }, "model_info": { "features": { "cache存储": false, "前缀续写": false, "批量推理": false, "模型体验": false, "模型调优": false, "联网搜索": false, "结构化输出": false, "function calling": false }, "model_code": "fun-asr-realtime", "description": "通义实验室新一代端到端语音识别大模型的实时版,基于领先的自研语音技术,具备卓越的上下文感知和高精度语音转写能力。基于端到端架构,Fun-ASR 集成了创新的 RAG 技术,支持大规模热词自定义、敏感/语气词自动过滤、ITN 规范化、标点预测等多维功能,显著提升了整体识别准确率和语境贴合度。同时,Fun-ASR 支持中英文自由切换,多地区方言覆盖,具备更强的噪声鲁棒性,适应多样复杂环境。", "display_tags": [ "实时语音识别" ], "input_modalities": [ "Audio" ], "output_modalities": [ "Text" ] }, "rate_limits": { "RPM": "1200", "上下文长度": null, "最大输入长度": null, "最大输出长度": null }, "tool_prices": [], "scraped_at": "2026-04-07T04:03:28.288659Z" }, { "url": "https://bailian.console.aliyun.com/cn-beijing?spm=5176.29619931.J_XNqYbJaEnpB5_cCJf7e6D.1.770b10d7fVCktz&tab=model#/model-market/detail/paraformer-v1", "model_name": "paraformer-v1", "prices": { "语音识别": { "raw": "0.00008", "unit": "元/每秒", "price": 0.00008, "currency": "CNY" } }, "model_info": { "features": { "cache存储": false, "前缀续写": false, "批量推理": false, "模型体验": true, "模型调优": false, "联网搜索": false, "结构化输出": false, "function calling": false }, "model_code": "paraformer-v1", "description": "Paraformer中英文语音识别模型,支持16kHz及以上采样率的音频或视频语音识别。", "display_tags": [ "语音识别" ], "input_modalities": [ "Audio" ], "output_modalities": [ "Text" ] }, "rate_limits": {}, "tool_prices": [], "scraped_at": "2026-04-07T04:04:07.572048Z" }, { "url": "https://bailian.console.aliyun.com/cn-beijing?spm=5176.29619931.J_XNqYbJaEnpB5_cCJf7e6D.1.770b10d7fVCktz&tab=model#/model-market/detail/paraformer-v2", "model_name": "paraformer-v2", "prices": { "语音识别": { "raw": "0.00008", "unit": "元/每秒", "price": 0.00008, "currency": "CNY" } }, "model_info": { "features": { "cache存储": false, "前缀续写": false, "批量推理": false, "模型体验": true, "模型调优": false, "联网搜索": false, "结构化输出": false, "function calling": false }, "model_code": "paraformer-v2", "description": "推荐使用 Paraformer最新语音识别模型,支持多个语种的语音识别。可以通过language_hints参数选择语种获得更准确的识别效果,支持任意采样率。 支持的语言包括:中文(含粤语等各种方言)、英文、日语、韩语。可支持热词。", "display_tags": [ "语音识别" ], "input_modalities": [ "Audio" ], "output_modalities": [ "Text" ] }, "rate_limits": {}, "tool_prices": [], "scraped_at": "2026-04-07T04:03:48.308940Z" }, { "url": "https://bailian.console.aliyun.com/cn-beijing?spm=5176.29619931.J_XNqYbJaEnpB5_cCJf7e6D.1.770b10d7fVCktz&tab=model#/model-market/detail/qwen2.5-vl-embedding", "model_name": "qwen2.5-vl-embedding", "prices": { "图片输入": { "raw": "1.8", "unit": "元/每百万tokens", "price": 1.8, "currency": "CNY" }, "文本输入": { "raw": "0.7", "unit": "元/每百万tokens", "price": 0.7, "currency": "CNY" } }, "model_info": { "features": { "cache存储": false, "前缀续写": false, "批量推理": false, "模型体验": false, "模型调优": false, "联网搜索": false, "结构化输出": false, "function calling": false }, "model_code": "qwen2.5-vl-embedding", "description": "基于Qwen2.5-VL底座训练的统一多模态向量模型,支持文本、图片、视频单模态/混合模态输入,输出统一表征向量,适用于跨模态检索、图搜、视频检索、图像聚类、复杂多模态信息检索、打标等场景", "display_tags": [ "多模态向量" ], "input_modalities": [ "Text", "Image" ], "output_modalities": [] }, "rate_limits": { "RPM": "1200", "TPM": "600000", "上下文长度": null, "最大输入长度": null, "最大输出长度": null }, "tool_prices": [], "scraped_at": "2026-04-07T03:58:58.395231Z" }, { "url": "https://bailian.console.aliyun.com/cn-beijing?spm=5176.29619931.J_XNqYbJaEnpB5_cCJf7e6D.1.770b10d7fVCktz&tab=model#/model-market/detail/qwen3-asr-flash-realtime", "model_name": "qwen3-asr-flash-realtime", "prices": { "语音识别": { "raw": "0.00033", "unit": "元/每秒", "price": 0.00033, "currency": "CNY" } }, "model_info": { "features": { "cache存储": false, "前缀续写": false, "批量推理": false, "模型体验": false, "模型调优": false, "联网搜索": false, "结构化输出": false, "function calling": false }, "model_code": "qwen3-asr-flash-realtime", "description": "Qwen3-ASR-Flash的实时版,一款基于大语言模型的高精度、高智能、高鲁棒性的多语种语音识别模型。依托强大的基座模型、海量的文本与多模态数据、千万小时音频数据,Qwen3-ASR-Flash实现了高精度的语音识别功能,能够自动判断语种并准确识别 11 个语种的语音,在复杂的音频环境下能够保证精确转录。", "display_tags": [ "Qwen3", "实时语音识别" ], "input_modalities": [ "Audio" ], "output_modalities": [ "Text" ] }, "rate_limits": { "RPM": "1200", "上下文长度": null, "最大输入长度": null, "最大输出长度": null }, "tool_prices": [], "scraped_at": "2026-04-07T04:04:27.248211Z" }, { "url": "https://bailian.console.aliyun.com/cn-beijing?spm=5176.29619931.J_XNqYbJaEnpB5_cCJf7e6D.1.770b10d7fVCktz&tab=model#/model-market/detail/qwen3-coder-flash", "model_name": "qwen3-coder-flash", "prices": { "256k