model_handler.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. AI模型处理器
  5. 用于管理生成、与嵌入模型的创建和配置
  6. 支持的模型类型:
  7. - doubao: 豆包模型
  8. - qwen: 通义千问模型
  9. - deepseek: DeepSeek模型
  10. - lq_qwen3_8b: 本地Qwen3-8B模型
  11. - lq_qwen3_8b_lq_lora: 本地Qwen3-8B-lq-lora模型
  12. - lq_qwen3_4b: 本地Qwen3-4B模型
  13. - qwen_local_14b: 本地Qwen3-14B模型
  14. - shutian_qwen3_embed: 蜀天Qwen3-Embedding-8B嵌入模型(默认)
  15. - siliconflow_embed: 硅基流动Qwen3-Embedding-8B嵌入模型
  16. - lq_bge_reranker_v2_m3: 本地BGE-reranker-v2-m3重排序模型
  17. - qwen3_5_35b_a3b: DashScope Qwen3.5-35B-A3B模型(默认兜底模型)
  18. - qwen3_5_27b: DashScope Qwen3.5-27B模型
  19. - qwen3_5_122b_a10b: DashScope Qwen3.5-122B-A10B模型
  20. - shutian_qwen3_5_122b: 蜀天Qwen3.5-122B-A10B模型(183.220.37.46:25423)
  21. - shutian_qwen3_8b: 蜀天Qwen3-8B模型(183.220.37.46:25424)
  22. - shutian_qwen3_5_35b: 蜀天Qwen3.5-35B模型(183.220.37.46:25427)
  23. - shutian_qwen3_6_27b: 蜀天Qwen3.6-27B模型(183.220.37.46:25424)
  24. - shutian_qwen3_embed: 蜀天Qwen3-Embedding-8B模型(183.220.37.46:25425)
  25. - shutian_qwen3_reranker: 蜀天Qwen3-Reranker-8B模型(183.220.37.46:25426)
  26. """
  27. # 禁用 transformers 的深度学习框架检测,避免启动时耗时扫描
  28. import os
  29. os.environ["TRANSFORMERS_VERBOSITY"] = "error"
  30. os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
  31. import requests
  32. from langchain_openai import ChatOpenAI, OpenAIEmbeddings
  33. from foundation.infrastructure.config.config import config_handler
  34. from foundation.observability.logger.loggering import review_logger as logger
  35. class ModelConnectionError(Exception):
  36. """模型连接错误"""
  37. pass
  38. class ModelConfigError(Exception):
  39. """模型配置错误"""
  40. pass
  41. class ModelAPIError(Exception):
  42. """模型API调用错误"""
  43. pass
  44. class ModelHandler:
  45. """
  46. AI模型处理器类,用于管理多种AI模型的创建和配置
  47. """
  48. # 模型连接超时时间配置(秒)
  49. CONNECTION_TIMEOUT = 30
  50. REQUEST_TIMEOUT = 120
  51. MAX_RETRIES = 2
  52. def __init__(self):
  53. """
  54. 初始化模型处理器
  55. 加载配置处理器,用于后续读取各种模型的配置信息
  56. """
  57. self.config = config_handler
  58. self._model_cache = {} # 模型实例缓存
  59. def _check_connection(self, base_url: str, api_key: str = None, timeout: int = 5) -> bool:
  60. """
  61. 检查模型服务连接是否可用
  62. 支持两种检查方式:
  63. 1. GET /models - 标准 OpenAI 兼容接口
  64. 2. POST /chat/completions - 直接测试 chat 接口(部分服务只支持此接口)
  65. Args:
  66. base_url: 模型服务地址
  67. api_key: API密钥(可选)
  68. timeout: 超时时间(秒)
  69. Returns:
  70. bool: 连接是否可用
  71. """
  72. headers = {}
  73. if api_key and api_key != "dummy":
  74. headers["Authorization"] = f"Bearer {api_key}"
  75. # 方法1: 尝试 /models 端点
  76. try:
  77. health_url = f"{base_url.rstrip('/')}/models"
  78. response = requests.get(
  79. health_url,
  80. headers=headers,
  81. timeout=timeout
  82. )
  83. if 200 <= response.status_code < 300:
  84. logger.debug(f"连接检查通过 (/models): {base_url}")
  85. return True
  86. except requests.exceptions.Timeout:
  87. logger.debug(f"连接检查超时 (/models): {base_url}")
  88. except Exception as e:
  89. logger.debug(f"连接检查失败 (/models): {base_url}, {e}")
  90. # 方法2: 尝试 /chat/completions 端点(发送一个简单请求)
  91. try:
  92. chat_url = f"{base_url.rstrip('/')}/chat/completions"
  93. test_payload = {
  94. "model": "test",
  95. "messages": [{"role": "user", "content": "test"}],
  96. "max_tokens": 1
  97. }
  98. response = requests.post(
  99. chat_url,
  100. headers={**headers, "Content-Type": "application/json"},
  101. json=test_payload,
  102. timeout=timeout
  103. )
  104. # 即使返回 400/401/404 也说明服务是通的
  105. # 只有连接错误/超时才是真的连不上
  106. if response.status_code != 503: # 503 表示服务不可用
  107. logger.debug(f"连接检查通过 (/chat/completions): {base_url}, 状态码: {response.status_code}")
  108. return True
  109. except requests.exceptions.Timeout:
  110. logger.warning(f"连接检查超时: {base_url}")
  111. except requests.exceptions.ConnectionError as e:
  112. logger.warning(f"连接检查错误: {base_url}, {e}")
  113. except Exception as e:
  114. logger.warning(f"连接检查异常: {base_url}, {e}")
  115. return False
  116. def _handle_model_error(self, model_name: str, error: Exception, fallback_model=None):
  117. """
  118. 统一处理模型错误
  119. Args:
  120. model_name: 模型名称
  121. error: 异常对象
  122. fallback_model: 降级模型实例(可选)
  123. Returns:
  124. 降级模型实例,如果不可用则返回None
  125. """
  126. error_type = type(error).__name__
  127. error_msg = str(error)
  128. logger.error(f"模型初始化失败 [{model_name}]: {error_type} - {error_msg}")
  129. # 如果提供了降级模型,记录日志并返回
  130. if fallback_model:
  131. logger.warning(f"使用降级模型: {fallback_model.__class__.__name__}")
  132. return fallback_model
  133. # 如果没有降级模型,返回None让调用方处理
  134. return None
  135. def get_models(self):
  136. """
  137. 获取AI模型实例
  138. Returns:
  139. ChatOpenAI: 配置好的AI模型实例
  140. Note:
  141. 优先从 model_setting.yaml 读取默认模型配置,如果不存在则回退到 config.ini 的 MODEL_TYPE
  142. 支持的模型类型:doubao, qwen, deepseek, lq_qwen3_8b, lq_qwen3_8b_lora, lq_qwen3_4b, qwen_local_14b
  143. """
  144. # 优先从 model_setting.yaml 读取默认模型配置
  145. try:
  146. from foundation.ai.models.model_config_loader import get_model_for_function
  147. model_type = get_model_for_function("default")
  148. if model_type:
  149. logger.debug(f"从 model_setting.yaml 读取默认模型: {model_type}")
  150. else:
  151. model_type = self.config.get("model", "MODEL_TYPE")
  152. except Exception as e:
  153. logger.debug(f"从 model_setting.yaml 读取默认模型失败: {e},回退到 config.ini")
  154. model_type = self.config.get("model", "MODEL_TYPE")
  155. logger.info(f"正在初始化AI模型,模型类型: {model_type}")
  156. # 检查缓存
  157. cache_key = f"chat_{model_type}"
  158. if cache_key in self._model_cache:
  159. logger.info(f"使用缓存的模型: {model_type}")
  160. return self._model_cache[cache_key]
  161. model = None
  162. try:
  163. if model_type == "doubao":
  164. model = self._get_doubao_model()
  165. elif model_type == "qwen":
  166. model = self._get_qwen_model()
  167. elif model_type == "deepseek":
  168. model = self._get_deepseek_model()
  169. elif model_type == "lq_qwen3_8b":
  170. model = self._get_lq_qwen3_8b_model()
  171. elif model_type == "lq_qwen3_8b_lq_lora":
  172. model = self._get_lq_qwen3_8b_lora_model()
  173. elif model_type == "lq_qwen3_4b":
  174. model = self._get_lq_qwen3_4b_model()
  175. elif model_type == "qwen_local_14b":
  176. model = self._get_qwen_local_14b_model()
  177. elif model_type == "qwen3_5_35b_a3b":
  178. model = self._get_qwen3_5_35b_a3b_model()
  179. elif model_type == "qwen3_5_27b":
  180. model = self._get_qwen3_5_27b_model()
  181. elif model_type == "qwen3_5_122b_a10b":
  182. model = self._get_qwen3_5_122b_a10b_model()
  183. elif model_type == "shutian_qwen3_5_122b":
  184. model = self._get_shutian_qwen3_5_122b_model()
  185. elif model_type == "shutian_qwen3_8b":
  186. model = self._get_shutian_qwen3_8b_model()
  187. elif model_type == "shutian_qwen3_5_35b":
  188. model = self._get_shutian_qwen3_5_35b_model()
  189. elif model_type == "shutian_qwen3_6_27b":
  190. model = self._get_shutian_qwen3_6_27b_model()
  191. else:
  192. logger.warning(f"未知的模型类型 '{model_type}',使用默认 qwen3_5_35b_a3b 模型")
  193. model = self._get_qwen3_5_35b_a3b_model()
  194. if model:
  195. self._model_cache[cache_key] = model
  196. logger.info(f"AI模型初始化完成: {model_type}")
  197. return model
  198. else:
  199. raise ModelAPIError(f"模型初始化返回None: {model_type}")
  200. except Exception as e:
  201. logger.error(f"获取模型失败 [{model_type}]: {e}")
  202. # 使用 qwen3_5_35b_a3b 作为兜底降级方案
  203. if model_type != "qwen3_5_35b_a3b":
  204. logger.info("尝试使用 qwen3_5_35b_a3b 模型作为降级方案")
  205. try:
  206. fallback_model = self._get_qwen3_5_35b_a3b_model()
  207. if fallback_model:
  208. self._model_cache[cache_key] = fallback_model
  209. logger.warning("已切换到 qwen3_5_35b_a3b 降级模型")
  210. return fallback_model
  211. except Exception as fallback_error:
  212. logger.error(f"降级模型也失败: {fallback_error}")
  213. # 如果所有模型都失败,抛出异常
  214. raise ModelConnectionError(f"无法初始化任何模型服务: {e}")
  215. def get_model_by_name(self, model_type: str = None):
  216. """
  217. 根据模型名称动态获取指定的AI模型实例
  218. Args:
  219. model_type: 模型类型名称,如果为None则使用配置文件中的默认模型
  220. 支持的模型类型:doubao, qwen, deepseek, gemini,
  221. lq_qwen3_8b, lq_qwen3_8b_lq_lora,
  222. lq_qwen3_4b, qwen_local_14b
  223. Returns:
  224. ChatOpenAI: 配置好的AI模型实例
  225. Note:
  226. 该方法支持动态切换模型,不受配置文件中的默认MODEL_TYPE限制
  227. 如果model_type为None,则使用配置文件中的默认模型
  228. 如果model_type无效,则使用gemini作为降级模型
  229. """
  230. # 如果未指定模型类型,使用配置文件中的默认模型
  231. if model_type is None:
  232. model_type = self.config.get("model", "MODEL_TYPE")
  233. logger.info(f"动态获取AI模型,模型类型: {model_type}")
  234. # 检查缓存
  235. cache_key = f"chat_{model_type}"
  236. if cache_key in self._model_cache:
  237. logger.info(f"使用缓存的模型: {model_type}")
  238. return self._model_cache[cache_key]
  239. model = None
  240. try:
  241. if model_type == "doubao":
  242. model = self._get_doubao_model()
  243. elif model_type == "qwen":
  244. model = self._get_qwen_model()
  245. elif model_type == "deepseek":
  246. model = self._get_deepseek_model()
  247. elif model_type == "lq_qwen3_8b":
  248. model = self._get_lq_qwen3_8b_model()
  249. elif model_type == "lq_qwen3_8b_lq_lora":
  250. model = self._get_lq_qwen3_8b_lora_model()
  251. elif model_type == "lq_qwen3_4b":
  252. model = self._get_lq_qwen3_4b_model()
  253. elif model_type == "qwen_local_14b":
  254. model = self._get_qwen_local_14b_model()
  255. elif model_type == "qwen3_5_35b_a3b":
  256. model = self._get_qwen3_5_35b_a3b_model()
  257. elif model_type == "qwen3_5_27b":
  258. model = self._get_qwen3_5_27b_model()
  259. elif model_type == "qwen3_5_122b_a10b":
  260. model = self._get_qwen3_5_122b_a10b_model()
  261. elif model_type == "shutian_qwen3_5_122b":
  262. model = self._get_shutian_qwen3_5_122b_model()
  263. elif model_type == "shutian_qwen3_8b":
  264. model = self._get_shutian_qwen3_8b_model()
  265. elif model_type == "shutian_qwen3_5_35b":
  266. model = self._get_shutian_qwen3_5_35b_model()
  267. elif model_type == "shutian_qwen3_6_27b":
  268. model = self._get_shutian_qwen3_6_27b_model()
  269. else:
  270. logger.warning(f"未知的模型类型 '{model_type}',使用默认 qwen3_5_35b_a3b 模型")
  271. model = self._get_qwen3_5_35b_a3b_model()
  272. if model:
  273. self._model_cache[cache_key] = model
  274. logger.info(f"AI模型动态初始化完成: {model_type}")
  275. return model
  276. else:
  277. raise ModelAPIError(f"模型初始化返回None: {model_type}")
  278. except Exception as e:
  279. logger.error(f"动态获取模型失败 [{model_type}]: {e}")
  280. # 使用 qwen3_5_35b_a3b 作为兜底降级方案
  281. if model_type != "qwen3_5_35b_a3b":
  282. logger.info("尝试使用 qwen3_5_35b_a3b 模型作为降级方案")
  283. try:
  284. fallback_model = self._get_qwen3_5_35b_a3b_model()
  285. if fallback_model:
  286. # 注意:不要把降级模型存入原模型的缓存,避免后续调用都使用错误的模型
  287. fallback_cache_key = "chat_qwen3_5_35b_a3b"
  288. self._model_cache[fallback_cache_key] = fallback_model
  289. logger.warning(f"已切换到 qwen3_5_35b_a3b 降级模型(不会缓存为 {model_type})")
  290. return fallback_model
  291. except Exception as fallback_error:
  292. logger.error(f"降级模型也失败: {fallback_error}")
  293. # 如果所有模型都失败,抛出异常
  294. raise ModelConnectionError(f"无法初始化任何模型服务: {e}")
  295. def get_model_by_function(self, function_name: str):
  296. """
  297. 根据功能名称获取对应的AI模型实例
  298. 从 config/model_setting.yaml 加载功能对应的模型配置
  299. Args:
  300. function_name: 功能名称,如:
  301. - doc_classification_secondary: 文档二级分类
  302. - doc_classification_tertiary: 文档三级分类
  303. - completeness_review_generate: 完整性审查生成
  304. - completeness_review_classify: 完整性审查分类
  305. - rag_query_understand: RAG查询理解
  306. - rag_answer_generate: RAG答案生成
  307. - sensitive_check: 敏感信息检查
  308. - grammar_check: 语法检查
  309. Returns:
  310. ChatOpenAI: 配置好的AI模型实例
  311. Example:
  312. model = model_handler.get_model_by_function("doc_classification_tertiary")
  313. """
  314. try:
  315. from foundation.ai.models.model_config_loader import get_model_for_function
  316. model_type = get_model_for_function(function_name)
  317. logger.info(f"根据功能 '{function_name}' 获取模型: {model_type}")
  318. return self.get_model_by_name(model_type)
  319. except Exception as e:
  320. logger.warning(f"根据功能获取模型失败 [{function_name}]: {e},尝试使用默认模型")
  321. try:
  322. default_model = get_model_for_function("default")
  323. return self.get_model_by_name(default_model)
  324. except Exception:
  325. return self.get_model_by_name("qwen3_5_35b_a3b")
  326. def get_embedding_model(self):
  327. """
  328. 获取Embedding模型实例
  329. Returns:
  330. OpenAIEmbeddings: 配置好的Embedding模型实例
  331. Note:
  332. 根据配置文件中的EMBEDDING_MODEL_TYPE参数选择对应模型
  333. 支持的模型类型:shutian_qwen3_embed, siliconflow_embed
  334. 默认返回蜀天 shutian_qwen3_embed 模型
  335. """
  336. # 优先从 model_setting.yaml 读取embedding配置
  337. embedding_model_type = None
  338. try:
  339. from .model_config_loader import model_config_loader
  340. settings = model_config_loader._config.get("model_settings", {})
  341. embedding_config = settings.get("embedding", {})
  342. if embedding_config and "model" in embedding_config:
  343. embedding_model_type = embedding_config["model"]
  344. logger.debug(f"从 model_setting.yaml 读取embedding模型: {embedding_model_type}")
  345. except Exception as e:
  346. logger.debug(f"从 model_setting.yaml 读取embedding配置失败: {e}")
  347. # 回退到 config.ini
  348. if not embedding_model_type:
  349. embedding_model_type = self.config.get("model", "EMBEDDING_MODEL_TYPE", "shutian_qwen3_embed")
  350. logger.info(f"正在初始化Embedding模型,模型类型: {embedding_model_type}")
  351. # 检查缓存
  352. cache_key = f"embed_{embedding_model_type}"
  353. if cache_key in self._model_cache:
  354. logger.info(f"使用缓存的Embedding模型: {embedding_model_type}")
  355. return self._model_cache[cache_key]
  356. model = None
  357. try:
  358. if embedding_model_type == "siliconflow_embed":
  359. model = self._get_siliconflow_embedding_model()
  360. elif embedding_model_type == "shutian_qwen3_embed":
  361. model = self._get_shutian_qwen3_embed()
  362. else:
  363. # 默认返回蜀天Embedding模型
  364. logger.warning(f"未知的Embedding模型类型 '{embedding_model_type}',使用默认蜀天Embedding")
  365. model = self._get_shutian_qwen3_embed()
  366. if model:
  367. self._model_cache[cache_key] = model
  368. logger.info(f"Embedding模型初始化完成: {embedding_model_type}")
  369. return model
  370. else:
  371. raise ModelAPIError(f"Embedding模型初始化返回None: {embedding_model_type}")
  372. except (ModelConnectionError, Exception) as e:
  373. logger.error(f"获取Embedding模型失败 [{embedding_model_type}]: {e}")
  374. raise ModelConnectionError(f"无法初始化Embedding模型服务: {e}")
  375. def _get_doubao_model(self):
  376. """
  377. 获取豆包模型
  378. Returns:
  379. ChatOpenAI: 配置好的豆包模型实例
  380. """
  381. try:
  382. doubao_url = self.config.get("doubao", "DOUBAO_SERVER_URL")
  383. doubao_model_id = self.config.get("doubao", "DOUBAO_MODEL_ID")
  384. doubao_api_key = self.config.get("doubao", "DOUBAO_API_KEY")
  385. # 验证配置完整性
  386. if not all([doubao_url, doubao_model_id, doubao_api_key]):
  387. missing = []
  388. if not doubao_url:
  389. missing.append("DOUBAO_SERVER_URL")
  390. if not doubao_model_id:
  391. missing.append("DOUBAO_MODEL_ID")
  392. if not doubao_api_key:
  393. missing.append("DOUBAO_API_KEY")
  394. raise ModelConfigError(f"豆包模型配置不完整,缺少: {', '.join(missing)}")
  395. # 检查连接
  396. if not self._check_connection(doubao_url, doubao_api_key):
  397. logger.warning(f"豆包模型服务连接失败: {doubao_url}")
  398. raise ModelConnectionError(f"无法连接到豆包模型服务: {doubao_url}")
  399. llm = ChatOpenAI(
  400. base_url=doubao_url,
  401. model=doubao_model_id,
  402. api_key=doubao_api_key,
  403. temperature=0.7,
  404. timeout=self.REQUEST_TIMEOUT,
  405. extra_body={
  406. "enable_thinking": False,
  407. })
  408. logger.info(f"豆包模型初始化成功: {doubao_model_id}")
  409. return llm
  410. except ModelConfigError:
  411. raise
  412. except ModelConnectionError:
  413. raise
  414. except Exception as e:
  415. error = ModelAPIError(f"豆包模型初始化异常: {e}")
  416. return self._handle_model_error("doubao", error)
  417. def _get_qwen_model(self):
  418. """
  419. 获取通义千问模型
  420. Returns:
  421. ChatOpenAI: 配置好的通义千问模型实例
  422. """
  423. try:
  424. qwen_url = self.config.get("qwen", "QWEN_SERVER_URL")
  425. qwen_model_id = self.config.get("qwen", "QWEN_MODEL_ID")
  426. qwen_api_key = self.config.get("qwen", "QWEN_API_KEY")
  427. # 验证配置完整性
  428. if not all([qwen_url, qwen_model_id, qwen_api_key]):
  429. missing = []
  430. if not qwen_url:
  431. missing.append("QWEN_SERVER_URL")
  432. if not qwen_model_id:
  433. missing.append("QWEN_MODEL_ID")
  434. if not qwen_api_key:
  435. missing.append("QWEN_API_KEY")
  436. raise ModelConfigError(f"通义千问模型配置不完整,缺少: {', '.join(missing)}")
  437. # 检查连接
  438. if not self._check_connection(qwen_url, qwen_api_key):
  439. logger.warning(f"通义千问模型服务连接失败: {qwen_url}")
  440. raise ModelConnectionError(f"无法连接到通义千问模型服务: {qwen_url}")
  441. llm = ChatOpenAI(
  442. base_url=qwen_url,
  443. model=qwen_model_id,
  444. api_key=qwen_api_key,
  445. temperature=0.7,
  446. timeout=self.REQUEST_TIMEOUT,
  447. extra_body={
  448. "enable_thinking": False,
  449. })
  450. logger.info(f"通义千问模型初始化成功: {qwen_model_id}")
  451. return llm
  452. except ModelConfigError:
  453. raise
  454. except ModelConnectionError:
  455. raise
  456. except Exception as e:
  457. error = ModelAPIError(f"通义千问模型初始化异常: {e}")
  458. return self._handle_model_error("qwen", error)
  459. def _get_deepseek_model(self):
  460. """
  461. 获取DeepSeek模型
  462. Returns:
  463. ChatOpenAI: 配置好的DeepSeek模型实例
  464. """
  465. try:
  466. deepseek_url = self.config.get("deepseek", "DEEPSEEK_SERVER_URL")
  467. deepseek_model_id = self.config.get("deepseek", "DEEPSEEK_MODEL_ID")
  468. deepseek_api_key = self.config.get("deepseek", "DEEPSEEK_API_KEY")
  469. # 验证配置完整性
  470. if not all([deepseek_url, deepseek_model_id, deepseek_api_key]):
  471. missing = []
  472. if not deepseek_url:
  473. missing.append("DEEPSEEK_SERVER_URL")
  474. if not deepseek_model_id:
  475. missing.append("DEEPSEEK_MODEL_ID")
  476. if not deepseek_api_key:
  477. missing.append("DEEPSEEK_API_KEY")
  478. raise ModelConfigError(f"DeepSeek模型配置不完整,缺少: {', '.join(missing)}")
  479. # 检查连接
  480. if not self._check_connection(deepseek_url, deepseek_api_key):
  481. logger.warning(f"DeepSeek模型服务连接失败: {deepseek_url}")
  482. raise ModelConnectionError(f"无法连接到DeepSeek模型服务: {deepseek_url}")
  483. llm = ChatOpenAI(
  484. base_url=deepseek_url,
  485. model=deepseek_model_id,
  486. api_key=deepseek_api_key,
  487. temperature=0.7,
  488. timeout=self.REQUEST_TIMEOUT,
  489. extra_body={
  490. "enable_thinking": False,
  491. })
  492. logger.info(f"DeepSeek模型初始化成功: {deepseek_model_id}")
  493. return llm
  494. except ModelConfigError:
  495. raise
  496. except ModelConnectionError:
  497. raise
  498. except Exception as e:
  499. error = ModelAPIError(f"DeepSeek模型初始化异常: {e}")
  500. return self._handle_model_error("deepseek", error)
  501. def _get_lq_qwen3_8b_model(self):
  502. """
  503. 获取本地Qwen3-8B-Instruct模型
  504. Returns:
  505. ChatOpenAI: 配置好的本地Qwen3-8B模型实例
  506. """
  507. try:
  508. server_url = "http://192.168.91.253:9002/v1"
  509. model_id = "Qwen3-8B"
  510. # 检查本地服务连接
  511. if not self._check_connection(server_url, "dummy", timeout=3):
  512. logger.warning(f"本地Qwen3-8B模型服务连接失败: {server_url}")
  513. raise ModelConnectionError(f"无法连接到本地Qwen3-8B模型服务: {server_url}")
  514. llm = ChatOpenAI(
  515. base_url=server_url,
  516. model=model_id,
  517. api_key="dummy",
  518. temperature=0.7,
  519. timeout=self.REQUEST_TIMEOUT,
  520. )
  521. logger.info(f"本地Qwen3-8B模型初始化成功: {model_id}")
  522. return llm
  523. except ModelConnectionError:
  524. raise
  525. except Exception as e:
  526. error = ModelAPIError(f"本地Qwen3-8B模型初始化异常: {e}")
  527. return self._handle_model_error("lq_qwen3_8b", error)
  528. def _get_lq_qwen3_8b_lora_model(self):
  529. """
  530. 获取本地Qwen3-8B-lq-lora模型
  531. Returns:
  532. ChatOpenAI: 配置好的本地Qwen3-8B-lq-lora模型实例
  533. """
  534. try:
  535. server_url = self.config.get("lq_qwen3_8B_lora", "LQ_QWEN3_8B_LQ_LORA_SERVER_URL")
  536. model_id = self.config.get("lq_qwen3_8B_lora", "LQ_QWEN3_8B_LQ_LORA_MODEL_ID")
  537. api_key = self.config.get("lq_qwen3_8B_lora", "LQ_QWEN3_8B_LQ_LORA_API_KEY", "dummy")
  538. # 验证配置完整性
  539. if not all([server_url, model_id]):
  540. missing = []
  541. if not server_url:
  542. missing.append("LQ_QWEN3_8B_LQ_LORA_SERVER_URL")
  543. if not model_id:
  544. missing.append("LQ_QWEN3_8B_LQ_LORA_MODEL_ID")
  545. raise ModelConfigError(f"本地Qwen3-8B-lq-lora模型配置不完整,缺少: {', '.join(missing)}")
  546. # 检查本地服务连接
  547. if not self._check_connection(server_url, api_key, timeout=3):
  548. logger.warning(f"本地Qwen3-8B-lq-lora模型服务连接失败: {server_url}")
  549. raise ModelConnectionError(f"无法连接到本地Qwen3-8B-lq-lora模型服务: {server_url}")
  550. llm = ChatOpenAI(
  551. base_url=server_url,
  552. model=model_id,
  553. api_key=api_key,
  554. temperature=0.7,
  555. timeout=self.REQUEST_TIMEOUT,
  556. )
  557. logger.info(f"本地Qwen3-8B-lq-lora模型初始化成功: {model_id}")
  558. return llm
  559. except ModelConfigError:
  560. raise
  561. except ModelConnectionError:
  562. raise
  563. except Exception as e:
  564. error = ModelAPIError(f"本地Qwen3-8B-lq-lora模型初始化异常: {e}")
  565. return self._handle_model_error("lq_qwen3_8b_lora", error)
  566. def _get_lq_qwen3_4b_model(self):
  567. """
  568. 获取本地Qwen3-4B-Instruct模型
  569. Returns:
  570. ChatOpenAI: 配置好的本地Qwen3-4B模型实例
  571. """
  572. try:
  573. server_url = "http://192.168.91.253:9001/v1"
  574. model_id = "Qwen3-4B"
  575. # 检查本地服务连接
  576. if not self._check_connection(server_url, "dummy", timeout=3):
  577. logger.warning(f"本地Qwen3-4B模型服务连接失败: {server_url}")
  578. raise ModelConnectionError(f"无法连接到本地Qwen3-4B模型服务: {server_url}")
  579. llm = ChatOpenAI(
  580. base_url=server_url,
  581. model=model_id,
  582. api_key="dummy",
  583. temperature=0.7,
  584. timeout=self.REQUEST_TIMEOUT,
  585. )
  586. logger.info(f"本地Qwen3-4B模型初始化成功: {model_id}")
  587. return llm
  588. except ModelConnectionError:
  589. raise
  590. except Exception as e:
  591. error = ModelAPIError(f"本地Qwen3-4B模型初始化异常: {e}")
  592. return self._handle_model_error("lq_qwen3_4b", error)
  593. def _get_qwen_local_14b_model(self):
  594. """
  595. 获取本地Qwen3-14B-Instruct模型
  596. Returns:
  597. ChatOpenAI: 配置好的本地Qwen3-14B模型实例
  598. """
  599. try:
  600. server_url = "http://192.168.91.253:9003/v1"
  601. model_id = "Qwen3-14B"
  602. # 检查本地服务连接
  603. if not self._check_connection(server_url, "dummy", timeout=3):
  604. logger.warning(f"本地Qwen3-14B模型服务连接失败: {server_url}")
  605. raise ModelConnectionError(f"无法连接到本地Qwen3-14B模型服务: {server_url}")
  606. llm = ChatOpenAI(
  607. base_url=server_url,
  608. model=model_id,
  609. api_key="dummy",
  610. temperature=0.7,
  611. timeout=self.REQUEST_TIMEOUT,
  612. )
  613. logger.info(f"本地Qwen3-14B模型初始化成功: {model_id}")
  614. return llm
  615. except ModelConnectionError:
  616. raise
  617. except Exception as e:
  618. error = ModelAPIError(f"本地Qwen3-14B模型初始化异常: {e}")
  619. return self._handle_model_error("qwen_local_14b", error)
  620. def _get_qwen3_5_35b_a3b_model(self):
  621. """
  622. 获取 DashScope Qwen3.5-35B-A3B 模型
  623. Returns:
  624. ChatOpenAI: 配置好的 DashScope Qwen3.5-35B-A3B 模型实例
  625. """
  626. try:
  627. url = self.config.get("qwen3_5_35b_a3b", "DASHSCOPE_SERVER_URL")
  628. model_id = self.config.get("qwen3_5_35b_a3b", "DASHSCOPE_MODEL_ID")
  629. api_key = self.config.get("qwen3_5_35b_a3b", "DASHSCOPE_API_KEY")
  630. # 验证配置完整性
  631. if not all([url, model_id, api_key]):
  632. missing = []
  633. if not url:
  634. missing.append("DASHSCOPE_SERVER_URL")
  635. if not model_id:
  636. missing.append("DASHSCOPE_MODEL_ID")
  637. if not api_key:
  638. missing.append("DASHSCOPE_API_KEY")
  639. raise ModelConfigError(f"DashScope Qwen3.5-35B 模型配置不完整,缺少: {', '.join(missing)}")
  640. llm = ChatOpenAI(
  641. base_url=url,
  642. model=model_id,
  643. api_key=api_key,
  644. temperature=0.7,
  645. timeout=self.REQUEST_TIMEOUT,
  646. extra_body={
  647. "chat_template_kwargs": {"enable_thinking": False}
  648. }
  649. )
  650. logger.info(f"DashScope Qwen3.5-35B 模型初始化成功: {model_id} (思考模式: 关闭)")
  651. return llm
  652. except ModelConfigError:
  653. raise
  654. except Exception as e:
  655. return self._handle_model_error("qwen3_5_35b_a3b", ModelAPIError(str(e)))
  656. def _get_qwen3_5_27b_model(self):
  657. """
  658. 获取 DashScope Qwen3.5-27B 模型
  659. Returns:
  660. ChatOpenAI: 配置好的 DashScope Qwen3.5-27B 模型实例
  661. """
  662. try:
  663. url = self.config.get("qwen3_5_27b", "DASHSCOPE_SERVER_URL")
  664. model_id = self.config.get("qwen3_5_27b", "DASHSCOPE_MODEL_ID")
  665. api_key = self.config.get("qwen3_5_27b", "DASHSCOPE_API_KEY")
  666. # 验证配置完整性
  667. if not all([url, model_id, api_key]):
  668. missing = []
  669. if not url:
  670. missing.append("DASHSCOPE_SERVER_URL")
  671. if not model_id:
  672. missing.append("DASHSCOPE_MODEL_ID")
  673. if not api_key:
  674. missing.append("DASHSCOPE_API_KEY")
  675. raise ModelConfigError(f"DashScope Qwen3.5-27B 模型配置不完整,缺少: {', '.join(missing)}")
  676. llm = ChatOpenAI(
  677. base_url=url,
  678. model=model_id,
  679. api_key=api_key,
  680. temperature=0.7,
  681. timeout=self.REQUEST_TIMEOUT,
  682. extra_body={
  683. "chat_template_kwargs": {"enable_thinking": False}
  684. }
  685. )
  686. logger.info(f"DashScope Qwen3.5-27B 模型初始化成功: {model_id} (思考模式: 关闭)")
  687. return llm
  688. except ModelConfigError:
  689. raise
  690. except Exception as e:
  691. return self._handle_model_error("qwen3_5_27b", ModelAPIError(str(e)))
  692. def _get_qwen3_5_122b_a10b_model(self):
  693. """
  694. 获取 DashScope Qwen3.5-122B-A10B 模型
  695. Returns:
  696. ChatOpenAI: 配置好的 DashScope Qwen3.5-122B-A10B 模型实例
  697. """
  698. try:
  699. url = self.config.get("qwen3_5_122b_a10b", "DASHSCOPE_SERVER_URL")
  700. model_id = self.config.get("qwen3_5_122b_a10b", "DASHSCOPE_MODEL_ID")
  701. api_key = self.config.get("qwen3_5_122b_a10b", "DASHSCOPE_API_KEY")
  702. # 验证配置完整性
  703. if not all([url, model_id, api_key]):
  704. missing = []
  705. if not url:
  706. missing.append("DASHSCOPE_SERVER_URL")
  707. if not model_id:
  708. missing.append("DASHSCOPE_MODEL_ID")
  709. if not api_key:
  710. missing.append("DASHSCOPE_API_KEY")
  711. raise ModelConfigError(f"DashScope Qwen3.5-122B 模型配置不完整,缺少: {', '.join(missing)}")
  712. llm = ChatOpenAI(
  713. base_url=url,
  714. model=model_id,
  715. api_key=api_key,
  716. temperature=0.7,
  717. timeout=self.REQUEST_TIMEOUT,
  718. extra_body={
  719. "chat_template_kwargs": {"enable_thinking": False}
  720. }
  721. )
  722. logger.info(f"DashScope Qwen3.5-122B 模型初始化成功: {model_id} (思考模式: 关闭)")
  723. return llm
  724. except ModelConfigError:
  725. raise
  726. except Exception as e:
  727. return self._handle_model_error("qwen3_5_122b_a10b", ModelAPIError(str(e)))
  def _get_siliconflow_embedding_model(self):
      """
      Build the SiliconFlow Qwen3-Embedding-8B embedding client.

      Reads the endpoint URL, API key, model id and embedding dimensions from
      the ``siliconflow_embed`` config section, checks that the required
      values are non-empty, probes the service with ``_check_connection`` and
      then constructs an ``OpenAIEmbeddings`` client.

      Returns:
          The configured ``OpenAIEmbeddings`` instance, or whatever
          ``_handle_model_error`` returns when an unexpected error occurs.

      Raises:
          ModelConfigError: a required config value is empty.
          ModelConnectionError: the connection probe reports failure.
      """
      try:
          section = "siliconflow_embed"
          endpoint = self.config.get(section, "SLCF_EMBED_SERVER_URL")
          access_key = self.config.get(section, "SLCF_EMBED_API_KEY")
          model_name = self.config.get(section, "SLCF_EMBED_MODEL_ID", "Qwen/Qwen3-Embedding-8B")
          # NOTE(review): this value is only reported in the success log line;
          # it is not forwarded to the OpenAIEmbeddings client.
          dims = self.config.get(section, "SLCF_EMBED_DIMENSIONS", "4096")

          # Validate configuration completeness, naming every empty key.
          required = (
              ("SLCF_EMBED_SERVER_URL", endpoint),
              ("SLCF_EMBED_API_KEY", access_key),
              ("SLCF_EMBED_MODEL_ID", model_name),
          )
          absent = [key for key, value in required if not value]
          if absent:
              absent_keys = ", ".join(absent)
              raise ModelConfigError(f"硅基流动Embedding模型配置不完整,缺少: {absent_keys}")

          # Probe the service before building the client.
          reachable = self._check_connection(endpoint, access_key)
          if not reachable:
              logger.warning(f"硅基流动Embedding模型服务连接失败: {endpoint}")
              raise ModelConnectionError(f"无法连接到硅基流动Embedding模型服务: {endpoint}")

          # The service is accessed through langchain_openai's OpenAIEmbeddings.
          client_options = {
              "base_url": endpoint,
              "model": model_name,
              "api_key": access_key,
              "timeout": self.REQUEST_TIMEOUT,
              "tiktoken_enabled": False,
              "check_embedding_ctx_length": False,
              # Disable the SDK's built-in retries; per the original note,
              # retrying is managed centrally by EmbeddingClient.
              "max_retries": 0,
          }
          client = OpenAIEmbeddings(**client_options)
          logger.info(f"硅基流动Embedding模型初始化成功: {model_name} (dimensions: {dims})")
          return client
      except (ModelConfigError, ModelConnectionError):
          # Config and connectivity failures propagate to the caller unchanged.
          raise
      except Exception as exc:
          # Anything else is wrapped and routed through the shared error handler.
          return self._handle_model_error(
              "siliconflow_embed",
              ModelAPIError(f"硅基流动Embedding模型初始化异常: {exc}"),
          )
  def _get_shutian_qwen3_5_122b_model(self):
      """
      Build the Shutian Qwen3.5-122B-A10B chat model client.

      Reads the endpoint URL, model id and API key from the ``shutian`` config
      section (each with a built-in default), probes the service with a
      3-second ``_check_connection`` call and constructs a ``ChatOpenAI``
      client with temperature 0.7.

      Returns:
          The configured ``ChatOpenAI`` instance, or whatever
          ``_handle_model_error`` returns when an unexpected error occurs.

      Raises:
          ModelConnectionError: the connection probe reports failure.
      """
      try:
          read = self.config.get
          # NOTE(review): the endpoint and API key defaults are embedded in
          # source; confirm they are meant to ship with the code.
          endpoint = read("shutian", "SHUTIAN_122B_SERVER_URL", "http://183.220.37.46:25423/v1")
          model_name = read("shutian", "SHUTIAN_122B_MODEL_ID", "/model/Qwen3.5-122B-A10B")
          access_key = read("shutian", "SHUTIAN_122B_API_KEY", "lq123456")

          # Probe the service before building the client.
          reachable = self._check_connection(endpoint, access_key, timeout=3)
          if not reachable:
              logger.warning(f"蜀天Qwen3.5-122B模型服务连接失败: {endpoint}")
              raise ModelConnectionError(f"无法连接到蜀天Qwen3.5-122B模型服务: {endpoint}")

          client_options = {
              "base_url": endpoint,
              "model": model_name,
              "api_key": access_key,
              "temperature": 0.7,
              "timeout": self.REQUEST_TIMEOUT,
          }
          client = ChatOpenAI(**client_options)
          logger.info(f"蜀天Qwen3.5-122B模型初始化成功: {model_name}")
          return client
      except ModelConnectionError:
          # Connectivity failures propagate to the caller unchanged.
          raise
      except Exception as exc:
          # Anything else is wrapped and routed through the shared error handler.
          return self._handle_model_error(
              "shutian_qwen3_5_122b",
              ModelAPIError(f"蜀天Qwen3.5-122B模型初始化异常: {exc}"),
          )
  def _get_shutian_qwen3_8b_model(self):
      """
      Build the Shutian Qwen3-8B chat model client.

      Reads the endpoint URL, model id and API key from the ``shutian`` config
      section (each with a built-in default), probes the service with a
      3-second ``_check_connection`` call and constructs a ``ChatOpenAI``
      client with temperature 0.7.

      Returns:
          The configured ``ChatOpenAI`` instance, or whatever
          ``_handle_model_error`` returns when an unexpected error occurs.

      Raises:
          ModelConnectionError: the connection probe reports failure.
      """
      try:
          section = "shutian"
          # NOTE(review): the endpoint and API key defaults are embedded in
          # source; confirm they are meant to ship with the code.
          endpoint = self.config.get(section, "SHUTIAN_8B_SERVER_URL", "http://183.220.37.46:25424/v1")
          model_name = self.config.get(section, "SHUTIAN_8B_MODEL_ID", "/model/Qwen3-8B")
          access_key = self.config.get(section, "SHUTIAN_8B_API_KEY", "lq123456")

          # Probe the service; a failed probe aborts initialisation.
          if self._check_connection(endpoint, access_key, timeout=3):
              client = ChatOpenAI(
                  base_url=endpoint,
                  model=model_name,
                  api_key=access_key,
                  temperature=0.7,
                  timeout=self.REQUEST_TIMEOUT,
              )
              logger.info(f"蜀天Qwen3-8B模型初始化成功: {model_name}")
              return client

          logger.warning(f"蜀天Qwen3-8B模型服务连接失败: {endpoint}")
          raise ModelConnectionError(f"无法连接到蜀天Qwen3-8B模型服务: {endpoint}")
      except ModelConnectionError:
          # Connectivity failures propagate to the caller unchanged.
          raise
      except Exception as exc:
          # Anything else is wrapped and routed through the shared error handler.
          wrapped = ModelAPIError(f"蜀天Qwen3-8B模型初始化异常: {exc}")
          return self._handle_model_error("shutian_qwen3_8b", wrapped)
  def _get_shutian_qwen3_6_27b_model(self):
      """
      Build the Shutian Qwen3.6-27B chat model client.

      Reads the endpoint URL, model id and API key from the ``shutian`` config
      section, probes the service with a 3-second ``_check_connection`` call
      and constructs a ``ChatOpenAI`` client with temperature 0.7.

      The API key has no built-in default: it must be supplied through
      ``SHUTIAN_27B_API_KEY``. A production-style key used to be embedded here
      as the fallback value; it has been removed from source and should be
      treated as exposed and rotated.

      Returns:
          The configured ``ChatOpenAI`` instance, or whatever
          ``_handle_model_error`` returns when an unexpected error occurs.

      Raises:
          ModelConfigError: ``SHUTIAN_27B_API_KEY`` is not configured.
          ModelConnectionError: the connection probe reports failure.
      """
      try:
          # NOTE(review): this default URL uses the same port as the Qwen3-8B
          # endpoint default — confirm it is not a copy-paste leftover.
          server_url = self.config.get("shutian", "SHUTIAN_27B_SERVER_URL", "http://183.220.37.46:25424/v1")
          model_id = self.config.get("shutian", "SHUTIAN_27B_MODEL_ID", "/model/Qwen3.6-27B")
          # Never fall back to a credential committed in source.
          api_key = self.config.get("shutian", "SHUTIAN_27B_API_KEY", "")
          if not api_key:
              raise ModelConfigError("蜀天Qwen3.6-27B模型配置不完整,缺少: SHUTIAN_27B_API_KEY")

          # Probe the service before building the client.
          if not self._check_connection(server_url, api_key, timeout=3):
              logger.warning(f"蜀天Qwen3.6-27B模型服务连接失败: {server_url}")
              raise ModelConnectionError(f"无法连接到蜀天Qwen3.6-27B模型服务: {server_url}")

          llm = ChatOpenAI(
              base_url=server_url,
              model=model_id,
              api_key=api_key,
              temperature=0.7,
              timeout=self.REQUEST_TIMEOUT,
          )
          logger.info(f"蜀天Qwen3.6-27B模型初始化成功: {model_id}")
          return llm
      except (ModelConfigError, ModelConnectionError):
          # Config and connectivity failures propagate to the caller unchanged.
          raise
      except Exception as e:
          # Anything else is wrapped and routed through the shared error handler.
          error = ModelAPIError(f"蜀天Qwen3.6-27B模型初始化异常: {e}")
          return self._handle_model_error("shutian_qwen3_6_27b", error)
  def _get_shutian_qwen3_5_35b_model(self):
      """
      Build the Shutian Qwen3.5-35B chat model client.

      Reads the endpoint URL, model id and API key from the ``shutian`` config
      section (each with a built-in default). Unless ``SKIP_CONNECTION_CHECK``
      is set to ``true`` (case-insensitive), the service is probed with a
      5-second ``_check_connection`` call. A failed probe is only logged as a
      warning here; the ``ChatOpenAI`` client is built regardless.

      Returns:
          The configured ``ChatOpenAI`` instance, or whatever
          ``_handle_model_error`` returns when an unexpected error occurs.

      Raises:
          ModelConnectionError: re-raised unchanged if anything inside the
              ``try`` block raises it.
      """
      try:
          read = self.config.get
          # NOTE(review): the endpoint and API key defaults are embedded in
          # source; confirm they are meant to ship with the code.
          endpoint = read("shutian", "SHUTIAN_35B_SERVER_URL", "http://183.220.37.46:25427/v1")
          model_name = read("shutian", "SHUTIAN_35B_MODEL_ID", "/model/Qwen3.5-35B")
          access_key = read("shutian", "SHUTIAN_35B_API_KEY", "lq123456")
          logger.info(f"正在初始化蜀天Qwen3.5-35B模型,服务器地址: {endpoint}")

          # The connection probe can be switched off through configuration.
          skip_flag = read("shutian", "SKIP_CONNECTION_CHECK", "false")
          if skip_flag.lower() == "true":
              logger.info("跳过蜀天Qwen3.5-35B模型连接检查(SKIP_CONNECTION_CHECK=true)")
          elif self._check_connection(endpoint, access_key, timeout=5):
              logger.info(f"蜀天Qwen3.5-35B模型服务连接检查通过: {endpoint}")
          else:
              # A failed probe is logged but does not block initialisation;
              # a real outage will surface when the model is actually called.
              logger.warning(f"蜀天Qwen3.5-35B模型服务连接检查失败: {endpoint},但仍尝试初始化")

          client = ChatOpenAI(
              base_url=endpoint,
              model=model_name,
              api_key=access_key,
              temperature=0.7,
              timeout=self.REQUEST_TIMEOUT,
          )

          # Log the effective base URL for debugging, falling back to the
          # configured URL when the client exposes no such attribute.
          effective_url = getattr(client, "base_url", endpoint)
          logger.info(f"蜀天Qwen3.5-35B模型初始化成功: model_id={model_name}, base_url={effective_url}")
          return client
      except ModelConnectionError:
          # Connectivity failures propagate to the caller unchanged.
          raise
      except Exception as exc:
          # Anything else is wrapped and routed through the shared error handler.
          wrapped = ModelAPIError(f"蜀天Qwen3.5-35B模型初始化异常: {exc}")
          return self._handle_model_error("shutian_qwen3_5_35b", wrapped)
  def _get_shutian_qwen3_embed(self):
      """
      Build the Shutian Qwen3-Embedding-8B embedding client.

      Reads the endpoint URL, model id and API key from the ``shutian`` config
      section (each with a built-in default), probes the service with a
      3-second ``_check_connection`` call and constructs an
      ``OpenAIEmbeddings`` client.

      Returns:
          The configured ``OpenAIEmbeddings`` instance, or whatever
          ``_handle_model_error`` returns when an unexpected error occurs.

      Raises:
          ModelConnectionError: the connection probe reports failure.
      """
      try:
          section = "shutian"
          # NOTE(review): the endpoint and API key defaults are embedded in
          # source; confirm they are meant to ship with the code.
          endpoint = self.config.get(section, "SHUTIAN_EMBED_SERVER_URL", "http://183.220.37.46:25425/v1")
          model_name = self.config.get(section, "SHUTIAN_EMBED_MODEL_ID", "/model/Qwen3-Embedding-8B")
          access_key = self.config.get(section, "SHUTIAN_EMBED_API_KEY", "lq123456")

          # Probe the service before building the client.
          reachable = self._check_connection(endpoint, access_key, timeout=3)
          if not reachable:
              logger.warning(f"蜀天Qwen3-Embedding模型服务连接失败: {endpoint}")
              raise ModelConnectionError(f"无法连接到蜀天Qwen3-Embedding模型服务: {endpoint}")

          client_options = {
              "base_url": endpoint,
              "model": model_name,
              "api_key": access_key,
              "timeout": self.REQUEST_TIMEOUT,
              "tiktoken_enabled": False,
              "check_embedding_ctx_length": False,
              # Disable the SDK's built-in retries; per the original note,
              # retrying is managed centrally by EmbeddingClient.
              "max_retries": 0,
          }
          client = OpenAIEmbeddings(**client_options)
          logger.info(f"蜀天Qwen3-Embedding-8B模型初始化成功: {model_name}")
          return client
      except ModelConnectionError:
          # Connectivity failures propagate to the caller unchanged.
          raise
      except Exception as exc:
          # Anything else is wrapped and routed through the shared error handler.
          return self._handle_model_error(
              "shutian_qwen3_embed",
              ModelAPIError(f"蜀天Qwen3-Embedding模型初始化异常: {exc}"),
          )
  # Create the module-level ModelHandler instance (constructed at import time)
  # that the get_models() convenience function below delegates to.
  model_handler = ModelHandler()
  def get_models():
      """
      Return the models provided by the module-level ``model_handler``.

      Convenience wrapper around ``model_handler.get_models()``.

      Returns:
          tuple: ``(llm, chat, embed)``. ``llm`` and ``chat`` are the same
          object returned by ``model_handler.get_models()``; ``embed`` is
          currently always ``None``.

      Raises:
          ModelConnectionError: any exception raised while fetching the model
              is logged and re-raised as this type, with the original
              exception attached as ``__cause__``.
      """
      try:
          llm = model_handler.get_models()
      except Exception as e:
          logger.error(f"获取模型失败: {e}")
          # Chain explicitly so the root cause stays visible in tracebacks.
          raise ModelConnectionError(f"无法获取模型服务: {e}") from e
      # The same instance serves as both llm and chat; no embedding model is
      # returned yet, so the third slot is None.
      return llm, llm, None