model_handler.py 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. """
  4. AI模型处理器
  5. 用于管理生成、与嵌入模型的创建和配置
  6. 支持的模型类型:
  7. - doubao: 豆包模型
  8. - qwen: 通义千问模型
  9. - deepseek: DeepSeek模型
  10. - lq_qwen3_8b: 本地Qwen3-8B模型
  11. - lq_qwen3_8b_lq_lora: 本地Qwen3-8B-lq-lora模型
  12. - lq_qwen3_4b: 本地Qwen3-4B模型
  13. - qwen_local_14b: 本地Qwen3-14B模型
  14. - shutian_qwen3_embed: 蜀天Qwen3-Embedding-8B嵌入模型(默认)
  15. - siliconflow_embed: 硅基流动Qwen3-Embedding-8B嵌入模型
  16. - lq_bge_reranker_v2_m3: 本地BGE-reranker-v2-m3重排序模型
  17. - qwen3_5_35b_a3b: DashScope Qwen3.5-35B-A3B模型(默认兜底模型)
  18. - qwen3_5_27b: DashScope Qwen3.5-27B模型
  19. - qwen3_5_122b_a10b: DashScope Qwen3.5-122B-A10B模型
  20. - shutian_qwen3_5_122b: 蜀天Qwen3.5-122B-A10B模型(183.220.37.46:25423)
  21. - shutian_qwen3_8b: 蜀天Qwen3-8B模型(183.220.37.46:25424)
  22. - shutian_qwen3_5_35b: 蜀天Qwen3.5-35B模型(183.220.37.46:25427)
  23. - shutian_qwen3_6_27b: 蜀天Qwen3.6-27B模型(183.220.37.46:25424)
  24. - shutian_qwen3_embed: 蜀天Qwen3-Embedding-8B模型(183.220.37.46:25425)
  25. - shutian_qwen3_reranker: 蜀天Qwen3-Reranker-8B模型(183.220.37.46:25426)
  26. """
# Set Hugging Face related environment variables before the imports below run.
# NOTE(review): the original comment stated that this disables transformers'
# deep-learning framework detection to avoid a slow scan at startup. Judging by
# their names, the two variables set here control log verbosity and hub
# telemetry only -- confirm that the intended startup speed-up is achieved.
import os
os.environ["TRANSFORMERS_VERBOSITY"] = "error"
os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
  31. import requests
  32. from langchain_openai import ChatOpenAI, OpenAIEmbeddings
  33. from foundation.infrastructure.config.config import config_handler
  34. from foundation.observability.logger.loggering import review_logger as logger
  35. class ModelConnectionError(Exception):
  36. """模型连接错误"""
  37. pass
  38. class ModelConfigError(Exception):
  39. """模型配置错误"""
  40. pass
  41. class ModelAPIError(Exception):
  42. """模型API调用错误"""
  43. pass
class ModelHandler:
    """
    AI model handler that manages the creation and configuration of several AI models.
    """
    # Timeout settings for model connections, in seconds.
    CONNECTION_TIMEOUT = 30
    REQUEST_TIMEOUT = 180
    # Longer request timeout that get_model_by_name applies when thinking mode is requested.
    REQUEST_TIMEOUT_THINKING = 360
    MAX_RETRIES = 2
    # Registry: model type name -> name of the factory method that builds it.
    MODEL_FACTORY_MAP = {
        "doubao": "_get_doubao_model",
        "qwen": "_get_qwen_model",
        "deepseek": "_get_deepseek_model",
        "lq_qwen3_8b": "_get_lq_qwen3_8b_model",
        "lq_qwen3_8b_lq_lora": "_get_lq_qwen3_8b_lora_model",
        "lq_qwen3_4b": "_get_lq_qwen3_4b_model",
        "qwen_local_14b": "_get_qwen_local_14b_model",
        "qwen3_5_35b_a3b": "_get_qwen3_5_35b_a3b_model",
        "qwen3_5_27b": "_get_qwen3_5_27b_model",
        "qwen3_5_122b_a10b": "_get_qwen3_5_122b_a10b_model",
        "shutian_qwen3_5_122b": "_get_shutian_qwen3_5_122b_model",
        "shutian_qwen3_8b": "_get_shutian_qwen3_8b_model",
        "shutian_qwen3_5_35b": "_get_shutian_qwen3_5_35b_model",
        "shutian_qwen3_6_27b": "_get_shutian_qwen3_6_27b_model",
    }
    # Model type used when none is configured or the requested type is unknown.
    DEFAULT_FALLBACK_MODEL = "qwen3_5_35b_a3b"
  71. def __init__(self):
  72. """
  73. 初始化模型处理器
  74. 加载配置处理器,用于后续读取各种模型的配置信息
  75. """
  76. self.config = config_handler
  77. self._model_cache = {} # 模型实例缓存
  78. self._request_timeout_override = None
  79. def _create_model_by_type(self, model_type: str):
  80. """根据模型类型名称创建模型实例(通过注册表分发)"""
  81. method_name = self.MODEL_FACTORY_MAP.get(model_type)
  82. if method_name:
  83. return getattr(self, method_name)()
  84. logger.warning(f"未知的模型类型 '{model_type}',使用默认 {self.DEFAULT_FALLBACK_MODEL} 模型")
  85. return self._get_qwen3_5_35b_a3b_model()
  86. @property
  87. def request_timeout(self):
  88. """当前请求超时时间,有 override 时优先返回 override"""
  89. return self._request_timeout_override or self.REQUEST_TIMEOUT
  90. def _check_connection(self, base_url: str, api_key: str = None, timeout: int = 5) -> bool:
  91. """
  92. 检查模型服务连接是否可用
  93. 支持两种检查方式:
  94. 1. GET /models - 标准 OpenAI 兼容接口
  95. 2. POST /chat/completions - 直接测试 chat 接口(部分服务只支持此接口)
  96. Args:
  97. base_url: 模型服务地址
  98. api_key: API密钥(可选)
  99. timeout: 超时时间(秒)
  100. Returns:
  101. bool: 连接是否可用
  102. """
  103. headers = {}
  104. if api_key and api_key != "dummy":
  105. headers["Authorization"] = f"Bearer {api_key}"
  106. # 方法1: 尝试 /models 端点
  107. try:
  108. health_url = f"{base_url.rstrip('/')}/models"
  109. response = requests.get(
  110. health_url,
  111. headers=headers,
  112. timeout=timeout
  113. )
  114. if 200 <= response.status_code < 300:
  115. logger.debug(f"连接检查通过 (/models): {base_url}")
  116. return True
  117. except requests.exceptions.Timeout:
  118. logger.debug(f"连接检查超时 (/models): {base_url}")
  119. except Exception as e:
  120. logger.debug(f"连接检查失败 (/models): {base_url}, {e}")
  121. # 方法2: 尝试 /chat/completions 端点(发送一个简单请求)
  122. try:
  123. chat_url = f"{base_url.rstrip('/')}/chat/completions"
  124. test_payload = {
  125. "model": "test",
  126. "messages": [{"role": "user", "content": "test"}],
  127. "max_tokens": 1
  128. }
  129. response = requests.post(
  130. chat_url,
  131. headers={**headers, "Content-Type": "application/json"},
  132. json=test_payload,
  133. timeout=timeout
  134. )
  135. # 即使返回 400/401/404 也说明服务是通的
  136. # 只有连接错误/超时才是真的连不上
  137. if response.status_code != 503: # 503 表示服务不可用
  138. logger.debug(f"连接检查通过 (/chat/completions): {base_url}, 状态码: {response.status_code}")
  139. return True
  140. except requests.exceptions.Timeout:
  141. logger.warning(f"连接检查超时: {base_url}")
  142. except requests.exceptions.ConnectionError as e:
  143. logger.warning(f"连接检查错误: {base_url}, {e}")
  144. except Exception as e:
  145. logger.warning(f"连接检查异常: {base_url}, {e}")
  146. return False
  147. def _handle_model_error(self, model_name: str, error: Exception, fallback_model=None):
  148. """
  149. 统一处理模型错误
  150. Args:
  151. model_name: 模型名称
  152. error: 异常对象
  153. fallback_model: 降级模型实例(可选)
  154. Returns:
  155. 降级模型实例,如果不可用则返回None
  156. """
  157. error_type = type(error).__name__
  158. error_msg = str(error)
  159. logger.error(f"模型初始化失败 [{model_name}]: {error_type} - {error_msg}")
  160. # 如果提供了降级模型,记录日志并返回
  161. if fallback_model:
  162. logger.warning(f"使用降级模型: {fallback_model.__class__.__name__}")
  163. return fallback_model
  164. # 如果没有降级模型,返回None让调用方处理
  165. return None
  166. def get_models(self):
  167. """
  168. 获取AI模型实例
  169. Returns:
  170. ChatOpenAI: 配置好的AI模型实例
  171. Note:
  172. 优先从 model_setting.yaml 读取默认模型配置,如果不存在则回退到 config.ini 的 MODEL_TYPE
  173. 支持的模型类型:doubao, qwen, deepseek, lq_qwen3_8b, lq_qwen3_8b_lora, lq_qwen3_4b, qwen_local_14b
  174. """
  175. # 优先从 model_setting.yaml 读取默认模型配置
  176. try:
  177. from foundation.ai.models.model_config_loader import get_model_for_function
  178. model_type = get_model_for_function("default")
  179. if model_type:
  180. logger.debug(f"从 model_setting.yaml 读取默认模型: {model_type}")
  181. else:
  182. model_type = self.DEFAULT_FALLBACK_MODEL
  183. except Exception as e:
  184. logger.debug(f"从 model_setting.yaml 读取默认模型失败: {e},回退到默认模型")
  185. model_type = self.DEFAULT_FALLBACK_MODEL
  186. logger.info(f"正在初始化AI模型,模型类型: {model_type}")
  187. # 检查缓存
  188. cache_key = f"chat_{model_type}"
  189. if cache_key in self._model_cache:
  190. logger.info(f"使用缓存的模型: {model_type}")
  191. return self._model_cache[cache_key]
  192. model = None
  193. try:
  194. model = self._create_model_by_type(model_type)
  195. if model:
  196. self._model_cache[cache_key] = model
  197. logger.info(f"AI模型初始化完成: {model_type}")
  198. return model
  199. else:
  200. raise ModelAPIError(f"模型初始化返回None: {model_type}")
  201. except Exception as e:
  202. logger.error(f"获取模型失败 [{model_type}]: {e}")
  203. # 使用 qwen3_5_35b_a3b 作为兜底降级方案
  204. if model_type != "qwen3_5_35b_a3b":
  205. logger.info("尝试使用 qwen3_5_35b_a3b 模型作为降级方案")
  206. try:
  207. fallback_model = self._get_qwen3_5_35b_a3b_model()
  208. if fallback_model:
  209. self._model_cache[cache_key] = fallback_model
  210. logger.warning("已切换到 qwen3_5_35b_a3b 降级模型")
  211. return fallback_model
  212. except Exception as fallback_error:
  213. logger.error(f"降级模型也失败: {fallback_error}")
  214. # 如果所有模型都失败,抛出异常
  215. raise ModelConnectionError(f"无法初始化任何模型服务: {e}")
  216. def get_model_by_name(self, model_type: str = None, enable_thinking: bool = False):
  217. """
  218. 根据模型名称动态获取指定的AI模型实例
  219. Args:
  220. model_type: 模型类型名称,如果为None则使用配置文件中的默认模型
  221. 支持的模型类型:doubao, qwen, deepseek, gemini,
  222. lq_qwen3_8b, lq_qwen3_8b_lq_lora,
  223. lq_qwen3_4b, qwen_local_14b
  224. enable_thinking: 是否启用推理模式,影响 HTTP 超时时间
  225. Returns:
  226. ChatOpenAI: 配置好的AI模型实例
  227. Note:
  228. 该方法支持动态切换模型,不受配置文件中的默认MODEL_TYPE限制
  229. 如果model_type为None,则使用配置文件中的默认模型
  230. 如果model_type无效,则使用gemini作为降级模型
  231. """
  232. # 如果未指定模型类型,使用配置文件中的默认模型
  233. if model_type is None:
  234. model_type = self.DEFAULT_FALLBACK_MODEL
  235. logger.info(f"动态获取AI模型,模型类型: {model_type}, thinking: {enable_thinking}")
  236. # 检查缓存(thinking 模式使用独立缓存)
  237. cache_key = f"chat_{model_type}" if not enable_thinking else f"chat_{model_type}_thinking"
  238. if cache_key in self._model_cache:
  239. logger.info(f"使用缓存的模型: {model_type}")
  240. return self._model_cache[cache_key]
  241. # 设置超时 override,工厂方法通过 self.request_timeout 读取
  242. self._request_timeout_override = (
  243. self.REQUEST_TIMEOUT_THINKING if enable_thinking else None
  244. )
  245. model = None
  246. try:
  247. model = self._create_model_by_type(model_type)
  248. if model:
  249. self._model_cache[cache_key] = model
  250. logger.info(f"AI模型动态初始化完成: {model_type}")
  251. return model
  252. else:
  253. raise ModelAPIError(f"模型初始化返回None: {model_type}")
  254. except Exception as e:
  255. logger.error(f"动态获取模型失败 [{model_type}]: {e}")
  256. # 使用 qwen3_5_35b_a3b 作为兜底降级方案
  257. if model_type != "qwen3_5_35b_a3b":
  258. logger.info("尝试使用 qwen3_5_35b_a3b 模型作为降级方案")
  259. try:
  260. fallback_model = self._get_qwen3_5_35b_a3b_model()
  261. if fallback_model:
  262. # 注意:不要把降级模型存入原模型的缓存,避免后续调用都使用错误的模型
  263. fallback_cache_key = "chat_qwen3_5_35b_a3b"
  264. self._model_cache[fallback_cache_key] = fallback_model
  265. logger.warning(f"已切换到 qwen3_5_35b_a3b 降级模型(不会缓存为 {model_type})")
  266. return fallback_model
  267. except Exception as fallback_error:
  268. logger.error(f"降级模型也失败: {fallback_error}")
  269. # 如果所有模型都失败,抛出异常
  270. raise ModelConnectionError(f"无法初始化任何模型服务: {e}")
  271. finally:
  272. self._request_timeout_override = None
  273. def get_model_by_function(self, function_name: str):
  274. """
  275. 根据功能名称获取对应的AI模型实例
  276. 从 config/model_setting.yaml 加载功能对应的模型配置
  277. Args:
  278. function_name: 功能名称(定义在 model_setting.yaml 中),如:
  279. - doc_classification_secondary: 文档二级分类
  280. - completeness_review_generate: 完整性审查生成
  281. - sensitive_check: 敏感信息检查
  282. - grammar_check: 语法检查
  283. Returns:
  284. ChatOpenAI: 配置好的AI模型实例
  285. Example:
  286. model = model_handler.get_model_by_function("doc_classification_secondary")
  287. """
  288. try:
  289. from foundation.ai.models.model_config_loader import get_model_for_function
  290. model_type = get_model_for_function(function_name)
  291. logger.info(f"根据功能 '{function_name}' 获取模型: {model_type}")
  292. return self.get_model_by_name(model_type)
  293. except Exception as e:
  294. logger.warning(f"根据功能获取模型失败 [{function_name}]: {e},尝试使用默认模型")
  295. try:
  296. default_model = get_model_for_function("default")
  297. return self.get_model_by_name(default_model)
  298. except Exception:
  299. return self.get_model_by_name(self.DEFAULT_FALLBACK_MODEL)
  300. def get_embedding_model(self):
  301. """
  302. 获取Embedding模型实例
  303. Returns:
  304. OpenAIEmbeddings: 配置好的Embedding模型实例
  305. Note:
  306. 从 model_setting.yaml 读取embedding模型配置
  307. 支持的模型类型:shutian_qwen3_embed, siliconflow_embed
  308. 默认返回蜀天 shutian_qwen3_embed 模型
  309. """
  310. embedding_model_type = "shutian_qwen3_embed"
  311. try:
  312. from .model_config_loader import get_model_for_function
  313. model_name = get_model_for_function("embedding")
  314. if model_name:
  315. embedding_model_type = model_name
  316. logger.debug(f"从 model_setting.yaml 读取embedding模型: {embedding_model_type}")
  317. except Exception as e:
  318. logger.debug(f"从 model_setting.yaml 读取embedding配置失败: {e}")
  319. logger.info(f"正在初始化Embedding模型,模型类型: {embedding_model_type}")
  320. # 检查缓存
  321. cache_key = f"embed_{embedding_model_type}"
  322. if cache_key in self._model_cache:
  323. logger.info(f"使用缓存的Embedding模型: {embedding_model_type}")
  324. return self._model_cache[cache_key]
  325. model = None
  326. try:
  327. if embedding_model_type == "siliconflow_embed":
  328. model = self._get_siliconflow_embedding_model()
  329. elif embedding_model_type == "shutian_qwen3_embed":
  330. model = self._get_shutian_qwen3_embed()
  331. else:
  332. # 默认返回蜀天Embedding模型
  333. logger.warning(f"未知的Embedding模型类型 '{embedding_model_type}',使用默认蜀天Embedding")
  334. model = self._get_shutian_qwen3_embed()
  335. if model:
  336. self._model_cache[cache_key] = model
  337. logger.info(f"Embedding模型初始化完成: {embedding_model_type}")
  338. return model
  339. else:
  340. raise ModelAPIError(f"Embedding模型初始化返回None: {embedding_model_type}")
  341. except (ModelConnectionError, Exception) as e:
  342. logger.error(f"获取Embedding模型失败 [{embedding_model_type}]: {e}")
  343. raise ModelConnectionError(f"无法初始化Embedding模型服务: {e}")
  344. def _get_doubao_model(self):
  345. """
  346. 获取豆包模型
  347. Returns:
  348. ChatOpenAI: 配置好的豆包模型实例
  349. """
  350. try:
  351. doubao_url = self.config.get("doubao", "DOUBAO_SERVER_URL")
  352. doubao_model_id = self.config.get("doubao", "DOUBAO_MODEL_ID")
  353. doubao_api_key = self.config.get("doubao", "DOUBAO_API_KEY")
  354. # 验证配置完整性
  355. if not all([doubao_url, doubao_model_id, doubao_api_key]):
  356. missing = []
  357. if not doubao_url:
  358. missing.append("DOUBAO_SERVER_URL")
  359. if not doubao_model_id:
  360. missing.append("DOUBAO_MODEL_ID")
  361. if not doubao_api_key:
  362. missing.append("DOUBAO_API_KEY")
  363. raise ModelConfigError(f"豆包模型配置不完整,缺少: {', '.join(missing)}")
  364. # 检查连接
  365. if not self._check_connection(doubao_url, doubao_api_key):
  366. logger.warning(f"豆包模型服务连接失败: {doubao_url}")
  367. raise ModelConnectionError(f"无法连接到豆包模型服务: {doubao_url}")
  368. llm = ChatOpenAI(
  369. base_url=doubao_url,
  370. model=doubao_model_id,
  371. api_key=doubao_api_key,
  372. temperature=0.7,
  373. timeout=self.request_timeout,
  374. extra_body={
  375. "enable_thinking": False,
  376. })
  377. logger.info(f"豆包模型初始化成功: {doubao_model_id}")
  378. return llm
  379. except ModelConfigError:
  380. raise
  381. except ModelConnectionError:
  382. raise
  383. except Exception as e:
  384. error = ModelAPIError(f"豆包模型初始化异常: {e}")
  385. return self._handle_model_error("doubao", error)
  386. def _get_qwen_model(self):
  387. """
  388. 获取通义千问模型
  389. Returns:
  390. ChatOpenAI: 配置好的通义千问模型实例
  391. """
  392. try:
  393. qwen_url = self.config.get("qwen", "QWEN_SERVER_URL")
  394. qwen_model_id = self.config.get("qwen", "QWEN_MODEL_ID")
  395. qwen_api_key = self.config.get("qwen", "QWEN_API_KEY")
  396. # 验证配置完整性
  397. if not all([qwen_url, qwen_model_id, qwen_api_key]):
  398. missing = []
  399. if not qwen_url:
  400. missing.append("QWEN_SERVER_URL")
  401. if not qwen_model_id:
  402. missing.append("QWEN_MODEL_ID")
  403. if not qwen_api_key:
  404. missing.append("QWEN_API_KEY")
  405. raise ModelConfigError(f"通义千问模型配置不完整,缺少: {', '.join(missing)}")
  406. # 检查连接
  407. if not self._check_connection(qwen_url, qwen_api_key):
  408. logger.warning(f"通义千问模型服务连接失败: {qwen_url}")
  409. raise ModelConnectionError(f"无法连接到通义千问模型服务: {qwen_url}")
  410. llm = ChatOpenAI(
  411. base_url=qwen_url,
  412. model=qwen_model_id,
  413. api_key=qwen_api_key,
  414. temperature=0.7,
  415. timeout=self.request_timeout,
  416. extra_body={
  417. "enable_thinking": False,
  418. })
  419. logger.info(f"通义千问模型初始化成功: {qwen_model_id}")
  420. return llm
  421. except ModelConfigError:
  422. raise
  423. except ModelConnectionError:
  424. raise
  425. except Exception as e:
  426. error = ModelAPIError(f"通义千问模型初始化异常: {e}")
  427. return self._handle_model_error("qwen", error)
  428. def _get_deepseek_model(self):
  429. """
  430. 获取DeepSeek模型
  431. Returns:
  432. ChatOpenAI: 配置好的DeepSeek模型实例
  433. """
  434. try:
  435. deepseek_url = self.config.get("deepseek", "DEEPSEEK_SERVER_URL")
  436. deepseek_model_id = self.config.get("deepseek", "DEEPSEEK_MODEL_ID")
  437. deepseek_api_key = self.config.get("deepseek", "DEEPSEEK_API_KEY")
  438. # 验证配置完整性
  439. if not all([deepseek_url, deepseek_model_id, deepseek_api_key]):
  440. missing = []
  441. if not deepseek_url:
  442. missing.append("DEEPSEEK_SERVER_URL")
  443. if not deepseek_model_id:
  444. missing.append("DEEPSEEK_MODEL_ID")
  445. if not deepseek_api_key:
  446. missing.append("DEEPSEEK_API_KEY")
  447. raise ModelConfigError(f"DeepSeek模型配置不完整,缺少: {', '.join(missing)}")
  448. # 检查连接
  449. if not self._check_connection(deepseek_url, deepseek_api_key):
  450. logger.warning(f"DeepSeek模型服务连接失败: {deepseek_url}")
  451. raise ModelConnectionError(f"无法连接到DeepSeek模型服务: {deepseek_url}")
  452. llm = ChatOpenAI(
  453. base_url=deepseek_url,
  454. model=deepseek_model_id,
  455. api_key=deepseek_api_key,
  456. temperature=0.7,
  457. timeout=self.request_timeout,
  458. extra_body={
  459. "enable_thinking": False,
  460. })
  461. logger.info(f"DeepSeek模型初始化成功: {deepseek_model_id}")
  462. return llm
  463. except ModelConfigError:
  464. raise
  465. except ModelConnectionError:
  466. raise
  467. except Exception as e:
  468. error = ModelAPIError(f"DeepSeek模型初始化异常: {e}")
  469. return self._handle_model_error("deepseek", error)
  470. def _get_lq_qwen3_8b_model(self):
  471. """
  472. 获取本地Qwen3-8B-Instruct模型
  473. Returns:
  474. ChatOpenAI: 配置好的本地Qwen3-8B模型实例
  475. """
  476. try:
  477. server_url = self.config.get("lq_qwen3_8b", "QWEN_LOCAL_1_5B_SERVER_URL", "http://192.168.91.253:9002/v1")
  478. model_id = self.config.get("lq_qwen3_8b", "QWEN_LOCAL_1_5B_MODEL_ID", "Qwen3-8B")
  479. api_key = self.config.get("lq_qwen3_8b", "QWEN_LOCAL_1_5B_API_KEY", "dummy")
  480. if not all([server_url, model_id]):
  481. raise ModelConfigError("本地Qwen3-8B模型配置不完整")
  482. if not self._check_connection(server_url, api_key, timeout=3):
  483. logger.warning(f"本地Qwen3-8B模型服务连接失败: {server_url}")
  484. raise ModelConnectionError(f"无法连接到本地Qwen3-8B模型服务: {server_url}")
  485. llm = ChatOpenAI(
  486. base_url=server_url,
  487. model=model_id,
  488. api_key=api_key,
  489. temperature=0.7,
  490. timeout=self.request_timeout,
  491. )
  492. logger.info(f"本地Qwen3-8B模型初始化成功: {model_id}")
  493. return llm
  494. except ModelConfigError:
  495. raise
  496. except ModelConnectionError:
  497. raise
  498. except Exception as e:
  499. error = ModelAPIError(f"本地Qwen3-8B模型初始化异常: {e}")
  500. return self._handle_model_error("lq_qwen3_8b", error)
  501. def _get_lq_qwen3_8b_lora_model(self):
  502. """
  503. 获取本地Qwen3-8B-lq-lora模型
  504. Returns:
  505. ChatOpenAI: 配置好的本地Qwen3-8B-lq-lora模型实例
  506. """
  507. try:
  508. server_url = self.config.get("lq_qwen3_8B_lora", "LQ_QWEN3_8B_LQ_LORA_SERVER_URL")
  509. model_id = self.config.get("lq_qwen3_8B_lora", "LQ_QWEN3_8B_LQ_LORA_MODEL_ID")
  510. api_key = self.config.get("lq_qwen3_8B_lora", "LQ_QWEN3_8B_LQ_LORA_API_KEY", "dummy")
  511. # 验证配置完整性
  512. if not all([server_url, model_id]):
  513. missing = []
  514. if not server_url:
  515. missing.append("LQ_QWEN3_8B_LQ_LORA_SERVER_URL")
  516. if not model_id:
  517. missing.append("LQ_QWEN3_8B_LQ_LORA_MODEL_ID")
  518. raise ModelConfigError(f"本地Qwen3-8B-lq-lora模型配置不完整,缺少: {', '.join(missing)}")
  519. # 检查本地服务连接
  520. if not self._check_connection(server_url, api_key, timeout=3):
  521. logger.warning(f"本地Qwen3-8B-lq-lora模型服务连接失败: {server_url}")
  522. raise ModelConnectionError(f"无法连接到本地Qwen3-8B-lq-lora模型服务: {server_url}")
  523. llm = ChatOpenAI(
  524. base_url=server_url,
  525. model=model_id,
  526. api_key=api_key,
  527. temperature=0.7,
  528. timeout=self.request_timeout,
  529. )
  530. logger.info(f"本地Qwen3-8B-lq-lora模型初始化成功: {model_id}")
  531. return llm
  532. except ModelConfigError:
  533. raise
  534. except ModelConnectionError:
  535. raise
  536. except Exception as e:
  537. error = ModelAPIError(f"本地Qwen3-8B-lq-lora模型初始化异常: {e}")
  538. return self._handle_model_error("lq_qwen3_8b_lora", error)
  539. def _get_lq_qwen3_4b_model(self):
  540. """
  541. 获取本地Qwen3-4B-Instruct模型
  542. Returns:
  543. ChatOpenAI: 配置好的本地Qwen3-4B模型实例
  544. """
  545. try:
  546. server_url = self.config.get("lq_qwen3_4b", "QWEN_LOCAL_1_5B_SERVER_URL", "http://192.168.91.253:9001/v1")
  547. model_id = self.config.get("lq_qwen3_4b", "QWEN_LOCAL_1_5B_MODEL_ID", "Qwen3-4B")
  548. api_key = self.config.get("lq_qwen3_4b", "QWEN_LOCAL_1_5B_API_KEY", "dummy")
  549. if not all([server_url, model_id]):
  550. raise ModelConfigError("本地Qwen3-4B模型配置不完整")
  551. if not self._check_connection(server_url, api_key, timeout=3):
  552. logger.warning(f"本地Qwen3-4B模型服务连接失败: {server_url}")
  553. raise ModelConnectionError(f"无法连接到本地Qwen3-4B模型服务: {server_url}")
  554. llm = ChatOpenAI(
  555. base_url=server_url,
  556. model=model_id,
  557. api_key=api_key,
  558. temperature=0.7,
  559. timeout=self.request_timeout,
  560. )
  561. logger.info(f"本地Qwen3-4B模型初始化成功: {model_id}")
  562. return llm
  563. except ModelConfigError:
  564. raise
  565. except ModelConnectionError:
  566. raise
  567. except Exception as e:
  568. error = ModelAPIError(f"本地Qwen3-4B模型初始化异常: {e}")
  569. return self._handle_model_error("lq_qwen3_4b", error)
  570. def _get_qwen_local_14b_model(self):
  571. """
  572. 获取本地Qwen3-14B-Instruct模型
  573. Returns:
  574. ChatOpenAI: 配置好的本地Qwen3-14B模型实例
  575. """
  576. try:
  577. server_url = self.config.get("qwen_local_14b", "QWEN_LOCAL_14B_SERVER_URL", "http://192.168.91.253:9003/v1")
  578. model_id = self.config.get("qwen_local_14b", "QWEN_LOCAL_14B_MODEL_ID", "Qwen3-14B")
  579. api_key = self.config.get("qwen_local_14b", "QWEN_LOCAL_14B_API_KEY", "dummy")
  580. if not all([server_url, model_id]):
  581. raise ModelConfigError("本地Qwen3-14B模型配置不完整")
  582. if not self._check_connection(server_url, api_key, timeout=3):
  583. logger.warning(f"本地Qwen3-14B模型服务连接失败: {server_url}")
  584. raise ModelConnectionError(f"无法连接到本地Qwen3-14B模型服务: {server_url}")
  585. llm = ChatOpenAI(
  586. base_url=server_url,
  587. model=model_id,
  588. api_key=api_key,
  589. temperature=0.7,
  590. timeout=self.request_timeout,
  591. )
  592. logger.info(f"本地Qwen3-14B模型初始化成功: {model_id}")
  593. return llm
  594. except ModelConfigError:
  595. raise
  596. except ModelConnectionError:
  597. raise
  598. except Exception as e:
  599. error = ModelAPIError(f"本地Qwen3-14B模型初始化异常: {e}")
  600. return self._handle_model_error("qwen_local_14b", error)
  601. def _get_qwen3_5_35b_a3b_model(self):
  602. """
  603. 获取 DashScope Qwen3.5-35B-A3B 模型
  604. Returns:
  605. ChatOpenAI: 配置好的 DashScope Qwen3.5-35B-A3B 模型实例
  606. """
  607. try:
  608. url = self.config.get("qwen3_5_35b_a3b", "DASHSCOPE_SERVER_URL")
  609. model_id = self.config.get("qwen3_5_35b_a3b", "DASHSCOPE_MODEL_ID")
  610. api_key = self.config.get("qwen3_5_35b_a3b", "DASHSCOPE_API_KEY")
  611. # 验证配置完整性
  612. if not all([url, model_id, api_key]):
  613. missing = []
  614. if not url:
  615. missing.append("DASHSCOPE_SERVER_URL")
  616. if not model_id:
  617. missing.append("DASHSCOPE_MODEL_ID")
  618. if not api_key:
  619. missing.append("DASHSCOPE_API_KEY")
  620. raise ModelConfigError(f"DashScope Qwen3.5-35B 模型配置不完整,缺少: {', '.join(missing)}")
  621. llm = ChatOpenAI(
  622. base_url=url,
  623. model=model_id,
  624. api_key=api_key,
  625. temperature=0.7,
  626. timeout=self.request_timeout,
  627. extra_body={
  628. "chat_template_kwargs": {"enable_thinking": False}
  629. }
  630. )
  631. logger.info(f"DashScope Qwen3.5-35B 模型初始化成功: {model_id} (思考模式: 关闭)")
  632. return llm
  633. except ModelConfigError:
  634. raise
  635. except Exception as e:
  636. return self._handle_model_error("qwen3_5_35b_a3b", ModelAPIError(str(e)))
  637. def _get_qwen3_5_27b_model(self):
  638. """
  639. 获取 DashScope Qwen3.5-27B 模型
  640. Returns:
  641. ChatOpenAI: 配置好的 DashScope Qwen3.5-27B 模型实例
  642. """
  643. try:
  644. url = self.config.get("qwen3_5_27b", "DASHSCOPE_SERVER_URL")
  645. model_id = self.config.get("qwen3_5_27b", "DASHSCOPE_MODEL_ID")
  646. api_key = self.config.get("qwen3_5_27b", "DASHSCOPE_API_KEY")
  647. # 验证配置完整性
  648. if not all([url, model_id, api_key]):
  649. missing = []
  650. if not url:
  651. missing.append("DASHSCOPE_SERVER_URL")
  652. if not model_id:
  653. missing.append("DASHSCOPE_MODEL_ID")
  654. if not api_key:
  655. missing.append("DASHSCOPE_API_KEY")
  656. raise ModelConfigError(f"DashScope Qwen3.5-27B 模型配置不完整,缺少: {', '.join(missing)}")
  657. llm = ChatOpenAI(
  658. base_url=url,
  659. model=model_id,
  660. api_key=api_key,
  661. temperature=0.7,
  662. timeout=self.request_timeout,
  663. extra_body={
  664. "chat_template_kwargs": {"enable_thinking": False}
  665. }
  666. )
  667. logger.info(f"DashScope Qwen3.5-27B 模型初始化成功: {model_id} (思考模式: 关闭)")
  668. return llm
  669. except ModelConfigError:
  670. raise
  671. except Exception as e:
  672. return self._handle_model_error("qwen3_5_27b", ModelAPIError(str(e)))
  673. def _get_qwen3_5_122b_a10b_model(self):
  674. """
  675. 获取 DashScope Qwen3.5-122B-A10B 模型
  676. Returns:
  677. ChatOpenAI: 配置好的 DashScope Qwen3.5-122B-A10B 模型实例
  678. """
  679. try:
  680. url = self.config.get("qwen3_5_122b_a10b", "DASHSCOPE_SERVER_URL")
  681. model_id = self.config.get("qwen3_5_122b_a10b", "DASHSCOPE_MODEL_ID")
  682. api_key = self.config.get("qwen3_5_122b_a10b", "DASHSCOPE_API_KEY")
  683. # 验证配置完整性
  684. if not all([url, model_id, api_key]):
  685. missing = []
  686. if not url:
  687. missing.append("DASHSCOPE_SERVER_URL")
  688. if not model_id:
  689. missing.append("DASHSCOPE_MODEL_ID")
  690. if not api_key:
  691. missing.append("DASHSCOPE_API_KEY")
  692. raise ModelConfigError(f"DashScope Qwen3.5-122B 模型配置不完整,缺少: {', '.join(missing)}")
  693. llm = ChatOpenAI(
  694. base_url=url,
  695. model=model_id,
  696. api_key=api_key,
  697. temperature=0.7,
  698. timeout=self.request_timeout,
  699. extra_body={
  700. "chat_template_kwargs": {"enable_thinking": False}
  701. }
  702. )
  703. logger.info(f"DashScope Qwen3.5-122B 模型初始化成功: {model_id} (思考模式: 关闭)")
  704. return llm
  705. except ModelConfigError:
  706. raise
  707. except Exception as e:
  708. return self._handle_model_error("qwen3_5_122b_a10b", ModelAPIError(str(e)))
  709. def _get_siliconflow_embedding_model(self):
  710. """
  711. 获取硅基流动Qwen3-Embedding-8B嵌入模型
  712. Returns:
  713. OpenAIEmbeddings: 配置好的硅基流动Qwen3-Embedding-8B嵌入模型实例
  714. """
  715. try:
  716. server_url = self.config.get("siliconflow_embed", "SLCF_EMBED_SERVER_URL")
  717. api_key = self.config.get("siliconflow_embed", "SLCF_EMBED_API_KEY")
  718. model_id = self.config.get("siliconflow_embed", "SLCF_EMBED_MODEL_ID", "Qwen/Qwen3-Embedding-8B")
  719. dimensions = self.config.get("siliconflow_embed", "SLCF_EMBED_DIMENSIONS", "4096")
  720. # 验证配置完整性
  721. if not all([server_url, api_key, model_id]):
  722. missing = []
  723. if not server_url:
  724. missing.append("SLCF_EMBED_SERVER_URL")
  725. if not api_key:
  726. missing.append("SLCF_EMBED_API_KEY")
  727. if not model_id:
  728. missing.append("SLCF_EMBED_MODEL_ID")
  729. raise ModelConfigError(f"硅基流动Embedding模型配置不完整,缺少: {', '.join(missing)}")
  730. # 检查连接
  731. if not self._check_connection(server_url, api_key):
  732. logger.warning(f"硅基流动Embedding模型服务连接失败: {server_url}")
  733. raise ModelConnectionError(f"无法连接到硅基流动Embedding模型服务: {server_url}")
  734. # 使用 langchain_openai 的 OpenAIEmbeddings
  735. embeddings = OpenAIEmbeddings(
  736. base_url=server_url,
  737. model=model_id,
  738. api_key=api_key,
  739. timeout=self.request_timeout,
  740. tiktoken_enabled=False,
  741. check_embedding_ctx_length=False,
  742. max_retries=0, # 禁用SDK内置重试,由EmbeddingClient统一管理
  743. )
  744. logger.info(f"硅基流动Embedding模型初始化成功: {model_id} (dimensions: {dimensions})")
  745. return embeddings
  746. except ModelConfigError:
  747. raise
  748. except ModelConnectionError:
  749. raise
  750. except Exception as e:
  751. error = ModelAPIError(f"硅基流动Embedding模型初始化异常: {e}")
  752. return self._handle_model_error("siliconflow_embed", error)
  753. def _get_shutian_qwen3_5_122b_model(self):
  754. """
  755. 获取蜀天Qwen3.5-122B-A10B模型
  756. Returns:
  757. ChatOpenAI: 配置好的蜀天Qwen3.5-122B模型实例
  758. """
  759. try:
  760. server_url = self.config.get("shutian", "SHUTIAN_122B_SERVER_URL", "http://183.220.37.46:25423/v1")
  761. model_id = self.config.get("shutian", "SHUTIAN_122B_MODEL_ID", "/model/Qwen3.5-122B-A10B")
  762. api_key = self.config.get("shutian", "SHUTIAN_122B_API_KEY", "lq123456")
  763. # 检查服务连接
  764. if not self._check_connection(server_url, api_key, timeout=3):
  765. logger.warning(f"蜀天Qwen3.5-122B模型服务连接失败: {server_url}")
  766. raise ModelConnectionError(f"无法连接到蜀天Qwen3.5-122B模型服务: {server_url}")
  767. llm = ChatOpenAI(
  768. base_url=server_url,
  769. model=model_id,
  770. api_key=api_key,
  771. temperature=0.7,
  772. timeout=self.request_timeout,
  773. )
  774. logger.info(f"蜀天Qwen3.5-122B模型初始化成功: {model_id}")
  775. return llm
  776. except ModelConnectionError:
  777. raise
  778. except Exception as e:
  779. error = ModelAPIError(f"蜀天Qwen3.5-122B模型初始化异常: {e}")
  780. return self._handle_model_error("shutian_qwen3_5_122b", error)
  781. def _get_shutian_qwen3_8b_model(self):
  782. """
  783. 获取蜀天Qwen3-8B模型
  784. Returns:
  785. ChatOpenAI: 配置好的蜀天Qwen3-8B模型实例
  786. """
  787. try:
  788. server_url = self.config.get("shutian", "SHUTIAN_8B_SERVER_URL", "http://183.220.37.46:25424/v1")
  789. model_id = self.config.get("shutian", "SHUTIAN_8B_MODEL_ID", "/model/Qwen3-8B")
  790. api_key = self.config.get("shutian", "SHUTIAN_8B_API_KEY", "lq123456")
  791. # 检查服务连接
  792. if not self._check_connection(server_url, api_key, timeout=3):
  793. logger.warning(f"蜀天Qwen3-8B模型服务连接失败: {server_url}")
  794. raise ModelConnectionError(f"无法连接到蜀天Qwen3-8B模型服务: {server_url}")
  795. llm = ChatOpenAI(
  796. base_url=server_url,
  797. model=model_id,
  798. api_key=api_key,
  799. temperature=0.7,
  800. timeout=self.request_timeout,
  801. )
  802. logger.info(f"蜀天Qwen3-8B模型初始化成功: {model_id}")
  803. return llm
  804. except ModelConnectionError:
  805. raise
  806. except Exception as e:
  807. error = ModelAPIError(f"蜀天Qwen3-8B模型初始化异常: {e}")
  808. return self._handle_model_error("shutian_qwen3_8b", error)
  809. def _get_shutian_qwen3_6_27b_model(self):
  810. """
  811. 获取蜀天Qwen3.6-27B模型
  812. Returns:
  813. ChatOpenAI: 配置好的蜀天Qwen3.6-27B模型实例
  814. """
  815. try:
  816. server_url = self.config.get("shutian", "SHUTIAN_27B_SERVER_URL", "http://183.220.37.46:25424/v1")
  817. model_id = self.config.get("shutian", "SHUTIAN_27B_MODEL_ID", "/model/Qwen3.6-27B")
  818. api_key = self.config.get("shutian", "SHUTIAN_27B_API_KEY", "sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615")
  819. # 检查服务连接
  820. if not self._check_connection(server_url, api_key, timeout=3):
  821. logger.warning(f"蜀天Qwen3.6-27B模型服务连接失败: {server_url}")
  822. raise ModelConnectionError(f"无法连接到蜀天Qwen3.6-27B模型服务: {server_url}")
  823. llm = ChatOpenAI(
  824. base_url=server_url,
  825. model=model_id,
  826. api_key=api_key,
  827. temperature=0.7,
  828. timeout=self.request_timeout,
  829. )
  830. logger.info(f"蜀天Qwen3.6-27B模型初始化成功: {model_id}")
  831. return llm
  832. except ModelConnectionError:
  833. raise
  834. except Exception as e:
  835. error = ModelAPIError(f"蜀天Qwen3.6-27B模型初始化异常: {e}")
  836. return self._handle_model_error("shutian_qwen3_6_27b", error)
  837. def _get_shutian_qwen3_5_35b_model(self):
  838. """
  839. 获取蜀天Qwen3.5-35B模型
  840. Returns:
  841. ChatOpenAI: 配置好的蜀天Qwen3.5-35B模型实例
  842. """
  843. try:
  844. server_url = self.config.get("shutian", "SHUTIAN_35B_SERVER_URL", "http://183.220.37.46:25427/v1")
  845. model_id = self.config.get("shutian", "SHUTIAN_35B_MODEL_ID", "/model/Qwen3.5-35B")
  846. api_key = self.config.get("shutian", "SHUTIAN_35B_API_KEY", "lq123456")
  847. logger.info(f"正在初始化蜀天Qwen3.5-35B模型,服务器地址: {server_url}")
  848. # 检查服务连接(可通过配置禁用)
  849. skip_check = self.config.get("shutian", "SKIP_CONNECTION_CHECK", "false").lower() == "true"
  850. if not skip_check:
  851. connection_ok = self._check_connection(server_url, api_key, timeout=5)
  852. if not connection_ok:
  853. # 连接检查失败时记录警告,但不阻止初始化(实际调用时如果失败会报错)
  854. logger.warning(f"蜀天Qwen3.5-35B模型服务连接检查失败: {server_url},但仍尝试初始化")
  855. else:
  856. logger.info(f"蜀天Qwen3.5-35B模型服务连接检查通过: {server_url}")
  857. else:
  858. logger.info(f"跳过蜀天Qwen3.5-35B模型连接检查(SKIP_CONNECTION_CHECK=true)")
  859. llm = ChatOpenAI(
  860. base_url=server_url,
  861. model=model_id,
  862. api_key=api_key,
  863. temperature=0.7,
  864. timeout=self.request_timeout,
  865. )
  866. # 记录模型实例的详细信息用于调试
  867. logger.info(f"蜀天Qwen3.5-35B模型初始化成功: model_id={model_id}, base_url={llm.base_url if hasattr(llm, 'base_url') else server_url}")
  868. return llm
  869. except ModelConnectionError:
  870. raise
  871. except Exception as e:
  872. error = ModelAPIError(f"蜀天Qwen3.5-35B模型初始化异常: {e}")
  873. return self._handle_model_error("shutian_qwen3_5_35b", error)
  874. def _get_shutian_qwen3_embed(self):
  875. """
  876. 获取蜀天Qwen3-Embedding-8B嵌入模型
  877. Returns:
  878. OpenAIEmbeddings: 配置好的蜀天Embedding模型实例
  879. """
  880. try:
  881. server_url = self.config.get("shutian", "SHUTIAN_EMBED_SERVER_URL", "http://183.220.37.46:25425/v1")
  882. model_id = self.config.get("shutian", "SHUTIAN_EMBED_MODEL_ID", "/model/Qwen3-Embedding-8B")
  883. api_key = self.config.get("shutian", "SHUTIAN_EMBED_API_KEY", "lq123456")
  884. # 检查服务连接
  885. if not self._check_connection(server_url, api_key, timeout=3):
  886. logger.warning(f"蜀天Qwen3-Embedding模型服务连接失败: {server_url}")
  887. raise ModelConnectionError(f"无法连接到蜀天Qwen3-Embedding模型服务: {server_url}")
  888. embeddings = OpenAIEmbeddings(
  889. base_url=server_url,
  890. model=model_id,
  891. api_key=api_key,
  892. timeout=self.request_timeout,
  893. tiktoken_enabled=False,
  894. check_embedding_ctx_length=False,
  895. max_retries=0, # 禁用SDK内置重试,由EmbeddingClient统一管理
  896. )
  897. logger.info(f"蜀天Qwen3-Embedding-8B模型初始化成功: {model_id}")
  898. return embeddings
  899. except ModelConnectionError:
  900. raise
  901. except Exception as e:
  902. error = ModelAPIError(f"蜀天Qwen3-Embedding模型初始化异常: {e}")
  903. return self._handle_model_error("shutian_qwen3_embed", error)
# Create the module-level ModelHandler instance; get_models() below uses it.
model_handler = ModelHandler()
  906. def get_models():
  907. """
  908. 获取模型的全局函数
  909. Returns:
  910. tuple: (llm, chat, embed) - LLM模型、聊天模型和嵌入模型实例
  911. 注意:当前llm和chat使用相同模型实例,embed暂时返回None
  912. Note:
  913. 这是一个便捷函数,直接使用全局model_handler实例获取模型
  914. """
  915. try:
  916. llm = model_handler.get_models()
  917. # 暂时返回相同的模型作为chat和embed
  918. return llm, llm, None
  919. except Exception as e:
  920. logger.error(f"获取模型失败: {e}")
  921. raise ModelConnectionError(f"无法获取模型服务: {e}")