@@ -16,21 +16,46 @@ AI model handler
 - lq_qwen3_4b: local Qwen3-4B model
 - qwen_local_14b: local Qwen3-14B model
 - lq_qwen3_8b_emd: local Qwen3-Embedding-8B embedding model
+- siliconflow_embed: SiliconFlow Qwen3-Embedding-8B embedding model
 - lq_bge_reranker_v2_m3: local BGE-reranker-v2-m3 reranker model
 """

+# Reduce transformers log verbosity and disable Hugging Face Hub telemetry
+# (quiets startup; note this does not skip the framework detection scan itself)
+import os
+os.environ["TRANSFORMERS_VERBOSITY"] = "error"
+os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
+
-
+import requests
 from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from foundation.infrastructure.config.config import config_handler
 from foundation.observability.logger.loggering import server_logger as logger


+class ModelConnectionError(Exception):
+    """Raised when a model service cannot be reached."""
+    pass
+
+
+class ModelConfigError(Exception):
+    """Raised when a model's configuration is missing or invalid."""
+    pass
+
+
+class ModelAPIError(Exception):
+    """Raised when a model API call fails."""
+    pass
+
+
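+# Caller-side sketch (illustrative only). The private getters below raise
+# ModelConfigError / ModelConnectionError; get_models() folds any failure into
+# a ModelConnectionError after trying the Gemini fallback.
+#
+#     try:
+#         llm = model_handler.get_models()
+#     except ModelConnectionError:
+#         ...  # no model service reachable; alert or retry
+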
 class ModelHandler:
     """
     AI model handler: manages the creation and configuration of multiple AI models.
     """

+    # Connection/request timeouts (seconds) and retry budget
+    CONNECTION_TIMEOUT = 30
+    REQUEST_TIMEOUT = 120
+    MAX_RETRIES = 2
+
     def __init__(self):
         """
         Initialize the model handler.
@@ -38,6 +63,71 @@ class ModelHandler:
         Load the config handler, used later to read each model's configuration.
         """
         self.config = config_handler
+        self._model_cache = {}  # cache of constructed model instances
+
+    def _check_connection(self, base_url: str, api_key: str = None, timeout: int = 5) -> bool:
+        """
+        Check whether a model service endpoint is reachable.
+
+        Args:
+            base_url: base URL of the model service
+            api_key: API key (optional)
+            timeout: timeout in seconds
+
+        Returns:
+            bool: whether the connection is available
+        """
+        try:
+            # Build the health-check URL (OpenAI-compatible /models listing)
+            health_url = f"{base_url.rstrip('/')}/models"
+
+            headers = {}
+            if api_key and api_key != "dummy":
+                headers["Authorization"] = f"Bearer {api_key}"
+
+            response = requests.get(
+                health_url,
+                headers=headers,
+                timeout=timeout
+            )
+
+            # Treat any 2xx status as available
+            return 200 <= response.status_code < 300
+
+        except requests.exceptions.Timeout:
+            logger.warning(f"Connection timed out: {base_url}")
+            return False
+        except requests.exceptions.ConnectionError as e:
+            logger.warning(f"Connection error: {base_url}, error: {e}")
+            return False
+        except Exception as e:
+            logger.warning(f"Connection check failed: {base_url}, error: {e}")
+            return False
+
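+    # Standalone probe sketch (illustrative; the endpoint below is an example,
+    # not a deployment default):
+    #
+    #     if ModelHandler()._check_connection("http://127.0.0.1:9002/v1",
+    #                                         api_key="dummy", timeout=3):
+    #         print("endpoint reachable")
+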
+    def _handle_model_error(self, model_name: str, error: Exception, fallback_model=None):
+        """
+        Handle model errors in one place.
+
+        Args:
+            model_name: the model's name
+            error: the exception object
+            fallback_model: fallback model instance (optional)
+
+        Returns:
+            The fallback model instance, or None if unavailable.
+        """
+        error_type = type(error).__name__
+        error_msg = str(error)
+
+        logger.error(f"Model initialization failed [{model_name}]: {error_type} - {error_msg}")
+
+        # If a fallback model was provided, log it and return it
+        if fallback_model:
+            logger.warning(f"Falling back to model: {fallback_model.__class__.__name__}")
+            return fallback_model
+
+        # No fallback available: return None and let the caller decide
+        return None
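+
+    # Illustrative fallback flow (a sketch; mirrors how the private getters
+    # below route unexpected failures through _handle_model_error):
+    #
+    #     try:
+    #         llm = self._get_doubao_model()
+    #     except Exception as exc:
+    #         llm = self._handle_model_error("doubao", exc, fallback_model=None)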

     def get_models(self):
         """
@@ -54,29 +144,104 @@ class ModelHandler:
         model_type = self.config.get("model", "MODEL_TYPE")
         logger.info(f"Initializing AI model, model type: {model_type}")

-        if model_type == "doubao":
-            model = self._get_doubao_model()
-        elif model_type == "gemini":
-            model = self._get_gemini_model()
-        elif model_type == "qwen":
-            model = self._get_qwen_model()
-        elif model_type == "deepseek":
-            model = self._get_deepseek_model()
-        elif model_type == "lq_qwen3_8b":
-            model = self._get_lq_qwen3_8b_model()
-        elif model_type == "lq_qwen3_8b_lq_lora":
-            model = self._get_lq_qwen3_8b_lora_model()
-        elif model_type == "lq_qwen3_4b":
-            model = self._get_lq_qwen3_4b_model()
-        elif model_type == "qwen_local_14b":
-            model = self._get_qwen_local_14b_model()
-        else:
-            # Default to Gemini
-            logger.warning(f"Unknown model type '{model_type}', using the default Gemini model")
-            model = self._get_gemini_model()
-
-        logger.info(f"AI model initialization complete: {model_type}")
-        return model
+        # Check the cache first
+        cache_key = f"chat_{model_type}"
+        if cache_key in self._model_cache:
+            logger.info(f"Using cached model: {model_type}")
+            return self._model_cache[cache_key]
+
+        model = None
+
+        try:
+            if model_type == "doubao":
+                model = self._get_doubao_model()
+            elif model_type == "gemini":
+                model = self._get_gemini_model()
+            elif model_type == "qwen":
+                model = self._get_qwen_model()
+            elif model_type == "deepseek":
+                model = self._get_deepseek_model()
+            elif model_type == "lq_qwen3_8b":
+                model = self._get_lq_qwen3_8b_model()
+            elif model_type == "lq_qwen3_8b_lq_lora":
+                model = self._get_lq_qwen3_8b_lora_model()
+            elif model_type == "lq_qwen3_4b":
+                model = self._get_lq_qwen3_4b_model()
+            elif model_type == "qwen_local_14b":
+                model = self._get_qwen_local_14b_model()
+            else:
+                # Default to Gemini
+                logger.warning(f"Unknown model type '{model_type}', using the default Gemini model")
+                model = self._get_gemini_model()
+
+            if model:
+                self._model_cache[cache_key] = model
+                logger.info(f"AI model initialization complete: {model_type}")
+                return model
+            else:
+                raise ModelAPIError(f"Model initialization returned None: {model_type}")
+
+        except Exception as e:
+            logger.error(f"Failed to get model [{model_type}]: {e}")
+
+            # Try Gemini as a fallback
+            if model_type != "gemini":
+                logger.info("Trying the Gemini model as a fallback")
+                try:
+                    fallback_model = self._get_gemini_model()
+                    if fallback_model:
+                        self._model_cache[cache_key] = fallback_model
+                        logger.warning("Switched to the Gemini fallback model")
+                        return fallback_model
+                except Exception as fallback_error:
+                    logger.error(f"Fallback model failed as well: {fallback_error}")
+
+            # All models failed; raise
+            raise ModelConnectionError(f"Unable to initialize any model service: {e}")
+
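+    # Usage sketch (illustrative):
+    #
+    #     handler = ModelHandler()
+    #     llm = handler.get_models()            # resolved once, then cached
+    #     print(llm.invoke("ping").content)     # LangChain chat interface
+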
+    def get_embedding_model(self):
+        """
+        Get an embedding model instance.
+
+        Returns:
+            OpenAIEmbeddings: a configured embedding model instance
+
+        Note:
+            Selected via the EMBEDDING_MODEL_TYPE config parameter.
+            Supported model types: lq_qwen3_8b_emd, siliconflow_embed.
+            Defaults to the local lq_qwen3_8b_emd model.
+        """
+        embedding_model_type = self.config.get("model", "EMBEDDING_MODEL_TYPE", "lq_qwen3_8b_emd")
+        logger.info(f"Initializing embedding model, model type: {embedding_model_type}")
+
+        # Check the cache first
+        cache_key = f"embed_{embedding_model_type}"
+        if cache_key in self._model_cache:
+            logger.info(f"Using cached embedding model: {embedding_model_type}")
+            return self._model_cache[cache_key]
+
+        model = None
+
+        try:
+            if embedding_model_type == "siliconflow_embed":
+                model = self._get_siliconflow_embedding_model()
+            elif embedding_model_type == "lq_qwen3_8b_emd":
+                model = self._get_lq_qwen3_8b_emd()
+            else:
+                # Default to the local model
+                logger.warning(f"Unknown embedding model type '{embedding_model_type}', using the default local model")
+                model = self._get_lq_qwen3_8b_emd()
+
+            if model:
+                self._model_cache[cache_key] = model
+                logger.info(f"Embedding model initialization complete: {embedding_model_type}")
+                return model
+            else:
+                raise ModelAPIError(f"Embedding model initialization returned None: {embedding_model_type}")
+
+        except Exception as e:
+            logger.error(f"Failed to get embedding model [{embedding_model_type}]: {e}")
+            raise ModelConnectionError(f"Unable to initialize the embedding model service: {e}")

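+    # Usage sketch (illustrative):
+    #
+    #     embedder = handler.get_embedding_model()
+    #     vec = embedder.embed_query("hello")            # one text -> one vector
+    #     vecs = embedder.embed_documents(["a", "b"])    # batch embedding
+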
     def _get_doubao_model(self):
         """
@@ -85,20 +250,47 @@ class ModelHandler:
         Returns:
             ChatOpenAI: a configured Doubao model instance
         """
-        doubao_url = self.config.get("doubao", "DOUBAO_SERVER_URL")
-        doubao_model_id = self.config.get("doubao", "DOUBAO_MODEL_ID")
-        doubao_api_key = self.config.get("doubao", "DOUBAO_API_KEY")
-
-        llm = ChatOpenAI(
-            base_url=doubao_url,
-            model=doubao_model_id,
-            api_key=doubao_api_key,
-            temperature=0.7,
-            extra_body={
-                "enable_thinking": False,
-            })
-
-        return llm
+        try:
+            doubao_url = self.config.get("doubao", "DOUBAO_SERVER_URL")
+            doubao_model_id = self.config.get("doubao", "DOUBAO_MODEL_ID")
+            doubao_api_key = self.config.get("doubao", "DOUBAO_API_KEY")
+
+            # Validate that the configuration is complete
+            if not all([doubao_url, doubao_model_id, doubao_api_key]):
+                missing = []
+                if not doubao_url:
+                    missing.append("DOUBAO_SERVER_URL")
+                if not doubao_model_id:
+                    missing.append("DOUBAO_MODEL_ID")
+                if not doubao_api_key:
+                    missing.append("DOUBAO_API_KEY")
+                raise ModelConfigError(f"Doubao model configuration is incomplete, missing: {', '.join(missing)}")
+
+            # Check connectivity
+            if not self._check_connection(doubao_url, doubao_api_key):
+                logger.warning(f"Doubao model service connection failed: {doubao_url}")
+                raise ModelConnectionError(f"Unable to connect to the Doubao model service: {doubao_url}")
+
+            llm = ChatOpenAI(
+                base_url=doubao_url,
+                model=doubao_model_id,
+                api_key=doubao_api_key,
+                temperature=0.7,
+                timeout=self.REQUEST_TIMEOUT,
+                extra_body={
+                    "enable_thinking": False,
+                })
+
+            logger.info(f"Doubao model initialized successfully: {doubao_model_id}")
+            return llm
+
+        except ModelConfigError:
+            raise
+        except ModelConnectionError:
+            raise
+        except Exception as e:
+            error = ModelAPIError(f"Doubao model initialization failed: {e}")
+            return self._handle_model_error("doubao", error)

     def _get_qwen_model(self):
         """
@@ -107,20 +299,47 @@ class ModelHandler:
         Returns:
             ChatOpenAI: a configured Qwen model instance
         """
-        qwen_url = self.config.get("qwen", "QWEN_SERVER_URL")
-        qwen_model_id = self.config.get("qwen", "QWEN_MODEL_ID")
-        qwen_api_key = self.config.get("qwen", "QWEN_API_KEY")
-
-        llm = ChatOpenAI(
-            base_url=qwen_url,
-            model=qwen_model_id,
-            api_key=qwen_api_key,
-            temperature=0.7,
-            extra_body={
-                "enable_thinking": False,
-            })
-
-        return llm
+        try:
+            qwen_url = self.config.get("qwen", "QWEN_SERVER_URL")
+            qwen_model_id = self.config.get("qwen", "QWEN_MODEL_ID")
+            qwen_api_key = self.config.get("qwen", "QWEN_API_KEY")
+
+            # Validate that the configuration is complete
+            if not all([qwen_url, qwen_model_id, qwen_api_key]):
+                missing = []
+                if not qwen_url:
+                    missing.append("QWEN_SERVER_URL")
+                if not qwen_model_id:
+                    missing.append("QWEN_MODEL_ID")
+                if not qwen_api_key:
+                    missing.append("QWEN_API_KEY")
+                raise ModelConfigError(f"Qwen model configuration is incomplete, missing: {', '.join(missing)}")
+
+            # Check connectivity
+            if not self._check_connection(qwen_url, qwen_api_key):
+                logger.warning(f"Qwen model service connection failed: {qwen_url}")
+                raise ModelConnectionError(f"Unable to connect to the Qwen model service: {qwen_url}")
+
+            llm = ChatOpenAI(
+                base_url=qwen_url,
+                model=qwen_model_id,
+                api_key=qwen_api_key,
+                temperature=0.7,
+                timeout=self.REQUEST_TIMEOUT,
+                extra_body={
+                    "enable_thinking": False,
+                })
+
+            logger.info(f"Qwen model initialized successfully: {qwen_model_id}")
+            return llm
+
+        except ModelConfigError:
+            raise
+        except ModelConnectionError:
+            raise
+        except Exception as e:
+            error = ModelAPIError(f"Qwen model initialization failed: {e}")
+            return self._handle_model_error("qwen", error)

     def _get_deepseek_model(self):
         """
@@ -129,20 +348,47 @@ class ModelHandler:
         Returns:
             ChatOpenAI: a configured DeepSeek model instance
         """
-        deepseek_url = self.config.get("deepseek", "DEEPSEEK_SERVER_URL")
-        deepseek_model_id = self.config.get("deepseek", "DEEPSEEK_MODEL_ID")
-        deepseek_api_key = self.config.get("deepseek", "DEEPSEEK_API_KEY")
-
-        llm = ChatOpenAI(
-            base_url=deepseek_url,
-            model=deepseek_model_id,
-            api_key=deepseek_api_key,
-            temperature=0.7,
-            extra_body={
-                "enable_thinking": False,
-            })
-
-        return llm
+        try:
+            deepseek_url = self.config.get("deepseek", "DEEPSEEK_SERVER_URL")
+            deepseek_model_id = self.config.get("deepseek", "DEEPSEEK_MODEL_ID")
+            deepseek_api_key = self.config.get("deepseek", "DEEPSEEK_API_KEY")
+
+            # Validate that the configuration is complete
+            if not all([deepseek_url, deepseek_model_id, deepseek_api_key]):
+                missing = []
+                if not deepseek_url:
+                    missing.append("DEEPSEEK_SERVER_URL")
+                if not deepseek_model_id:
+                    missing.append("DEEPSEEK_MODEL_ID")
+                if not deepseek_api_key:
+                    missing.append("DEEPSEEK_API_KEY")
+                raise ModelConfigError(f"DeepSeek model configuration is incomplete, missing: {', '.join(missing)}")
+
+            # Check connectivity
+            if not self._check_connection(deepseek_url, deepseek_api_key):
+                logger.warning(f"DeepSeek model service connection failed: {deepseek_url}")
+                raise ModelConnectionError(f"Unable to connect to the DeepSeek model service: {deepseek_url}")
+
+            llm = ChatOpenAI(
+                base_url=deepseek_url,
+                model=deepseek_model_id,
+                api_key=deepseek_api_key,
+                temperature=0.7,
+                timeout=self.REQUEST_TIMEOUT,
+                extra_body={
+                    "enable_thinking": False,
+                })
+
+            logger.info(f"DeepSeek model initialized successfully: {deepseek_model_id}")
+            return llm
+
+        except ModelConfigError:
+            raise
+        except ModelConnectionError:
+            raise
+        except Exception as e:
+            error = ModelAPIError(f"DeepSeek model initialization failed: {e}")
+            return self._handle_model_error("deepseek", error)

     def _get_gemini_model(self):
         """
@@ -151,18 +397,45 @@ class ModelHandler:
         Returns:
             ChatOpenAI: a configured Gemini model instance
         """
-        gemini_url = self.config.get("gemini", "GEMINI_SERVER_URL")
-        gemini_model_id = self.config.get("gemini", "GEMINI_MODEL_ID")
-        gemini_api_key = self.config.get("gemini", "GEMINI_API_KEY")
-
-        llm = ChatOpenAI(
-            base_url=gemini_url,
-            model=gemini_model_id,
-            api_key=gemini_api_key,
-            temperature=0.7,
+        try:
+            gemini_url = self.config.get("gemini", "GEMINI_SERVER_URL")
+            gemini_model_id = self.config.get("gemini", "GEMINI_MODEL_ID")
+            gemini_api_key = self.config.get("gemini", "GEMINI_API_KEY")
+
+            # Validate that the configuration is complete
+            if not all([gemini_url, gemini_model_id, gemini_api_key]):
+                missing = []
+                if not gemini_url:
+                    missing.append("GEMINI_SERVER_URL")
+                if not gemini_model_id:
+                    missing.append("GEMINI_MODEL_ID")
+                if not gemini_api_key:
+                    missing.append("GEMINI_API_KEY")
+                raise ModelConfigError(f"Gemini model configuration is incomplete, missing: {', '.join(missing)}")
+
+            # Check connectivity
+            if not self._check_connection(gemini_url, gemini_api_key):
+                logger.warning(f"Gemini model service connection failed: {gemini_url}")
+                raise ModelConnectionError(f"Unable to connect to the Gemini model service: {gemini_url}")
+
+            llm = ChatOpenAI(
+                base_url=gemini_url,
+                model=gemini_model_id,
+                api_key=gemini_api_key,
+                temperature=0.7,
+                timeout=self.REQUEST_TIMEOUT,
             )

-        return llm
+            logger.info(f"Gemini model initialized successfully: {gemini_model_id}")
+            return llm
+
+        except ModelConfigError:
+            raise
+        except ModelConnectionError:
+            raise
+        except Exception as e:
+            error = ModelAPIError(f"Gemini model initialization failed: {e}")
+            return self._handle_model_error("gemini", error)

     def _get_lq_qwen3_8b_model(self):
         """
@@ -171,14 +444,31 @@ class ModelHandler:
         Returns:
             ChatOpenAI: a configured local Qwen3-8B model instance
         """
-        llm = ChatOpenAI(
-            base_url="http://192.168.91.253:9002/v1",
-            model="Qwen3-8B",
-            api_key="dummy",  # local models use a dummy API key
-            temperature=0.7,
-        )
+        try:
+            server_url = "http://192.168.91.253:9002/v1"
+            model_id = "Qwen3-8B"
+
+            # Check the local service connection
+            if not self._check_connection(server_url, "dummy", timeout=3):
+                logger.warning(f"Local Qwen3-8B model service connection failed: {server_url}")
+                raise ModelConnectionError(f"Unable to connect to the local Qwen3-8B model service: {server_url}")
+
+            llm = ChatOpenAI(
+                base_url=server_url,
+                model=model_id,
+                api_key="dummy",
+                temperature=0.7,
+                timeout=self.REQUEST_TIMEOUT,
+            )
+
+            logger.info(f"Local Qwen3-8B model initialized successfully: {model_id}")
+            return llm

-        return llm
+        except ModelConnectionError:
+            raise
+        except Exception as e:
+            error = ModelAPIError(f"Local Qwen3-8B model initialization failed: {e}")
+            return self._handle_model_error("lq_qwen3_8b", error)

     def _get_lq_qwen3_8b_lora_model(self):
         """
@@ -187,18 +477,43 @@ class ModelHandler:
         Returns:
             ChatOpenAI: a configured local Qwen3-8B-lq-lora model instance
         """
-        server_url = self.config.get("lq_qwen3_8B_lora", "LQ_QWEN3_8B_LQ_LORA_SERVER_URL")
-        model_id = self.config.get("lq_qwen3_8B_lora", "LQ_QWEN3_8B_LQ_LORA_MODEL_ID")
-        api_key = self.config.get("lq_qwen3_8B_lora", "LQ_QWEN3_8B_LQ_LORA_API_KEY")
+        try:
+            server_url = self.config.get("lq_qwen3_8B_lora", "LQ_QWEN3_8B_LQ_LORA_SERVER_URL")
+            model_id = self.config.get("lq_qwen3_8B_lora", "LQ_QWEN3_8B_LQ_LORA_MODEL_ID")
+            api_key = self.config.get("lq_qwen3_8B_lora", "LQ_QWEN3_8B_LQ_LORA_API_KEY", "dummy")
+
+            # Validate that the configuration is complete
+            if not all([server_url, model_id]):
+                missing = []
+                if not server_url:
+                    missing.append("LQ_QWEN3_8B_LQ_LORA_SERVER_URL")
+                if not model_id:
+                    missing.append("LQ_QWEN3_8B_LQ_LORA_MODEL_ID")
+                raise ModelConfigError(f"Local Qwen3-8B-lq-lora model configuration is incomplete, missing: {', '.join(missing)}")
+
+            # Check the local service connection
+            if not self._check_connection(server_url, api_key, timeout=3):
+                logger.warning(f"Local Qwen3-8B-lq-lora model service connection failed: {server_url}")
+                raise ModelConnectionError(f"Unable to connect to the local Qwen3-8B-lq-lora model service: {server_url}")
+
+            llm = ChatOpenAI(
+                base_url=server_url,
+                model=model_id,
+                api_key=api_key,
+                temperature=0.7,
+                timeout=self.REQUEST_TIMEOUT,
+            )

-        llm = ChatOpenAI(
-            base_url=server_url,
-            model=model_id,
-            api_key=api_key,
-            temperature=0.7,
-        )
+            logger.info(f"Local Qwen3-8B-lq-lora model initialized successfully: {model_id}")
+            return llm

-        return llm
+        except ModelConfigError:
+            raise
+        except ModelConnectionError:
+            raise
+        except Exception as e:
+            error = ModelAPIError(f"Local Qwen3-8B-lq-lora model initialization failed: {e}")
+            return self._handle_model_error("lq_qwen3_8b_lora", error)

     def _get_lq_qwen3_4b_model(self):
         """
@@ -207,14 +522,31 @@ class ModelHandler:
         Returns:
             ChatOpenAI: a configured local Qwen3-4B model instance
         """
-        llm = ChatOpenAI(
-            base_url="http://192.168.91.253:9001/v1",
-            model="Qwen3-4B",
-            api_key="dummy",  # local models use a dummy API key
-            temperature=0.7,
-        )
+        try:
+            server_url = "http://192.168.91.253:9001/v1"
+            model_id = "Qwen3-4B"
+
+            # Check the local service connection
+            if not self._check_connection(server_url, "dummy", timeout=3):
+                logger.warning(f"Local Qwen3-4B model service connection failed: {server_url}")
+                raise ModelConnectionError(f"Unable to connect to the local Qwen3-4B model service: {server_url}")
+
+            llm = ChatOpenAI(
+                base_url=server_url,
+                model=model_id,
+                api_key="dummy",
+                temperature=0.7,
+                timeout=self.REQUEST_TIMEOUT,
+            )
+
+            logger.info(f"Local Qwen3-4B model initialized successfully: {model_id}")
+            return llm

-        return llm
+        except ModelConnectionError:
+            raise
+        except Exception as e:
+            error = ModelAPIError(f"Local Qwen3-4B model initialization failed: {e}")
+            return self._handle_model_error("lq_qwen3_4b", error)

     def _get_qwen_local_14b_model(self):
         """
@@ -223,14 +555,31 @@ class ModelHandler:
         Returns:
             ChatOpenAI: a configured local Qwen3-14B model instance
         """
-        llm = ChatOpenAI(
-            base_url="http://192.168.91.253:9003/v1",
-            model="Qwen3-14B",
-            api_key="dummy",  # local models use a dummy API key
-            temperature=0.7,
-        )
+        try:
+            server_url = "http://192.168.91.253:9003/v1"
+            model_id = "Qwen3-14B"
+
+            # Check the local service connection
+            if not self._check_connection(server_url, "dummy", timeout=3):
+                logger.warning(f"Local Qwen3-14B model service connection failed: {server_url}")
+                raise ModelConnectionError(f"Unable to connect to the local Qwen3-14B model service: {server_url}")
+
+            llm = ChatOpenAI(
+                base_url=server_url,
+                model=model_id,
+                api_key="dummy",
+                temperature=0.7,
+                timeout=self.REQUEST_TIMEOUT,
+            )
+
+            logger.info(f"Local Qwen3-14B model initialized successfully: {model_id}")
+            return llm

-        return llm
+        except ModelConnectionError:
+            raise
+        except Exception as e:
+            error = ModelAPIError(f"Local Qwen3-14B model initialization failed: {e}")
+            return self._handle_model_error("qwen_local_14b", error)

     def _get_lq_qwen3_8b_emd(self):
         """
@@ -239,13 +588,79 @@ class ModelHandler:
         Returns:
             OpenAIEmbeddings: a configured local Qwen3-Embedding-8B embedding model instance
         """
-        embeddings = OpenAIEmbeddings(
-            base_url="http://192.168.91.253:9003/v1",
-            model="Qwen3-Embedding-8B",
-            api_key="dummy",  # local models use a dummy API key
-        )
+        try:
+            server_url = "http://192.168.91.253:9003/v1"
+            model_id = "Qwen3-Embedding-8B"
+
+            # Check the local service connection
+            if not self._check_connection(server_url, "dummy", timeout=3):
+                logger.warning(f"Local Qwen3-Embedding-8B model service connection failed: {server_url}")
+                raise ModelConnectionError(f"Unable to connect to the local Qwen3-Embedding-8B model service: {server_url}")
+
+            # Use OpenAIEmbeddings from langchain_openai
+            embeddings = OpenAIEmbeddings(
+                base_url=server_url,
+                model=model_id,
+                api_key="dummy",  # local models use a dummy API key
+                timeout=self.REQUEST_TIMEOUT,
+            )
+
+            logger.info(f"Local Qwen3-Embedding-8B model initialized successfully: {model_id}")
+            return embeddings
+
+        except ModelConnectionError:
+            raise
+        except Exception as e:
+            error = ModelAPIError(f"Local Qwen3-Embedding-8B model initialization failed: {e}")
+            return self._handle_model_error("lq_qwen3_8b_emd", error)
+
+    def _get_siliconflow_embedding_model(self):
+        """
+        Get the SiliconFlow Qwen3-Embedding-8B embedding model.
+
+        Returns:
+            OpenAIEmbeddings: a configured SiliconFlow Qwen3-Embedding-8B embedding model instance
+        """
+        try:
+            server_url = self.config.get("siliconflow_embed", "SLCF_EMBED_SERVER_URL")
+            api_key = self.config.get("siliconflow_embed", "SLCF_EMBED_API_KEY")
+            model_id = self.config.get("siliconflow_embed", "SLCF_EMBED_MODEL_ID", "Qwen/Qwen3-Embedding-8B")
+            dimensions = self.config.get("siliconflow_embed", "SLCF_EMBED_DIMENSIONS", "4096")
+
+            # Validate that the configuration is complete
+            if not all([server_url, api_key, model_id]):
+                missing = []
+                if not server_url:
+                    missing.append("SLCF_EMBED_SERVER_URL")
+                if not api_key:
+                    missing.append("SLCF_EMBED_API_KEY")
+                if not model_id:
+                    missing.append("SLCF_EMBED_MODEL_ID")
+                raise ModelConfigError(f"SiliconFlow embedding model configuration is incomplete, missing: {', '.join(missing)}")
+
+            # Check connectivity
+            if not self._check_connection(server_url, api_key):
+                logger.warning(f"SiliconFlow embedding model service connection failed: {server_url}")
+                raise ModelConnectionError(f"Unable to connect to the SiliconFlow embedding model service: {server_url}")
+
+            # Use OpenAIEmbeddings from langchain_openai
+            embeddings = OpenAIEmbeddings(
+                base_url=server_url,
+                model=model_id,
+                api_key=api_key,
+                timeout=self.REQUEST_TIMEOUT,
+            )
+
+            logger.info(f"SiliconFlow embedding model initialized successfully: {model_id} (dimensions: {dimensions})")
+            return embeddings

-        return embeddings
+        except ModelConfigError:
+            raise
+        except ModelConnectionError:
+            raise
+        except Exception as e:
+            error = ModelAPIError(f"SiliconFlow embedding model initialization failed: {e}")
+            return self._handle_model_error("siliconflow_embed", error)

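+    # Note (sketch, not part of this change): OpenAIEmbeddings also accepts a
+    # `dimensions` argument if the vector size ever needs to be enforced
+    # client-side, e.g. OpenAIEmbeddings(..., dimensions=int(dimensions));
+    # above, dimensions is read for logging only.
+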
@@ -264,6 +679,10 @@ def get_models():
     Note:
        This is a convenience function that uses the global model_handler instance to fetch models.
     """
-    llm = model_handler.get_models()
-    # For now, return the same model for both chat and embed
-    return llm, llm, None
+    try:
+        llm = model_handler.get_models()
+        # For now, return the same model for both chat and embed
+        return llm, llm, None
+    except Exception as e:
+        logger.error(f"Failed to get models: {e}")
+        raise ModelConnectionError(f"Unable to get a model service: {e}")
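+
+# Call-site sketch (illustrative): the wrapper returns the same chat model in
+# both slots, with None as a placeholder for a future reranker.
+#
+#     chat_llm, embed_llm, reranker = get_models()
+#     print(chat_llm.invoke("ping").content)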