فهرست منبع

refactor: 重构 model_handler 模型加载与 RAG 检索逻辑

简化模型客户端创建流程,优化 retrieval/entities_enhance/query_rewrite 参数传递。

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
WangXuMing 1 هفته پیش
والد
کامیت
69766756d3

+ 53 - 113
foundation/ai/models/model_handler.py

@@ -14,7 +14,7 @@ AI模型处理器
 - lq_qwen3_8b_lq_lora: 本地Qwen3-8B-lq-lora模型
 - lq_qwen3_4b: 本地Qwen3-4B模型
 - qwen_local_14b: 本地Qwen3-14B模型
-- lq_qwen3_8b_emd: 本地Qwen3-Embedding-8B嵌入模型
+- shutian_qwen3_embed: 蜀天Qwen3-Embedding-8B嵌入模型(默认)
 - siliconflow_embed: 硅基流动Qwen3-Embedding-8B嵌入模型
 - lq_bge_reranker_v2_m3: 本地BGE-reranker-v2-m3重排序模型
 - qwen3_5_35b_a3b: DashScope Qwen3.5-35B-A3B模型(默认兜底模型)
@@ -23,6 +23,7 @@ AI模型处理器
 - shutian_qwen3_5_122b: 蜀天Qwen3.5-122B-A10B模型(183.220.37.46:25423)
 - shutian_qwen3_8b: 蜀天Qwen3-8B模型(183.220.37.46:25424)
 - shutian_qwen3_5_35b: 蜀天Qwen3.5-35B模型(183.220.37.46:25427)
+- shutian_qwen3_6_27b: 蜀天Qwen3.6-27B模型(183.220.37.46:25424)
 - shutian_qwen3_embed: 蜀天Qwen3-Embedding-8B模型(183.220.37.46:25425)
 - shutian_qwen3_reranker: 蜀天Qwen3-Reranker-8B模型(183.220.37.46:25426)
 """
@@ -198,8 +199,6 @@ class ModelHandler:
                 model = self._get_doubao_model()
             elif model_type == "qwen":
                 model = self._get_qwen_model()
-            elif model_type == "qwen3_30b":
-                model = self._get_qwen3_30b_model()
             elif model_type == "deepseek":
                 model = self._get_deepseek_model()
             elif model_type == "lq_qwen3_8b":
@@ -222,6 +221,8 @@ class ModelHandler:
                 model = self._get_shutian_qwen3_8b_model()
             elif model_type == "shutian_qwen3_5_35b":
                 model = self._get_shutian_qwen3_5_35b_model()
+            elif model_type == "shutian_qwen3_6_27b":
+                model = self._get_shutian_qwen3_6_27b_model()
             else:
                 logger.warning(f"未知的模型类型 '{model_type}',使用默认 qwen3_5_35b_a3b 模型")
                 model = self._get_qwen3_5_35b_a3b_model()
@@ -257,7 +258,7 @@ class ModelHandler:
 
         Args:
             model_type: 模型类型名称,如果为None则使用配置文件中的默认模型
-                       支持的模型类型:doubao, qwen, qwen3_30b, deepseek, gemini,
+                       支持的模型类型:doubao, qwen, deepseek, gemini,
                                      lq_qwen3_8b, lq_qwen3_8b_lq_lora,
                                      lq_qwen3_4b, qwen_local_14b
 
@@ -288,8 +289,6 @@ class ModelHandler:
                 model = self._get_doubao_model()
             elif model_type == "qwen":
                 model = self._get_qwen_model()
-            elif model_type == "qwen3_30b":
-                model = self._get_qwen3_30b_model()
             elif model_type == "deepseek":
                 model = self._get_deepseek_model()
             elif model_type == "lq_qwen3_8b":
@@ -312,6 +311,8 @@ class ModelHandler:
                 model = self._get_shutian_qwen3_8b_model()
             elif model_type == "shutian_qwen3_5_35b":
                 model = self._get_shutian_qwen3_5_35b_model()
+            elif model_type == "shutian_qwen3_6_27b":
+                model = self._get_shutian_qwen3_6_27b_model()
             else:
                 logger.warning(f"未知的模型类型 '{model_type}',使用默认 qwen3_5_35b_a3b 模型")
                 model = self._get_qwen3_5_35b_a3b_model()
@@ -372,8 +373,12 @@ class ModelHandler:
             logger.info(f"根据功能 '{function_name}' 获取模型: {model_type}")
             return self.get_model_by_name(model_type)
         except Exception as e:
-            logger.warning(f"根据功能获取模型失败 [{function_name}]: {e},使用默认模型")
-            return self.get_model_by_name("qwen3_5_35b_a3b")
+            logger.warning(f"根据功能获取模型失败 [{function_name}]: {e},尝试使用默认模型")
+            try:
+                default_model = get_model_for_function("default")
+                return self.get_model_by_name(default_model)
+            except Exception:
+                return self.get_model_by_name("qwen3_5_35b_a3b")
 
     def get_embedding_model(self):
         """
@@ -384,8 +389,8 @@ class ModelHandler:
 
         Note:
             根据配置文件中的EMBEDDING_MODEL_TYPE参数选择对应模型
-            支持的模型类型:lq_qwen3_8b_emd, siliconflow_embed
-            默认返回本地 lq_qwen3_8b_emd 模型
+            支持的模型类型:shutian_qwen3_embed, siliconflow_embed
+            默认返回蜀天 shutian_qwen3_embed 模型
         """
         # 优先从 model_setting.yaml 读取embedding配置
         embedding_model_type = None
@@ -401,7 +406,7 @@ class ModelHandler:
 
         # 回退到 config.ini
         if not embedding_model_type:
-            embedding_model_type = self.config.get("model", "EMBEDDING_MODEL_TYPE", "lq_qwen3_8b_emd")
+            embedding_model_type = self.config.get("model", "EMBEDDING_MODEL_TYPE", "shutian_qwen3_embed")
 
         logger.info(f"正在初始化Embedding模型,模型类型: {embedding_model_type}")
 
@@ -416,14 +421,12 @@ class ModelHandler:
         try:
             if embedding_model_type == "siliconflow_embed":
                 model = self._get_siliconflow_embedding_model()
-            elif embedding_model_type == "lq_qwen3_8b_emd":
-                model = self._get_lq_qwen3_8b_emd()
             elif embedding_model_type == "shutian_qwen3_embed":
                 model = self._get_shutian_qwen3_embed()
             else:
-                # 默认返回本地模型
-                logger.warning(f"未知的Embedding模型类型 '{embedding_model_type}',使用默认本地模型")
-                model = self._get_lq_qwen3_8b_emd()
+                # 默认返回蜀天Embedding模型
+                logger.warning(f"未知的Embedding模型类型 '{embedding_model_type}',使用默认蜀天Embedding")
+                model = self._get_shutian_qwen3_embed()
 
             if model:
                 self._model_cache[cache_key] = model
@@ -433,18 +436,6 @@ class ModelHandler:
                 raise ModelAPIError(f"Embedding模型初始化返回None: {embedding_model_type}")
 
         except (ModelConnectionError, Exception) as e:
-            # 如果配置的模型是本地模型且连接失败,自动回退到蜀天Embedding
-            if embedding_model_type == "lq_qwen3_8b_emd":
-                logger.warning(f"本地Embedding模型连接失败,自动回退到蜀天Embedding: {e}")
-                try:
-                    model = self._get_shutian_qwen3_embed()
-                    if model:
-                        self._model_cache["embed_shutian_qwen3_embed"] = model
-                        logger.info("Embedding模型回退成功: shutian_qwen3_embed")
-                        return model
-                except Exception as fallback_e:
-                    logger.error(f"回退到蜀天Embedding也失败: {fallback_e}")
-
             logger.error(f"获取Embedding模型失败 [{embedding_model_type}]: {e}")
             raise ModelConnectionError(f"无法初始化Embedding模型服务: {e}")
 
@@ -546,55 +537,6 @@ class ModelHandler:
             error = ModelAPIError(f"通义千问模型初始化异常: {e}")
             return self._handle_model_error("qwen", error)
 
-    def _get_qwen3_30b_model(self):
-        """
-        获取Qwen3-30B模型
-
-        Returns:
-            ChatOpenAI: 配置好的Qwen3-30B模型实例
-        """
-        try:
-            qwen3_30b_url = self.config.get("qwen3_30b", "QWEN3_30B_SERVER_URL")
-            qwen3_30b_model_id = self.config.get("qwen3_30b", "QWEN3_30B_MODEL_ID")
-            qwen3_30b_api_key = self.config.get("qwen3_30b", "QWEN3_30B_API_KEY")
-
-            # 验证配置完整性
-            if not all([qwen3_30b_url, qwen3_30b_model_id, qwen3_30b_api_key]):
-                missing = []
-                if not qwen3_30b_url:
-                    missing.append("QWEN3_30B_SERVER_URL")
-                if not qwen3_30b_model_id:
-                    missing.append("QWEN3_30B_MODEL_ID")
-                if not qwen3_30b_api_key:
-                    missing.append("QWEN3_30B_API_KEY")
-                raise ModelConfigError(f"Qwen3-30B模型配置不完整,缺少: {', '.join(missing)}")
-
-            # 检查连接
-            if not self._check_connection(qwen3_30b_url, qwen3_30b_api_key):
-                logger.warning(f"Qwen3-30B模型服务连接失败: {qwen3_30b_url}")
-                raise ModelConnectionError(f"无法连接到Qwen3-30B模型服务: {qwen3_30b_url}")
-
-            llm = ChatOpenAI(
-                base_url=qwen3_30b_url,
-                model=qwen3_30b_model_id,
-                api_key=qwen3_30b_api_key,
-                temperature=0.7,
-                timeout=self.REQUEST_TIMEOUT,
-                extra_body={
-                    "enable_thinking": False,
-                })
-
-            logger.info(f"Qwen3-30B模型初始化成功: {qwen3_30b_model_id}")
-            return llm
-
-        except ModelConfigError:
-            raise
-        except ModelConnectionError:
-            raise
-        except Exception as e:
-            error = ModelAPIError(f"Qwen3-30B模型初始化异常: {e}")
-            return self._handle_model_error("qwen3_30b", error)
-
     def _get_deepseek_model(self):
         """
         获取DeepSeek模型
@@ -911,42 +853,6 @@ class ModelHandler:
         except Exception as e:
             return self._handle_model_error("qwen3_5_122b_a10b", ModelAPIError(str(e)))
 
-    def _get_lq_qwen3_8b_emd(self):
-        """
-        获取本地Qwen3-Embedding-8B嵌入模型
-
-        Returns:
-            OpenAIEmbeddings: 配置好的本地Qwen3-Embedding-8B嵌入模型实例
-        """
-        try:
-            server_url = "http://192.168.91.253:9003/v1"
-            model_id = "Qwen3-Embedding-8B"
-
-            # 检查本地服务连接
-            if not self._check_connection(server_url, "dummy", timeout=3):
-                logger.warning(f"本地Qwen3-Embedding-8B模型服务连接失败: {server_url}")
-                raise ModelConnectionError(f"无法连接到本地Qwen3-Embedding-8B模型服务: {server_url}")
-
-            # 使用 langchain_openai 的 OpenAIEmbeddings
-            embeddings = OpenAIEmbeddings(
-                base_url=server_url,
-                model=model_id,
-                api_key="dummy",  # 本地模型使用虚拟API key
-                timeout=self.REQUEST_TIMEOUT,
-                tiktoken_enabled=False,
-                check_embedding_ctx_length=False,
-                max_retries=0,  # 禁用SDK内置重试,由EmbeddingClient统一管理
-            )
-
-            logger.info(f"本地Qwen3-Embedding-8B模型初始化成功: {model_id}")
-            return embeddings
-
-        except ModelConnectionError:
-            raise
-        except Exception as e:
-            error = ModelAPIError(f"本地Qwen3-Embedding-8B模型初始化异常: {e}")
-            return self._handle_model_error("lq_qwen3_8b_emd", error)
-
     def _get_siliconflow_embedding_model(self):
         """
         获取硅基流动Qwen3-Embedding-8B嵌入模型
@@ -1066,6 +972,40 @@ class ModelHandler:
             error = ModelAPIError(f"蜀天Qwen3-8B模型初始化异常: {e}")
             return self._handle_model_error("shutian_qwen3_8b", error)
 
+    def _get_shutian_qwen3_6_27b_model(self):
+        """
+        获取蜀天Qwen3.6-27B模型
+
+        Returns:
+            ChatOpenAI: 配置好的蜀天Qwen3.6-27B模型实例
+        """
+        try:
+            server_url = self.config.get("shutian", "SHUTIAN_27B_SERVER_URL", "http://183.220.37.46:25424/v1")
+            model_id = self.config.get("shutian", "SHUTIAN_27B_MODEL_ID", "/model/Qwen3.6-27B")
+            api_key = self.config.get("shutian", "SHUTIAN_27B_API_KEY", "sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615")
+
+            # 检查服务连接
+            if not self._check_connection(server_url, api_key, timeout=3):
+                logger.warning(f"蜀天Qwen3.6-27B模型服务连接失败: {server_url}")
+                raise ModelConnectionError(f"无法连接到蜀天Qwen3.6-27B模型服务: {server_url}")
+
+            llm = ChatOpenAI(
+                base_url=server_url,
+                model=model_id,
+                api_key=api_key,
+                temperature=0.7,
+                timeout=self.REQUEST_TIMEOUT,
+            )
+
+            logger.info(f"蜀天Qwen3.6-27B模型初始化成功: {model_id}")
+            return llm
+
+        except ModelConnectionError:
+            raise
+        except Exception as e:
+            error = ModelAPIError(f"蜀天Qwen3.6-27B模型初始化异常: {e}")
+            return self._handle_model_error("shutian_qwen3_6_27b", error)
+
     def _get_shutian_qwen3_5_35b_model(self):
         """
         获取蜀天Qwen3.5-35B模型

+ 1 - 1
foundation/ai/models/rerank_model.py

@@ -14,7 +14,7 @@ import json
 import requests
 from typing import List, Dict, Any
 from foundation.infrastructure.config.config import config_handler
-from foundation.observability.logger.loggering import server_logger
+from foundation.observability.logger.loggering import review_logger as server_logger
 
 
 class LqReranker:

+ 1 - 1
foundation/ai/rag/retrieval/entities_enhance.py

@@ -2,7 +2,7 @@ import json
 import asyncio
 from foundation.observability.monitoring.time_statistics import track_execution_time
 from foundation.ai.rag.retrieval.retrieval import retrieval_manager
-from foundation.observability.logger.loggering import server_logger
+from foundation.observability.logger.loggering import review_logger as server_logger
 
 
 

+ 3 - 3
foundation/ai/rag/retrieval/query_rewrite.py

@@ -2,7 +2,7 @@
 
 import uuid
 import asyncio
-from foundation.observability.logger.loggering import server_logger
+from foundation.observability.logger.loggering import review_logger as server_logger
 from foundation.ai.agent.generate.model_generate import generate_model_client
 
 class QueryRewriteManager():
@@ -67,7 +67,7 @@ class QueryRewriteManager():
                             trace_id=trace_id,
                             task_prompt_info=task_prompt_info,
                             timeout=60,
-                            model_name="qwen3_30b"  # 修复: 使用正确的模型名称(下划线)
+                            function_name="query_extract"
                         )
                     )
                     model_response = future.result()
@@ -77,7 +77,7 @@ class QueryRewriteManager():
                     trace_id=trace_id,
                     task_prompt_info=task_prompt_info,
                     timeout=60,
-                    model_name="qwen3_30b"  # 修复: 使用正确的模型名称(下划线)
+                    function_name="query_extract"
                 ))
 
             # 格式化模型响应

+ 4 - 4
foundation/ai/rag/retrieval/retrieval.py

@@ -7,7 +7,7 @@ from typing import List, Dict, Any, Optional
 from foundation.ai.models.rerank_model import rerank_model
 from foundation.observability.monitoring.time_statistics import track_execution_time
 from foundation.infrastructure.config.config import config_handler
-from foundation.observability.logger.loggering import server_logger
+from foundation.observability.logger.loggering import review_logger
 from foundation.database.base.vector.milvus_vector import MilvusVectorManager
 
 class RetrievalManager:
@@ -20,7 +20,7 @@ class RetrievalManager:
         初始化召回管理器
         """
         self.vector_manager = MilvusVectorManager()
-        self.logger = server_logger
+        self.logger = review_logger
         self.dense_weight = config_handler.get('hybrid_search', 'DENSE_WEIGHT', 0.7)
         self.sparse_weight = config_handler.get('hybrid_search', 'SPARSE_WEIGHT', 0.3)
 
@@ -162,7 +162,7 @@ class RetrievalManager:
         """
         self.logger.info(f"[entity_recall] 开始召回, recall_top_k={recall_top_k}, max_results={max_results}, 主实体='{main_entity}', 辅助实体数量={len(assisted_search_entity)}")
 
-        collection_name = "first_bfp_collection_entity"
+        collection_name = config_handler.get('rag_collections', 'ENTITY_COLLECTION', 'first_bfp_collection_entity')
         # 主实体搜索 - 使用异步方法
         entity_result = await self.async_multi_stage_recall(
             collection_name=collection_name,
@@ -225,7 +225,7 @@ class RetrievalManager:
         self.logger.info(f"[async_bfp_recall] 开始召回, top_k={top_k}, 实体数量={len(entity_list)}, 背景='{background[:50]}...'")
 
         # 异步并发召回编制依据
-        collection_name = "rag_children_hybrid"
+        collection_name = config_handler.get('rag_collections', 'CHILDREN_COLLECTION', 'rag_children_hybrid')
 
         gather_start = time.time()
         # 优化:降低hybrid_top_k参数从50到20,减少混合搜索时间