пре 1 недеља · ab7e86e93b
--- a/core/construction_review/component/ai_review_engine.py
+++ b/core/construction_review/component/ai_review_engine.py
@@ -5,7 +5,7 @@
 
				 @Project   : lq-agent-api
			
 
				 @File      : construction_review/ai_review_engine.py
			
 
				 @IDE       : VsCode
			
 
				-@Author    : 王旭明
			
 
				+@Author    : wandaan
			
 
				 @Date      : 2025-12-01 11:07:12
			
 
				 @Description: AI审查引擎核心组件，负责执行各类文档审查任务，支持并发处理和多种审查模式
			
 
				 
			
@@ -149,7 +149,6 @@ class AIReviewEngine(BaseReviewer):
 
				 
			
 
				         self.max_concurrent_reviews = max_concurrent_reviews
			
 
				         self.semaphore = asyncio.Semaphore(max_concurrent_reviews)
			
 
				-        self.milvus_collection = config_handler.get('milvus', 'MILVUS_COLLECTION', 'default')
			
 
				 
			
 
				         # [新增] 数据库连接池
			
 
				         self.db_pool = db_pool
			
@@ -699,41 +698,29 @@ class AIReviewEngine(BaseReviewer):
 
				                 model_client=getattr(self, 'model_client', None)
			
 
				             )
			
 
				             
			
 
				-            # 从state获取outline和原始chunks（如果有）
			
 
				+            # 从state获取outline（仅用于目录审查）
			
 
				             outline = None
			
 
				-            all_chunks = []
			
 
				             if state and isinstance(state, dict):
			
 
				-                structured = state.get('structured_content', {})
			
 
				-                outline = structured.get('outline')
			
 
				-                all_chunks = structured.get('chunks', [])
			
 
				+                outline = state.get('structured_content', {}).get('outline')
			
 
				 
			
 
				-            # 从传入的chunks中提取chapter_code和章节信息
			
 
				+            # 提取chunk信息（与其他审查一致，使用单个chunk）
			
 
				             chapter_code = "all"
			
 
				             chapter_name = ""
			
 
				-            if review_content and isinstance(review_content, list):
			
 
				-                first_chunk = review_content[0]
			
 
				-                if isinstance(first_chunk, dict):
			
 
				-                    chapter_code = first_chunk.get('chapter_classification', 'all')
			
 
				-                    chapter_name = first_chunk.get('chapter', '') or first_chunk.get('section_label', '')
			
 
				-                    # 只保留章级标题（"->" 之前的部分），避免二级标题污染 location
			
 
				-                    if chapter_name and '->' in chapter_name:
			
 
				-                        chapter_name = chapter_name.split('->')[0]
			
 
				-
			
 
				-            # 获取该章节的所有原始chunks用于完整性审查（包含所有分类代码）
			
 
				-            chapter_chunks = review_content
			
 
				-            if chapter_code != "all" and all_chunks:
			
 
				-                # 从state中获取该章节的所有原始chunks
			
 
				-                chapter_chunks = [
			
 
				-                    c for c in all_chunks
			
 
				-                    if c.get('chapter_classification') == chapter_code
			
 
				-                ]
			
 
				-                logger.info(f"[{name}] 章节 '{chapter_code}' 从state获取 {len(chapter_chunks)} 个原始chunks进行完整性审查")
			
 
				-
			
 
				-            # 执行检查（传入当前章节分类，只检查该章节下的三级分类）
			
 
				+            secondary_code = ""
			
 
				+            chunk = review_content[0] if (review_content and isinstance(review_content, list)) else {}
			
 
				+            if isinstance(chunk, dict):
			
 
				+                chapter_code = chunk.get('chapter_classification', 'all')
			
 
				+                secondary_code = chunk.get('secondary_category_code', '')
			
 
				+                chapter_name = chunk.get('chapter', '') or chunk.get('section_label', '')
			
 
				+                if chapter_name and '->' in chapter_name:
			
 
				+                    chapter_name = chapter_name.split('->')[0]
			
 
				+
			
 
				+            # 执行检查（传入单个chunk + 二级分类，只检查该二级下的标准项）
			
 
				             result = await checker.check(
			
 
				-                chunks=chapter_chunks,
			
 
				+                chunks=[chunk] if isinstance(chunk, dict) and chunk else review_content,
			
 
				                 outline=outline,
			
 
				-                chapter_classification=chapter_code if chapter_code != "all" else None
			
 
				+                chapter_classification=chapter_code if chapter_code != "all" else None,
			
 
				+                secondary_classification=secondary_code if secondary_code else None,
			
 
				             )
			
 
				             
			
 
				             # 转换为字典
			
@@ -779,21 +766,28 @@ class AIReviewEngine(BaseReviewer):
 
				                     response_item["second_seq"] = rec['second_seq']
			
 
				                 if 'third_seq' in rec:
			
 
				                     response_item["third_seq"] = rec['third_seq']
			
 
				-                # 继续添加其他字段
			
 
				+                # 继续添加其他字段（方案B增强：含 evidence / confidence）
			
 
				+                check_result_data = {
			
 
				+                    "issue_point": issue_point,
			
 
				+                    "location": location,
			
 
				+                    "suggestion": rec.get('suggestion', ''),
			
 
				+                    "reason": rec.get('reason', ''),
			
 
				+                    "risk_level": risk_level,
			
 
				+                }
			
 
				+                # 方案B特有字段
			
 
				+                if rec.get('evidence'):
			
 
				+                    check_result_data["evidence"] = rec['evidence']
			
 
				+                if rec.get('confidence'):
			
 
				+                    check_result_data["confidence"] = rec['confidence']
			
 
				+
			
 
				                 response_item.update({
			
 
				                     "check_item_code": f"{chapter_code if chapter_code != 'all' else 'unknown'}_completeness_check",
			
 
				-                    "check_result": {
			
 
				-                        "issue_point": issue_point,
			
 
				-                        "location": location,
			
 
				-                        "suggestion": rec.get('suggestion', ''),
			
 
				-                        "reason": rec.get('reason', ''),
			
 
				-                        "risk_level": risk_level
			
 
				-                    },
			
 
				+                    "check_result": check_result_data,
			
 
				                     "exist_issue": True,
			
 
				                     "risk_info": {"risk_level": risk_level_en}
			
 
				                 })
			
 
				                 response_items.append(response_item)
			
 
				-            
			
 
				+
			
 
				             # 如果没有缺失项，显示完整度
			
 
				             if not response_items:
			
 
				                 completeness_rate = result_dict.get('tertiary_completeness', {}).get('completeness_rate', '0%')
			
@@ -812,27 +806,38 @@ class AIReviewEngine(BaseReviewer):
 
				                     "exist_issue": False,
			
 
				                     "risk_info": {"risk_level": "low"}
			
 
				                 })
			
 
				-            
			
 
				+
			
 
				             execution_time = time.time() - start_time
			
 
				-            
			
 
				-            # 构建与原有格式兼容的结果
			
 
				+
			
 
				+            # 统计LLM调用信息
			
 
				+            llm_calls = result_dict.get('direct_llm_call_count', 0)
			
 
				+            direct_items_count = len(result_dict.get('direct_check_items', []))
			
 
				+
			
 
				+            # 构建与原有格式兼容的结果（方案B增强）
			
 
				             check_result = {
			
 
				                 "details": {
			
 
				                     "name": "completeness_check",
			
 
				                     "response": response_items,
			
 
				-                    "review_location_label": "三级完整性审查",
			
 
				+                    "review_location_label": "三级完整性审查（LLM直接解释）",
			
 
				                     "chapter_code": chapter_code,
			
 
				-                    "original_content": f"标准三级分类: {result_dict.get('tertiary_completeness', {}).get('total', 0)}个, "
			
 
				-                                        f"有内容: {result_dict.get('tertiary_completeness', {}).get('present', 0)}个, "
			
 
				-                                        f"缺失: {result_dict.get('tertiary_completeness', {}).get('missing', 0)}个",
			
 
				-                    # 保留完整的轻量级审查结果供前端使用
			
 
				-                    "lightweight_result": result_dict
			
 
				+                    "original_content": (
			
 
				+                        f"标准三级分类: {result_dict.get('tertiary_completeness', {}).get('total', 0)}个, "
			
 
				+                        f"有内容: {result_dict.get('tertiary_completeness', {}).get('present', 0)}个, "
			
 
				+                        f"缺失: {result_dict.get('tertiary_completeness', {}).get('missing', 0)}个"
			
 
				+                    ),
			
 
				+                    # 保留完整的审查结果供前端使用
			
 
				+                    "lightweight_result": result_dict,
			
 
				+                    # 方案B特有：LLM逐项判断详情
			
 
				+                    "direct_check_items": result_dict.get('direct_check_items', []),
			
 
				+                    "direct_llm_call_count": llm_calls,
			
 
				+                    "review_method": "direct_llm",
			
 
				                 },
			
 
				                 "success": True,
			
 
				                 "execution_time": execution_time
			
 
				             }
			
 
				-            
			
 
				-            logger.info(f"[{name}] 审查完成，耗时: {execution_time:.2f}s, "
			
 
				+
			
 
				+            logger.info(f"[{name}] 审查完成(方案B), 耗时: {execution_time:.2f}s, "
			
 
				+                       f"LLM调用: {llm_calls}次, 直接检查项: {direct_items_count}项, "
			
 
				                        f"三级完整率: {result_dict.get('tertiary_completeness', {}).get('completeness_rate', 'N/A')}")
			
 
				             
			
 
				             return check_result, trace_id_idx
			
@@ -1075,7 +1080,7 @@ class AIReviewEngine(BaseReviewer):
 
				             combined_content = review_content
			
 
				 
			
 
				         return await self.review("non_parameter_compliance_check", trace_id, reviewer_type, prompt_name, combined_content, review_references,
			
 
				-                               reference_source, state, stage_name, timeout=45, function_name="completeness_review_classify")
			
 
				+                               reference_source, state, stage_name, timeout=45, function_name="non_parameter_compliance_check")
			
 
				 
			
 
				     async def check_parameter_compliance(self, trace_id_idx: str, review_content: str, review_references: str,
			
 
				                                         reference_source: str, state: str, stage_name: str,
			
@@ -1108,7 +1113,7 @@ class AIReviewEngine(BaseReviewer):
 
				             combined_content = review_content
			
 
				 
			
 
				         return await self.review("parameter_compliance_check", trace_id, reviewer_type, prompt_name, combined_content, review_references,
			
 
				-                               reference_source, state, stage_name, timeout=45, function_name="completeness_review_classify")
			
 
				+                               reference_source, state, stage_name, timeout=45, function_name="parameter_compliance_check")
			
 
				 
			
 
				     async def reference_basis_reviewer(self, review_data: Dict[str, Any], trace_id: str,
			
 
				                                 state: dict = None, stage_name: str = None) -> Dict[str, Any]:
			
@@ -1249,114 +1254,6 @@ class AIReviewEngine(BaseReviewer):
 
				                     "error_message": error_msg
			
 
				                 }
			
 
				             }
			
 
				-        
			
 
				-    async def timeliness_content_reviewer(self, review_data: Dict[str, Any], trace_id: str,
			
 
				-                                state: dict = None, stage_name: str = None) -> Dict[str, Any]:
			
 
				-        """
			
 
				-        执行三级分类内容时效性审查：检查tertiary_classification_details中引用的规范是否过时
			
 
				-
			
 
				-        Args:
			
 
				-            review_data: 待审查数据，包含tertiary_classification_details
			
 
				-            trace_id: 追踪ID
			
 
				-            state: 状态字典
			
 
				-            stage_name: 阶段名称
			
 
				-
			
 
				-        Returns:
			
 
				-            审查结果字典，包含内容时效性审查结果
			
 
				-        """
			
 
				-        start_time = time.time()
			
 
				-        try:
			
 
				-            logger.info(f"开始三级分类内容时效性审查，trace_id: {trace_id}")
			
 
				-
			
 
				-            # 提取三级分类详情
			
 
				-            tertiary_details = review_data.get('tertiary_classification_details', [])
			
 
				-            max_concurrent = review_data.get('max_concurrent', 4)
			
 
				-
			
 
				-            if not tertiary_details:
			
 
				-                logger.warning("三级分类详情为空，将跳过内容时效性审查")
			
 
				-                return {
			
 
				-                    "timeliness_content_review_results": {
			
 
				-                        "review_results": [],
			
 
				-                        "total_items": 0,
			
 
				-                        "issue_items": 0,
			
 
				-                        "execution_time": time.time() - start_time,
			
 
				-                        "error_message": None,
			
 
				-                        "message": "未找到三级分类详情，跳过内容时效性审查"
			
 
				-                    }
			
 
				-                }
			
 
				-
			
 
				-            logger.info(f"提取到 {len(tertiary_details)} 个三级分类详情")
			
 
				-
			
 
				-            # 调用内容时效性审查
			
 
				-            try:
			
 
				-                # 使用信号量控制并发
			
 
				-                async with self.semaphore:
			
 
				-                    # 从state中获取progress_manager和callback_task_id
			
 
				-                    progress_manager = state.get('progress_manager') if state else None
			
 
				-                    callback_task_id = state.get('callback_task_id') if state else None
			
 
				-
			
 
				-                    # 调用内容时效性审查器（使用新的统一入口）
			
 
				-                    from core.construction_review.component.reviewers.timeliness_reviewer import TimelinessReviewService
			
 
				-                    async with TimelinessReviewService(max_concurrent=max_concurrent, db_pool=self.db_pool) as reviewer:
			
 
				-                        # 从 tertiary_details 提取内容
			
 
				-                        contents = []
			
 
				-                        for detail in tertiary_details:
			
 
				-                            content = detail.get("content", "") if isinstance(detail, dict) else str(detail)
			
 
				-                            if content:
			
 
				-                                contents.append(content)
			
 
				-                        full_content = "\n".join(contents)
			
 
				-
			
 
				-                        timeliness_content_results = await reviewer.review_from_content(
			
 
				-                            content=full_content,
			
 
				-                            chapter_code="content",
			
 
				-                            collection_name="first_bfp_collection_status"
			
 
				-                        )
			
 
				-
			
 
				-                    logger.info(f"内容时效性审查完成，发现问题数量: {len(timeliness_content_results)}")
			
 
				-
			
 
				-                    # 统计审查结果
			
 
				-                    total_items = len(timeliness_content_results)
			
 
				-                    issue_items = sum(1 for item in timeliness_content_results if item.get('exist_issue', False))
			
 
				-
			
 
				-                    logger.info(f"审查统计 - 总规范引用: {total_items}, 问题项: {issue_items}")
			
 
				-
			
 
				-            except Exception as e:
			
 
				-                logger.error(f"内容时效性审查失败: {str(e)}")
			
 
				-                return {
			
 
				-                    "timeliness_content_review_results": {
			
 
				-                        "review_results": [],
			
 
				-                        "total_items": 0,
			
 
				-                        "issue_items": 0,
			
 
				-                        "execution_time": time.time() - start_time,
			
 
				-                        "error_message": f"内容时效性审查失败: {str(e)}"
			
 
				-                    }
			
 
				-                }
			
 
				-
			
 
				-            # 返回完整结果
			
 
				-            return {
			
 
				-                "timeliness_content_review_results": {
			
 
				-                    "review_results": timeliness_content_results,
			
 
				-                    "total_items": total_items,
			
 
				-                    "issue_items": issue_items,
			
 
				-                    "execution_time": time.time() - start_time,
			
 
				-                    "error_message": None
			
 
				-                }
			
 
				-            }
			
 
				-
			
 
				-        except Exception as e:
			
 
				-            execution_time = time.time() - start_time
			
 
				-            error_msg = f"内容时效性审查失败: {str(e)}"
			
 
				-            logger.error(error_msg, exc_info=True)
			
 
				-
			
 
				-            return {
			
 
				-                "timeliness_content_review_results": {
			
 
				-                    "review_results": [],
			
 
				-                    "total_items": 0,
			
 
				-                    "issue_items": 0,
			
 
				-                    "execution_time": execution_time,
			
 
				-                    "error_message": error_msg
			
 
				-                }
			
 
				-            }
			
 
				 
			
 
				     async def timeliness_basis_reviewer(self, review_data: Dict[str, Any], trace_id: str,
			
 
				                                 state: dict = None, stage_name: str = None) -> Dict[str, Any]:
			
--- a/core/construction_review/component/doc_worker/classification/chunk_classifier.py
+++ b/core/construction_review/component/doc_worker/classification/chunk_classifier.py
@@ -1,7 +1,7 @@
 
				 """
			
 
				-内容块分类模块（二级和三级分类）
			
 
				+内容块分类模块（二级分类）
			
 
				 
			
 
				-对已经完成一级分类的内容块进行二级和三级分类
			
 
				+对已经完成一级分类的内容块进行二级分类
			
 
				 """
			
 
				 
			
 
				 from __future__ import annotations
			
@@ -22,22 +22,6 @@ from ..config.provider import default_config_provider
 
				 from ..utils.prompt_loader import PromptLoader
			
 
				 
			
 
				 
			
 
				-# 延迟导入新的三级分类器（避免循环导入）
			
 
				-_LLM_CONTENT_CLASSIFIER = None
			
 
				-
			
 
				-
			
 
				-def _get_llm_content_classifier():
			
 
				-    """延迟导入 LLMContentClassifier"""
			
 
				-    global _LLM_CONTENT_CLASSIFIER
			
 
				-    if _LLM_CONTENT_CLASSIFIER is None:
			
 
				-        from ...reviewers.utils.llm_content_classifier_v2 import (
			
 
				-            LLMContentClassifier,
			
 
				-            ClassifierConfig
			
 
				-        )
			
 
				-        _LLM_CONTENT_CLASSIFIER = (LLMContentClassifier, ClassifierConfig)
			
 
				-    return _LLM_CONTENT_CLASSIFIER
			
 
				-
			
 
				-
			
 
				 def _extract_json(text: str) -> Optional[Dict[str, Any]]:
			
 
				     """从字符串中提取第一个有效 JSON 对象"""
			
 
				     for pattern in [r"```json\s*(\{.*?})\s*```", r"```\s*(\{.*?})\s*```"]:
			
@@ -144,353 +128,6 @@ class ChunkClassifier:
 
				 
			
 
				         return "\n".join(standards_lines) if standards_lines else "（无二级分类标准）", index_mapping
			
 
				 
			
 
				-    def _build_tertiary_standards(self, first_category_code: str, second_category_code: str) -> tuple[str, dict]:
			
 
				-        """
			
 
				-        构建三级分类标准文本
			
 
				-
			
 
				-        返回:
			
 
				-            (标准文本, 索引映射字典)
			
 
				-        """
			
 
				-        if first_category_code not in self.classification_tree:
			
 
				-            return "（无三级分类标准）", {}
			
 
				-
			
 
				-        if second_category_code not in self.classification_tree[first_category_code]:
			
 
				-            return "（无三级分类标准）", {}
			
 
				-
			
 
				-        third_items = self.classification_tree[first_category_code][second_category_code]["third_items"]
			
 
				-
			
 
				-        if not third_items:
			
 
				-            return "（无三级分类标准）", {}
			
 
				-
			
 
				-        standards_lines = ["    0. 非标准项 - 不符合以下任何类别"]
			
 
				-        index_mapping = {0: ("非标准项", "non_standard")}
			
 
				-
			
 
				-        for idx, third_item in enumerate(third_items, 1):
			
 
				-            third_cn = third_item["third_cn"]
			
 
				-            third_code = third_item["third_code"]
			
 
				-            third_focus = third_item["third_focus"]
			
 
				-
			
 
				-            # 保存索引映射
			
 
				-            index_mapping[idx] = (third_cn, third_code)
			
 
				-
			
 
				-            if third_focus and third_focus != "NULL":
			
 
				-                standards_lines.append(f"    {idx}. {third_cn} - 关注点：{third_focus}")
			
 
				-            else:
			
 
				-                standards_lines.append(f"    {idx}. {third_cn}")
			
 
				-
			
 
				-        return "\n".join(standards_lines), index_mapping
			
 
				-
			
 
				-    # 默认模型（三级分类会从 model_setting.yaml 动态加载）
			
 
				-    DEFAULT_MODEL = "qwen3_5_122b_a10b"
			
 
				-
			
 
				-    # 二级分类模型（从 model_setting.yaml 动态加载，配置 key: doc_classification_secondary）
			
 
				-    @property
			
 
				-    def SECONDARY_MODEL(self) -> str:
			
 
				-        """二级分类模型，从 model_setting.yaml 读取配置"""
			
 
				-        try:
			
 
				-            from foundation.ai.models.model_config_loader import get_model_for_function
			
 
				-            model = get_model_for_function("doc_classification_secondary")
			
 
				-            if model:
			
 
				-                return model
			
 
				-        except Exception as e:
			
 
				-            logger.debug(f"加载二级分类模型配置失败: {e}")
			
 
				-        return "qwen3_5_35b_a3b"  # 兜底默认值
			
 
				-
			
 
				-    async def _call_llm_once(
			
 
				-        self,
			
 
				-        system_prompt: str,
			
 
				-        user_prompt: str,
			
 
				-        model_name: Optional[str] = None
			
 
				-    ) -> Optional[Dict[str, Any]]:
			
 
				-        """
			
 
				-        单次异步 LLM 调用（使用统一的 GenerateModelClient）
			
 
				-
			
 
				-        参数:
			
 
				-            system_prompt: 系统提示词
			
 
				-            user_prompt: 用户提示词
			
 
				-            model_name: 模型名称，默认使用 DEFAULT_MODEL
			
 
				-
			
 
				-        失败返回 None，由调用方决定处理逻辑
			
 
				-        """
			
 
				-        if model_name is None:
			
 
				-            model_name = self.DEFAULT_MODEL
			
 
				-
			
 
				-        try:
			
 
				-            content = await generate_model_client.get_model_generate_invoke(
			
 
				-                trace_id="chunk_classifier",
			
 
				-                system_prompt=system_prompt,
			
 
				-                user_prompt=user_prompt,
			
 
				-                model_name=model_name,
			
 
				-            )
			
 
				-            result = _extract_json(content)
			
 
				-            return result if result is not None else {"raw_content": content}
			
 
				-        except Exception as e:
			
 
				-            logger.error(f"[ChunkClassifier] LLM 调用失败: {e}")
			
 
				-            return None
			
 
				-
			
 
				-    async def _batch_call_llm(
			
 
				-        self,
			
 
				-        requests: List[tuple],  # [(system_prompt, user_prompt), ...]
			
 
				-        model_name: Optional[str] = None,
			
 
				-    ) -> List[Optional[Dict[str, Any]]]:
			
 
				-        """
			
 
				-        并发批量调用 LLM（带信号量控制）
			
 
				-
			
 
				-        参数:
			
 
				-            requests: 请求列表，每个元素是 (system_prompt, user_prompt) 元组
			
 
				-            model_name: 指定模型名称，None则使用默认模型
			
 
				-
			
 
				-        返回:
			
 
				-            结果列表，与输入请求一一对应
			
 
				-        """
			
 
				-        semaphore = asyncio.Semaphore(self._concurrency)
			
 
				-
			
 
				-        async def bounded_call(system_prompt: str, user_prompt: str):
			
 
				-            async with semaphore:
			
 
				-                return await self._call_llm_once(system_prompt, user_prompt, model_name)
			
 
				-
			
 
				-        tasks = [bounded_call(sp, up) for sp, up in requests]
			
 
				-        return list(await asyncio.gather(*tasks))
			
 
				-
			
 
				-    async def classify_chunks_secondary_async(self, chunks: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
			
 
				-        """
			
 
				-        异步对chunks进行二级分类（全部走LLM，移除本地规则）
			
 
				-
			
 
				-        参数:
			
 
				-            chunks: 已完成一级分类的chunk列表
			
 
				-
			
 
				-        返回:
			
 
				-            添加了二级分类字段的chunk列表
			
 
				-        """
			
 
				-        logger.info(f"正在对 {len(chunks)} 个内容块进行二级分类（LLM全量）...")
			
 
				-
			
 
				-        # 准备LLM请求
			
 
				-        llm_requests = []
			
 
				-        valid_chunks = []
			
 
				-        index_mappings = []  # 保存每个请求对应的索引映射
			
 
				-
			
 
				-        for chunk in chunks:
			
 
				-            first_category_code = chunk.get("chapter_classification", "")
			
 
				-            chunk_title = chunk.get("section_label", "")
			
 
				-            hierarchy_path = " -> ".join(chunk.get("hierarchy_path", []))
			
 
				-            content = chunk.get("review_chunk_content", "")
			
 
				-            content_preview = content[:300] if content else ""
			
 
				-
			
 
				-            # 获取一级分类的中文名称
			
 
				-            first_category_cn = self._get_first_category_cn(first_category_code)
			
 
				-
			
 
				-            # 构建二级分类标准（返回标准文本和索引映射）
			
 
				-            secondary_standards, index_mapping = self._build_secondary_standards(first_category_code)
			
 
				-
			
 
				-            if secondary_standards == "（无二级分类标准）":
			
 
				-                # 如果没有二级分类标准，跳过
			
 
				-                chunk["secondary_category_cn"] = "无"
			
 
				-                chunk["secondary_category_code"] = "none"
			
 
				-                continue
			
 
				-
			
 
				-            # 渲染提示词
			
 
				-            prompt = self.prompt_loader.render(
			
 
				-                "chunk_secondary_classification",
			
 
				-                first_category=first_category_cn,
			
 
				-                chunk_title=chunk_title,
			
 
				-                hierarchy_path=hierarchy_path,
			
 
				-                content_preview=content_preview,
			
 
				-                secondary_standards=secondary_standards
			
 
				-            )
			
 
				-
			
 
				-            llm_requests.append((prompt["system"], prompt["user"]))
			
 
				-            valid_chunks.append(chunk)
			
 
				-            index_mappings.append(index_mapping)
			
 
				-
			
 
				-        if not llm_requests:
			
 
				-            logger.info("所有内容块都没有二级分类标准，跳过二级分类")
			
 
				-            return chunks
			
 
				-
			
 
				-        # 全部走LLM分类
			
 
				-        logger.info(f"[二级分类] 全部 {len(valid_chunks)} 个内容块走LLM分类")
			
 
				-
			
 
				-        llm_results = await self._batch_call_llm(llm_requests, model_name=self.SECONDARY_MODEL)
			
 
				-
			
 
				-        # 处理LLM结果
			
 
				-        for chunk, llm_result, index_mapping in zip(valid_chunks, llm_results, index_mappings):
			
 
				-            if llm_result and isinstance(llm_result, dict):
			
 
				-                category_index = llm_result.get("category_index")
			
 
				-
			
 
				-                if isinstance(category_index, int) and category_index in index_mapping:
			
 
				-                    secondary_cn, secondary_code = index_mapping[category_index]
			
 
				-                    chunk["secondary_category_code"] = secondary_code
			
 
				-                    chunk["secondary_category_cn"] = secondary_cn
			
 
				-                else:
			
 
				-                    # LLM返回无效，使用非标准项
			
 
				-                    chunk["secondary_category_code"] = "non_standard"
			
 
				-                    chunk["secondary_category_cn"] = "非标准项"
			
 
				-            else:
			
 
				-                # LLM调用失败
			
 
				-                chunk["secondary_category_code"] = "non_standard"
			
 
				-                chunk["secondary_category_cn"] = "非标准项"
			
 
				-
			
 
				-        logger.info("二级分类完成！")
			
 
				-        return chunks
			
 
				-
			
 
				-    async def classify_chunks_tertiary_async(
			
 
				-        self,
			
 
				-        chunks: List[Dict[str, Any]],
			
 
				-        use_enhanced_classifier: bool = True,
			
 
				-        classifier_config: Optional[Any] = None,
			
 
				-        progress_callback: Optional[Any] = None
			
 
				-    ) -> List[Dict[str, Any]]:
			
 
				-        """
			
 
				-        异步对chunks进行三级分类
			
 
				-
			
 
				-        参数:
			
 
				-            chunks: 已完成二级分类的chunk列表
			
 
				-            use_enhanced_classifier: 是否使用增强型分类器（行级细粒度、多分类、Embedding优化）
			
 
				-                - True: 使用新的 llm_content_classifier_v2（推荐）
			
 
				-                - False: 使用原有逐chunk分类方式
			
 
				-            classifier_config: 增强型分类器的配置对象（ClassifierConfig），为None时使用默认配置
			
 
				-            progress_callback: 进度回调函数 (completed, total, section_name, success) -> None，支持 async
			
 
				-
			
 
				-        返回:
			
 
				-            添加了三级分类字段的chunk列表
			
 
				-
			
 
				-        新增字段（use_enhanced_classifier=True时）:
			
 
				-            - tertiary_category_code: 三级分类代码
			
 
				-            - tertiary_category_cn: 三级分类名称
			
 
				-            - tertiary_classification_details: 行级分类详情列表，每个条目包含:
			
 
				-                - third_category_code: 三级分类代码
			
 
				-                - third_category_name: 三级分类名称
			
 
				-                - start_line: 起始行号
			
 
				-                - end_line: 结束行号
			
 
				-                - content: 原文内容
			
 
				-        """
			
 
				-        if use_enhanced_classifier:
			
 
				-            return await self._classify_chunks_tertiary_enhanced(chunks, classifier_config, progress_callback)
			
 
				-        else:
			
 
				-            return await self._classify_chunks_tertiary_legacy(chunks)
			
 
				-
			
 
				-    async def _classify_chunks_tertiary_enhanced(
			
 
				-        self,
			
 
				-        chunks: List[Dict[str, Any]],
			
 
				-        config: Optional[Any] = None,
			
 
				-        progress_callback: Optional[Any] = None
			
 
				-    ) -> List[Dict[str, Any]]:
			
 
				-        """
			
 
				-        使用增强型分类器进行三级分类
			
 
				-
			
 
				-        特点：
			
 
				-        - 行级细粒度分类
			
 
				-        - 支持一个段落包含多个三级分类
			
 
				-        - Embedding 相似度优化（跳过明显对应的段落）
			
 
				-        - 全局行号支持
			
 
				-        """
			
 
				-        logger.info(f"正在使用增强型分类器对 {len(chunks)} 个内容块进行三级分类... 特点: 行级细粒度 | 多分类支持 | Embedding优化")
			
 
				-
			
 
				-        try:
			
 
				-            LLMContentClassifier, ClassifierConfig = _get_llm_content_classifier()
			
 
				-        except ImportError as e:
			
 
				-            logger.warning(f"无法导入增强型分类器，回退到传统方式: {e}")
			
 
				-            return await self._classify_chunks_tertiary_legacy(chunks)
			
 
				-
			
 
				-        # 创建分类器实例
			
 
				-        if config is None:
			
 
				-            config = ClassifierConfig()
			
 
				-            # 使用与二级分类相同的并发度
			
 
				-            config.max_concurrent_requests = self._concurrency
			
 
				-
			
 
				-            # 从全局配置加载模型和thinking模式
			
 
				-            try:
			
 
				-                from foundation.ai.models.model_config_loader import get_model_for_function, get_thinking_mode_for_function
			
 
				-                config.model = get_model_for_function("doc_classification_tertiary")
			
 
				-                config.enable_thinking = get_thinking_mode_for_function("doc_classification_tertiary") or False
			
 
				-                logger.info(f"三级分类配置 - 并发度: {config.max_concurrent_requests}, 模型: {config.model}, thinking: {config.enable_thinking}")
			
 
				-            except Exception as e:
			
 
				-                logger.warning(f"加载模型配置失败，使用默认配置: {e}")
			
 
				-                config.model = "qwen3_5_35b_a3b"
			
 
				-                config.enable_thinking = False
			
 
				-
			
 
				-        classifier = LLMContentClassifier(config)
			
 
				-
			
 
				-        # 调用增强型分类器
			
 
				-        updated_chunks = await classifier.classify_chunks(chunks, progress_callback=progress_callback)
			
 
				-
			
 
				-        return updated_chunks
			
 
				-
			
 
				-    async def _classify_chunks_tertiary_legacy(self, chunks: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
			
 
				-        """
			
 
				-        传统三级分类方式（逐chunk分类）
			
 
				-
			
 
				-        每个chunk只能属于一个三级分类
			
 
				-        """
			
 
				-        logger.info(f"正在对 {len(chunks)} 个内容块进行三级分类...")
			
 
				-
			
 
				-        # 准备LLM请求
			
 
				-        llm_requests = []
			
 
				-        valid_chunks = []
			
 
				-        index_mappings = []  # 保存每个请求对应的索引映射
			
 
				-
			
 
				-        for chunk in chunks:
			
 
				-            first_category_code = chunk.get("chapter_classification", "")
			
 
				-            second_category_code = chunk.get("secondary_category_code", "")
			
 
				-            second_category_cn = chunk.get("secondary_category_cn", "")
			
 
				-            chunk_title = chunk.get("section_label", "")
			
 
				-            content = chunk.get("review_chunk_content", "")
			
 
				-            content_preview = content[:300] if content else ""
			
 
				-
			
 
				-            # 获取一级分类的中文名称
			
 
				-            first_category_cn = self._get_first_category_cn(first_category_code)
			
 
				-
			
 
				-            # 构建三级分类标准（返回标准文本和索引映射）
			
 
				-            tertiary_standards, index_mapping = self._build_tertiary_standards(first_category_code, second_category_code)
			
 
				-
			
 
				-            if tertiary_standards == "（无三级分类标准）":
			
 
				-                # 如果没有三级分类标准，跳过
			
 
				-                chunk["tertiary_category_cn"] = "无"
			
 
				-                chunk["tertiary_category_code"] = "none"
			
 
				-                continue
			
 
				-
			
 
				-            # 渲染提示词
			
 
				-            prompt = self.prompt_loader.render(
			
 
				-                "chunk_tertiary_classification",
			
 
				-                first_category=first_category_cn,
			
 
				-                secondary_category=second_category_cn,
			
 
				-                chunk_title=chunk_title,
			
 
				-                content_preview=content_preview,
			
 
				-                tertiary_standards=tertiary_standards
			
 
				-            )
			
 
				-
			
 
				-            llm_requests.append((prompt["system"], prompt["user"]))
			
 
				-            valid_chunks.append(chunk)
			
 
				-            index_mappings.append(index_mapping)
			
 
				-
			
 
				-        if not llm_requests:
			
 
				-            logger.info("所有内容块都没有三级分类标准，跳过三级分类")
			
 
				-            return chunks
			
 
				-
			
 
				-        # 批量异步调用LLM API
			
 
				-        llm_results = await self._batch_call_llm(llm_requests)
			
 
				-
			
 
				-        # 处理分类结果
			
 
				-        for chunk, llm_result, index_mapping in zip(valid_chunks, llm_results, index_mappings):
			
 
				-            if llm_result and isinstance(llm_result, dict):
			
 
				-                category_index = llm_result.get("category_index")
			
 
				-
			
 
				-                # 验证索引并映射到类别
			
 
				-                if isinstance(category_index, int) and category_index in index_mapping:
			
 
				-                    tertiary_cn, tertiary_code = index_mapping[category_index]
			
 
				-                    chunk["tertiary_category_cn"] = tertiary_cn
			
 
				-                    chunk["tertiary_category_code"] = tertiary_code
			
 
				-                else:
			
 
				-                    # 索引无效，归类为非标准项
			
 
				-                    logger.warning(f"LLM返回的索引 {category_index} 无效，归类为'非标准项'")
			
 
				-                    chunk["tertiary_category_cn"] = "非标准项"
			
 
				-                    chunk["tertiary_category_code"] = "non_standard"
			
 
				-            else:
			
 
				-                chunk["tertiary_category_cn"] = "非标准项"
			
 
				-                chunk["tertiary_category_code"] = "non_standard"
			
 
				-
			
 
				-        logger.info("三级分类完成！")
			
 
				-        return chunks
			
 
				-
			
 
				     def _get_first_category_cn(self, first_category_code: str) -> str:
			
 
				         """获取一级分类的中文名称"""
			
 
				         category_mapping = {
			
@@ -515,24 +152,3 @@ class ChunkClassifier:
 
				         except RuntimeError:
			
 
				             raise RuntimeError("请使用 await classify_chunks_secondary_async")
			
 
				 
			
 
				-    def classify_chunks_tertiary(
			
 
				-        self,
			
 
				-        chunks: List[Dict[str, Any]],
			
 
				-        use_enhanced_classifier: bool = True,
			
 
				-        classifier_config: Optional[Any] = None
			
 
				-    ) -> List[Dict[str, Any]]:
			
 
				-        """同步包装：三级分类
			
 
				-
			
 
				-        Args:
			
 
				-            chunks: 已完成二级分类的chunk列表
			
 
				-            use_enhanced_classifier: 是否使用增强型分类器（默认True）
			
 
				-            classifier_config: 增强型分类器配置（可选）
			
 
				-        """
			
 
				-        try:
			
 
				-            return asyncio.run(self.classify_chunks_tertiary_async(
			
 
				-                chunks,
			
 
				-                use_enhanced_classifier=use_enhanced_classifier,
			
 
				-                classifier_config=classifier_config
			
 
				-            ))
			
 
				-        except RuntimeError:
			
 
				-            raise RuntimeError("请使用 await classify_chunks_tertiary_async")
			
--- a/core/construction_review/component/document_processor.py
+++ b/core/construction_review/component/document_processor.py
@@ -157,7 +157,6 @@ class DocumentProcessor:
 
				 
			
 
				             # 缓存结果
			
 
				             await self._cache_unified_structure(unified_doc)
			
 
				-            await self._cache_tertiary_results(unified_doc, [])
			
 
				 
			
 
				             return unified_doc
			
 
				 
			
@@ -286,18 +285,7 @@ class DocumentProcessor:
 
				                 else:
			
 
				                     group["second_content"] = content
			
 
				 
			
 
				-            # 收集三级分类详情
			
 
				-            details = chunk.get("tertiary_classification_details", [])
			
 
				-            for idx, detail in enumerate(details, 1):
			
 
				-                group["third_items"].append(TertiaryItem(
			
 
				-                    third_seq=len(group["third_items"]) + 1,
			
 
				-                    third_code=detail.get("third_category_code", ""),
			
 
				-                    third_name=detail.get("third_category_name", ""),
			
 
				-                    line_start=detail.get("start_line", 0),
			
 
				-                    line_end=detail.get("end_line", 0),
			
 
				-                    content=detail.get("content", ""),
			
 
				-                    confidence=1.0
			
 
				-                ))
			
 
				+            # 三级分类已由完整性审查(LLM直接解释)替代，不再从chunk收集
			
 
				 
			
 
				         # 构建tertiary_classifications列表
			
 
				         tertiary_list = []
			
@@ -478,47 +466,6 @@ class DocumentProcessor:
 
				             logger.warning(f"二级分类失败: {str(e)}，跳过二级分类", exc_info=True)
			
 
				         return chunks
			
 
				 
			
 
				-    async def _classify_chunks_tertiary(self, chunks: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
			
 
				-        """对chunks进行三级分类，返回处理后的chunks"""
			
 
				-        logger.info(f"{StageName.TERTIARY_CLASSIFICATION.value}: 对内容块进行三级分类")
			
 
				-        await self._push_classification_progress(
			
 
				-            stage="文档分类",
			
 
				-            current=60,
			
 
				-            message=f"正在进行三级分类，共 {len(chunks)} 个内容块..."
			
 
				-        )
			
 
				-
			
 
				-        try:
			
 
				-            cache.save(
			
 
				-                data=chunks,
			
 
				-                subdir="document_temp",
			
 
				-                filename="三级分类输入结果",
			
 
				-                base_cache_dir=CacheBaseDir.CONSTRUCTION_REVIEW
			
 
				-            )
			
 
				-            logger.info("[三级分类] 输入结果已保存到缓存: temp/construction_review/document_temp/三级分类输入结果.json")
			
 
				-        except Exception as e:
			
 
				-            logger.warning(f"[三级分类] 保存缓存失败: {e}")
			
 
				-        try:
			
 
				-            chunk_classifier = self._get_chunk_classifier()
			
 
				-
			
 
				-            async def _tertiary_progress(completed: int, total: int, section_name: str, success: bool):
			
 
				-                """将三级分类的 section 级进度映射到 60%→90%"""
			
 
				-                if total > 0:
			
 
				-                    current = 60 + int(completed / total * 30)
			
 
				-                    status = "完成" if success else "失败"
			
 
				-                    await self._push_classification_progress(
			
 
				-                        stage="文档分类",
			
 
				-                        current=current,
			
 
				-                        message=f"三级分类中：{section_name} {status} [{completed}/{total}]"
			
 
				-                    )
			
 
				-
			
 
				-            chunks = await chunk_classifier.classify_chunks_tertiary_async(
			
 
				-                chunks, progress_callback=_tertiary_progress
			
 
				-            )
			
 
				-            logger.info("三级分类完成")
			
 
				-        except Exception as e:
			
 
				-            logger.warning(f"三级分类失败: {str(e)}，跳过三级分类", exc_info=True)
			
 
				-        return chunks
			
 
				-
			
 
				     async def _cache_unified_structure(self, unified_doc: UnifiedDocumentStructure) -> None:
			
 
				         """
			
 
				         缓存统一文档结构（二级分类后、三级分类前）
			
@@ -539,64 +486,6 @@ class DocumentProcessor:
 
				         except Exception as e:
			
 
				             logger.warning(f"[缓存] 保存统一文档结构失败: {e}", exc_info=True)
			
 
				 
			
 
				-    async def _cache_tertiary_results(
			
 
				-        self,
			
 
				-        unified_doc: UnifiedDocumentStructure,
			
 
				-        chunks: List[Dict[str, Any]]
			
 
				-    ) -> None:
			
 
				-        """
			
 
				-        缓存三级分类结果
			
 
				-
			
 
				-        保存路径：
			
 
				-        - temp/construction_review/document_temp/三级分类结果.json
			
 
				-        - temp/construction_review/document_temp/三级分类_chunks.json
			
 
				-        """
			
 
				-        try:
			
 
				-            # 缓存统一文档结构
			
 
				-            cache_path = cache.save(
			
 
				-                data=unified_doc.to_dict(),
			
 
				-                subdir='document_temp',
			
 
				-                filename='三级分类结果',
			
 
				-                base_cache_dir=CacheBaseDir.CONSTRUCTION_REVIEW
			
 
				-            )
			
 
				-
			
 
				-            logger.info(f"[缓存] 三级分类结果已保存: {cache_path}")
			
 
				-            logger.info(f"[缓存] 包含 {unified_doc.secondary_count} 个二级分类, {unified_doc.tertiary_count} 个三级分类")
			
 
				-
			
 
				-            # 详细统计
			
 
				-            for t in unified_doc.tertiary_classifications:
			
 
				-                logger.info(f"[缓存] 三级分类 {t.second_code}: {len(t.third_items)} 个细项")
			
 
				-
			
 
				-            # 缓存chunks（简化版，只保留关键字段）
			
 
				-            # 如果外部未传入 chunks，从 legacy_dict 中提取
			
 
				-            source_chunks = chunks if chunks else unified_doc.to_legacy_dict().get("chunks", [])
			
 
				-            chunks_summary = []
			
 
				-            for chunk in source_chunks:
			
 
				-                summary = {
			
 
				-                    "chunk_id": chunk.get("chunk_id"),
			
 
				-                    "chapter_classification": chunk.get("chapter_classification"),
			
 
				-                    "secondary_category_code": chunk.get("secondary_category_code"),
			
 
				-                    "section_label": chunk.get("section_label"),
			
 
				-                    "content_length": len(chunk.get("review_chunk_content", "") or chunk.get("content", "")),
			
 
				-                    "tertiary_classification_details": chunk.get("tertiary_classification_details", []),
			
 
				-                }
			
 
				-                chunks_summary.append(summary)
			
 
				-
			
 
				-            chunks_cache_path = cache.save(
			
 
				-                data={
			
 
				-                    "total_chunks": len(source_chunks),
			
 
				-                    "chunks": chunks_summary
			
 
				-                },
			
 
				-                subdir='document_temp',
			
 
				-                filename='三级分类_chunks',
			
 
				-                base_cache_dir=CacheBaseDir.CONSTRUCTION_REVIEW
			
 
				-            )
			
 
				-
			
 
				-            logger.info(f"[缓存] 三级分类chunks已保存: {chunks_cache_path}")
			
 
				-
			
 
				-        except Exception as e:
			
 
				-            logger.warning(f"[缓存] 保存三级分类结果失败: {e}", exc_info=True)
			
 
				-
			
 
				     async def _push_classification_progress(self, stage: str, current: int, message: str) -> None:
			
 
				         """推送分类阶段进度，并同步更新心跳共享状态"""
			
 
				         if self._progress_state is not None:
			
--- a/core/construction_review/component/infrastructure/relevance.py
+++ b/core/construction_review/component/infrastructure/relevance.py
@@ -1,29 +1,24 @@
 
				 import asyncio
			
 
				 import json
			
 
				 import re
			
 
				-import requests
			
 
				+
			
 
				+from foundation.ai.models.model_handler import model_handler
			
 
				 
			
 
				 
			
 
				 # ===============================
			
 
				-# 1) 最小 async LLM 调用（等价 curl）
			
 
				+# 1) LLM 调用（通过统一模型管理，使用 蜀天122B）
			
 
				 # ===============================
			
 
				+def _build_messages(prompt: str):
			
 
				+    """构建 LangChain 消息格式"""
			
 
				+    from langchain_core.messages import HumanMessage
			
 
				+    return [HumanMessage(content=prompt)]
			
 
				+
			
 
				+
			
 
				 async def qwen_chat_async(prompt: str) -> str:
			
 
				-    def _call():
			
 
				-        url = "http://192.168.91.253:8003/v1/chat/completions"
			
 
				-        headers = {
			
 
				-            "Content-Type": "application/json",
			
 
				-            "Authorization": "Bearer sk-123456",
			
 
				-        }
			
 
				-        payload = {
			
 
				-            "model": "qwen3-30b",
			
 
				-            "messages": [{"role": "user", "content": prompt}],
			
 
				-        }
			
 
				-        resp = requests.post(url, json=payload, headers=headers, timeout=60)
			
 
				-        resp.raise_for_status()
			
 
				-        return resp.json()["choices"][0]["message"]["content"]
			
 
				-
			
 
				-    loop = asyncio.get_running_loop()
			
 
				-    return await loop.run_in_executor(None, _call)
			
 
				+    llm = model_handler.get_model_by_function("relevance_judge")
			
 
				+    messages = _build_messages(prompt)
			
 
				+    response = await llm.ainvoke(messages)
			
 
				+    return response.content if hasattr(response, 'content') else str(response)
			
 
				 
			
 
				 
			
 
				 # ===============================
			
--- a/core/construction_review/component/minimal_pipeline/simple_processor.py
+++ b/core/construction_review/component/minimal_pipeline/simple_processor.py
@@ -177,21 +177,9 @@ class SimpleDocumentProcessor:
 
				             return structure, primary_result, secondary_result, chunks, catalog
			
 
				         await self._emit_progress(progress_callback, "文档切分", 50, f"组装 {len(chunks)} 个内容块")
			
 
				 
			
 
				-        # 5. 三级分类
			
 
				-        async def _tertiary_progress(completed: int, total: int, section_name: str, success: bool):
			
 
				-            if total > 0:
			
 
				-                current = 60 + int(completed / total * 30)
			
 
				-                status = "完成" if success else "失败"
			
 
				-                await self._emit_progress(
			
 
				-                    progress_callback, "文档分类", current,
			
 
				-                    f"三级分类中：{section_name} {status} [{completed}/{total}]"
			
 
				-                )
			
 
				-
			
 
				-        chunks = await self.chunk_classifier.classify_chunks_tertiary_async(
			
 
				-            chunks, progress_callback=_tertiary_progress
			
 
				-        )
			
 
				-        logger.info("[SimpleProcessor] 三级分类完成")
			
 
				-        await self._emit_progress(progress_callback, "文档分类", 90, "三级分类完成")
			
 
				+        # 5. 三级分类已移除——完整性审查改为LLM直接解释，不再需要预分类
			
 
				+        logger.info("[SimpleProcessor] 跳过三级分类（已由LLM直接完整性审查替代）")
			
 
				+        await self._emit_progress(progress_callback, "文档分类", 90, "文档处理完成")
			
 
				 
			
 
				         # 验证返回前的catalog
			
 
				         if catalog:
			
@@ -316,17 +304,7 @@ class SimpleDocumentProcessor:
 
				                 else:
			
 
				                     group["second_content"] = content
			
 
				 
			
 
				-            details = chunk.get("tertiary_classification_details", [])
			
 
				-            for detail in details:
			
 
				-                group["third_items"].append(TertiaryItem(
			
 
				-                    third_seq=len(group["third_items"]) + 1,
			
 
				-                    third_code=detail.get("third_category_code", ""),
			
 
				-                    third_name=detail.get("third_category_name", ""),
			
 
				-                    line_start=detail.get("start_line", 0),
			
 
				-                    line_end=detail.get("end_line", 0),
			
 
				-                    content=detail.get("content", ""),
			
 
				-                    confidence=1.0,
			
 
				-                ))
			
 
				+            # 三级分类已由LLM直接完整性审查替代，不再从chunk收集
			
 
				 
			
 
				         tertiary_list = []
			
 
				         second_seq = 0
			
--- a/core/construction_review/component/outline_catalogue_matcher.py
+++ b/core/construction_review/component/outline_catalogue_matcher.py
@@ -8,7 +8,6 @@
 
				 """
			
 
				 
			
 
				 import difflib
			
 
				-import logging
			
 
				 import re
			
 
				 from typing import Dict, List, Optional, Set, Tuple, Any
			
 
				 from collections import defaultdict
			
@@ -16,7 +15,7 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-logger = logging.getLogger(__name__)
			
 
				+from foundation.observability.logger.loggering import review_logger as logger
			
 
				 
			
 
				 
			
 
				 class OutlineCatalogueMatcher:
			
--- a/core/construction_review/component/reviewers/reference_basis_reviewer/punctuation_result_processor.py
+++ b/core/construction_review/component/reviewers/reference_basis_reviewer/punctuation_result_processor.py
@@ -9,10 +9,6 @@ import re
 
				 from typing import Dict, List, Literal, Optional
			
 
				 
			
 
				 from pydantic import BaseModel, Field, ValidationError
			
 
				-from langchain_core.prompts import ChatPromptTemplate
			
 
				-from langchain_core.output_parsers import PydanticOutputParser, StrOutputParser
			
 
				-from langchain_openai import ChatOpenAI
			
 
				-
			
 
				 # 多模型投票已移除，格式建议直接返回基础建议
			
 
				 
			
 
				 
			
@@ -107,25 +103,7 @@ HUMAN = """
 
				 /no_think
			
 
				 """
			
 
				 
			
 
				-# ===== 3) Output Parser =====
			
 
				-parser = PydanticOutputParser(pydantic_object=PunctuationIssueResults)
			
 
				-
			
 
				-# ===== 4) Prompt =====
			
 
				-prompt = ChatPromptTemplate.from_messages([
			
 
				-    ("system", SYSTEM),
			
 
				-    ("human", HUMAN)
			
 
				-])
			
 
				-
			
 
				-# ===== 5) LLM =====
			
 
				-llm = ChatOpenAI(
			
 
				-    model="qwen3-30b",
			
 
				-    base_url="http://192.168.91.253:8003/v1",
			
 
				-    api_key="sk-123456",
			
 
				-    temperature=0,
			
 
				-)
			
 
				-
			
 
				-
			
 
				-# ===== 6) 提取第一个 JSON =====
			
 
				+# ===== 3) 提取第一个 JSON =====
			
 
				 def extract_first_json(text: str) -> dict:
			
 
				     """从任意模型输出中提取第一个完整 JSON 对象 { ... }"""
			
 
				     start = text.find("{")
			
--- a/core/construction_review/component/reviewers/timeliness_reviewer.py
+++ b/core/construction_review/component/reviewers/timeliness_reviewer.py
@@ -3,7 +3,7 @@
 
				 """
			
 
				 统一时效性审查模块
			
 
				 
			
 
				-整合原 standard_timeliness_reviewer、timeliness_basis_reviewer、timeliness_content_reviewer 的功能，
			
 
				+整合原 standard_timeliness_reviewer、timeliness_basis_reviewer 的功能，
			
 
				 提供统一的时效性审查入口。
			
 
				 
			
 
				 主要组件：
			
@@ -447,7 +447,7 @@ class StandardTimelinessReviewer:
 
				                 trace_id=f"timeliness_mismatch_{self.callback_task_id or 'default'}_{result.seq_no}",
			
 
				                 system_prompt=system_prompt,
			
 
				                 user_prompt=user_prompt,
			
 
				-                model_name="shutian_qwen3_5_122b",
			
 
				+                function_name="timeliness_review",
			
 
				                 enable_thinking=False
			
 
				             )
			
 
				             payload = self._extract_first_json(raw)
			
--- a/core/construction_review/workflows/ai_review_workflow.py
+++ b/core/construction_review/workflows/ai_review_workflow.py
@@ -346,14 +346,11 @@ class AIReviewWorkflow:
 
				             else:
			
 
				                 outline_content_str = self._build_outline_text(original_outline)
			
 
				 
			
 
				-            # 筛选与合并章节内容
			
 
				+            # 筛选章节内容（完整性审查与其他审查共用同一chunk，无需合并标记）
			
 
				             filtered_chunks = [
			
 
				                 chunk for chunk in original_chunks
			
 
				                 if chunk.get("chapter_classification") in review_item_dict_sorted.keys()
			
 
				             ]
			
 
				-            filtered_chunks = self.core_fun._merge_chunks_for_completeness_check(
			
 
				-                filtered_chunks, review_item_dict_sorted
			
 
				-            )
			
 
				             cache.filtered_chunks(filtered_chunks, base_cache_dir=CacheBaseDir.CONSTRUCTION_REVIEW)
			
 
				 
			
 
				             # 计算总任务数
			
@@ -377,7 +374,6 @@ class AIReviewWorkflow:
 
				                 "chapter": "目录",
			
 
				                 "title": "目录",
			
 
				                 "original_content": outline_content_str,
			
 
				-                "is_complete_field": True
			
 
				             }]
			
 
				 
			
 
				             # ===== Phase 3: 分章节执行审查 =====
			
--- a/core/construction_review/workflows/core_functions/ai_review_core_fun.py
+++ b/core/construction_review/workflows/core_functions/ai_review_core_fun.py
@@ -262,15 +262,14 @@ class AIReviewCoreFun:
 
				         rag_enhanced_content = None  # 初始化变量，避免作用域错误
			
 
				         basis_content = None  # 初始化变量，避免作用域错误
			
 
				         rows_df = None
			
 
				-        is_complete_field = chunk.get('is_complete_field', False)
			
 
				-        logger.info(f"检查is_complete_field值是否正常: {is_complete_field}")
			
 
				+
			
 
				         # 只有非完整性审查的chunk才执行RAG检索（注意括号位置，确保运算符优先级正确）
			
 
				-        if ('check_parameter_compliance' in func_names or 'check_non_parameter_compliance' in func_names) and not is_complete_field:
			
 
				+        if 'check_parameter_compliance' in func_names or 'check_non_parameter_compliance' in func_names:
			
 
				             logger.debug("开始执行RAG检索增强")
			
 
				             rag_enhanced_content = self.ai_review_engine.rag_enhanced_check(chunk.get('content', ''))
			
 
				 
			
 
				         if ('reference_basis_reviewer' in func_names or 'timeliness_reviewer' in func_names or
			
 
				-            'timeliness_basis_reviewer' in func_names or 'timeliness_content_reviewer' in func_names) and not is_complete_field:
			
 
				+            'timeliness_basis_reviewer' in func_names):
			
 
				             logger.debug("开始执行编制依据/时效性预处理")
			
 
				             # 预处理编制依据/时效性审查所需内容
			
 
				             basis_content = await directory_extraction.extract_basis(
			
@@ -376,11 +375,10 @@ class AIReviewCoreFun:
 
				 
			
 
				         # 获取块内容
			
 
				         review_content = chunk.get("content", "")
			
 
				-        is_complete_field = chunk.get("is_complete_field", False)
			
 
				         logger.debug(f"执行审查: {trace_id} -> {func_name}")
			
 
				 
			
 
				         # 根据func_name构建对应的参数并调用
			
 
				-        if func_name == "grammar_check" and not is_complete_field:
			
 
				+        if func_name == "grammar_check":
			
 
				             raw_result = await method(trace_id, review_content, state, stage_name)
			
 
				             # 基础审查方法，放入 basic_compliance
			
 
				             return UnitReviewResult(
			
@@ -393,7 +391,7 @@ class AIReviewCoreFun:
 
				                 is_sse_push=True
			
 
				             )
			
 
				 
			
 
				-        elif func_name == "check_semantic_logic" and not is_complete_field:
			
 
				+        elif func_name == "check_semantic_logic":
			
 
				             raw_result = await method(trace_id, review_content, state, stage_name)
			
 
				             # 基础审查方法，放入 basic_compliance
			
 
				             return UnitReviewResult(
			
@@ -406,7 +404,7 @@ class AIReviewCoreFun:
 
				                 is_sse_push=True
			
 
				             )
			
 
				 
			
 
				-        elif func_name == "check_sensitive" and not is_complete_field:
			
 
				+        elif func_name == "check_sensitive":
			
 
				             raw_result = await method(trace_id, review_content, state, stage_name)
			
 
				             # 基础审查方法，放入 basic_compliance
			
 
				             return UnitReviewResult(
			
@@ -419,7 +417,7 @@ class AIReviewCoreFun:
 
				                 is_sse_push=True
			
 
				             )
			
 
				 
			
 
				-        elif func_name == "check_completeness" and is_complete_field:
			
 
				+        elif func_name == "check_completeness":
			
 
				             # check_completeness 需要列表类型，将单个 chunk 包装成列表
			
 
				             completeness_result, trace_id_idx = await method(trace_id, [chunk], state, stage_name)
			
 
				 
			
@@ -470,7 +468,7 @@ class AIReviewCoreFun:
 
				                 overall_risk=self._calculate_single_result_risk(outline_result),
			
 
				                 is_sse_push=True
			
 
				             )
			
 
				-        elif func_name == "check_non_parameter_compliance" and not is_complete_field:
			
 
				+        elif func_name == "check_non_parameter_compliance":
			
 
				             # 技术审查方法需要从 RAG 检索结果中获取 references
			
 
				             raw_result = await self._execute_technical_review(
			
 
				                 method, trace_id, review_content, chunk, state, stage_name, rag_enhanced_content, func_name
			
@@ -486,7 +484,7 @@ class AIReviewCoreFun:
 
				                 is_sse_push=True
			
 
				             )
			
 
				 
			
 
				-        elif func_name == "check_parameter_compliance" and not is_complete_field:
			
 
				+        elif func_name == "check_parameter_compliance":
			
 
				             # 技术审查方法需要从 RAG 检索结果中获取 references
			
 
				             raw_result = await self._execute_technical_review(
			
 
				                 method, trace_id, review_content, chunk, state, stage_name, rag_enhanced_content, func_name
			
@@ -505,7 +503,7 @@ class AIReviewCoreFun:
 
				 
			
 
				 
			
 
				         # reference_basis_reviewer：规范性审查（逐块处理，支持basis和其他章节）
			
 
				-        elif func_name == "reference_basis_reviewer" and not is_complete_field:
			
 
				+        elif func_name == "reference_basis_reviewer":
			
 
				             review_data = {
			
 
				                 "content": review_content,  # 原始文本内容
			
 
				                 "basis_items": basis_content,  # 提取的 BasisItems 对象（basis章节使用）
			
@@ -530,7 +528,7 @@ class AIReviewCoreFun:
 
				             )
			
 
				 
			
 
				         # timeliness_reviewer：统一的时效性审查入口（支持basis和content两种来源）
			
 
				-        elif func_name in ("timeliness_basis_reviewer", "timeliness_content_reviewer", "timeliness_reviewer") and not is_complete_field:
			
 
				+        elif func_name in ("timeliness_basis_reviewer", "timeliness_reviewer"):
			
 
				             review_data = {
			
 
				                 "content": review_content,  # 原始文本内容
			
 
				                 "basis_items": basis_content,  # 提取的 BasisItems 对象（可能为None）
			
@@ -553,21 +551,7 @@ class AIReviewCoreFun:
 
				             )
			
 
				 
			
 
				         else:
			
 
				-            # 处理 check_completeness 但 is_complete_field=False 的情况
			
 
				-            if func_name == "check_completeness" and not is_complete_field:
			
 
				-                logger.debug(f"跳过 {func_name}，当前 chunk 不是完整性审查类型")
			
 
				-                return UnitReviewResult(
			
 
				-                    unit_index=chunk_index,
			
 
				-                    unit_content=chunk,
			
 
				-                    basic_compliance={},
			
 
				-                    technical_compliance={},
			
 
				-                    rag_enhanced={},
			
 
				-                    overall_risk="low",
			
 
				-                    is_sse_push=False  # 不推送，因为跳过了
			
 
				-                )
			
 
				-
			
 
				             logger.warning(f"未知的审查方法: {func_name}")
			
 
				-            logger.warning(f"is_complete_field: {is_complete_field}")
			
 
				             return UnitReviewResult(
			
 
				                 unit_index=chunk_index,
			
 
				                 unit_content=chunk,
			
@@ -1221,96 +1205,3 @@ class AIReviewCoreFun:
 
				             review_item_dict_sorted[key] = review_item_dict[key]
			
 
				         return review_item_dict_sorted
			
 
				 
			
 
				-    def _merge_chunks_for_completeness_check(
			
 
				-        self,
			
 
				-        chunks: List[Dict[str, Any]],
			
 
				-        review_item_dict: Dict[str, List[str]]
			
 
				-    ) -> List[Dict[str, Any]]:
			
 
				-        """
			
 
				-        筛选包含完整性审查的分类，标记该章节的第一个chunk进行完整性审查
			
 
				-
			
 
				-        Args:
			
 
				-            chunks: 筛选后的chunks列表
			
 
				-            review_item_dict: 审查项字典 {chapter_code: [func_names]}
			
 
				-
			
 
				-        Returns:
			
 
				-            List[Dict[str, Any]]: 标记后的chunks列表，并按页码排序
			
 
				-
			
 
				-        Note:
			
 
				-            标记规则：
			
 
				-            1. 找出包含 'check_completeness' 的章节分类
			
 
				-            2. 给所有chunk添加 is_complete_field=False
			
 
				-            3. 对每个需要完整性审查的章节，标记第一个chunk的 is_complete_field=True
			
 
				-            4. 按页码排序
			
 
				-            5. 完整性审查时，check_completeness方法会从state获取该章节的所有原始chunks
			
 
				-        """
			
 
				-        try:
			
 
				-            # 1. 找出包含完整性审查的章节分类
			
 
				-            completeness_chapters = set()
			
 
				-            for chapter_code, func_names in review_item_dict.items():
			
 
				-                if 'check_completeness' in func_names or 'outline_check' in func_names:
			
 
				-                    completeness_chapters.add(chapter_code)
			
 
				-
			
 
				-            if not completeness_chapters:
			
 
				-                logger.info("没有包含完整性审查的章节，无需合并")
			
 
				-                return chunks
			
 
				-
			
 
				-            logger.info(f"包含完整性审查的章节分类: {completeness_chapters}")
			
 
				-
			
 
				-            # 2. 筛选出需要合并的chunks（属于完整性审查章节的）
			
 
				-            chunks_to_merge = []
			
 
				-            for chunk in chunks:
			
 
				-                chapter_code = chunk.get("chapter_classification", "")
			
 
				-                if chapter_code in completeness_chapters:
			
 
				-                    chunks_to_merge.append(chunk)
			
 
				-
			
 
				-            if not chunks_to_merge:
			
 
				-                logger.info("没有找到需要合并的chunks")
			
 
				-                return chunks
			
 
				-
			
 
				-            # 3. 按章节分组（章节定义：去除->及其之后的内容）
			
 
				-            chapter_groups = {}
			
 
				-            for chunk in chunks_to_merge:
			
 
				-                chapter_full = chunk.get("chapter", chunk.get("section_label", ""))
			
 
				-                # 提取章节名：去除->及其之后的内容
			
 
				-                chapter_name = chapter_full.split("->")[0].strip() if "->" in chapter_full else chapter_full
			
 
				-
			
 
				-                if chapter_name not in chapter_groups:
			
 
				-                    chapter_groups[chapter_name] = []
			
 
				-                chapter_groups[chapter_name].append(chunk)
			
 
				-
			
 
				-            logger.info(f"按章节分组完成，共 {len(chapter_groups)} 个章节需要合并")
			
 
				-
			
 
				-            # 4. 标记完整性审查章节的第一个chunk
			
 
				-            # 给所有原chunk添加 is_complete_field: False
			
 
				-            result_chunks = []
			
 
				-            for chunk in chunks:
			
 
				-                chunk_copy = chunk.copy()
			
 
				-                chunk_copy["is_complete_field"] = False
			
 
				-                result_chunks.append(chunk_copy)
			
 
				-
			
 
				-            for chapter_name, chapter_chunk_list in chapter_groups.items():
			
 
				-                # 按page升序排列
			
 
				-                chapter_chunk_list.sort(key=lambda x: int(x.get("page", 0)) if str(x.get("page", 0)).isdigit() else x.get("page", 0))
			
 
				-
			
 
				-                # 找到该章节在result_chunks中的第一个chunk并标记
			
 
				-                first_chunk_id = chapter_chunk_list[0].get('chunk_id')
			
 
				-                for rc in result_chunks:
			
 
				-                    if rc.get('chunk_id') == first_chunk_id:
			
 
				-                        rc["is_complete_field"] = True
			
 
				-                        logger.info(f"[完整性审查] 章节 '{chapter_name}' 标记第一个chunk (chunk_id={first_chunk_id}) 用于完整性审查")
			
 
				-                        break
			
 
				-
			
 
				-            # 5. 按页码排序
			
 
				-            result_chunks.sort(
			
 
				-                key=lambda x: int(x.get("page", 0)) if str(x.get("page", 0)).isdigit() else x.get("page", 0)
			
 
				-            )
			
 
				-
			
 
				-            logger.info(f"完整性审查标记完成: 共 {len(result_chunks)} 个chunk，章节数: {len(chapter_groups)}")
			
 
				-
			
 
				-            return result_chunks
			
 
				-
			
 
				-        except Exception as e:
			
 
				-            logger.error(f"合并chunks失败: {str(e)}", exc_info=True)
			
 
				-            # 出错时返回原始列表
			
 
				-            return chunks