1 mese fa · 812fabb9b4
--- a/core/construction_review/component/ai_review_engine.py
+++ b/core/construction_review/component/ai_review_engine.py
@@ -756,12 +756,7 @@ class AIReviewEngine(BaseReviewer):
 
				                 risk_level, risk_level_en = _level_risk.get(level, ("中风险", "medium"))
			
 
				                 issue_point = rec.get('issue_point', '')
			
 
				                 location = rec.get('location', '')
			
 
				-                # 三级缺失：将 location 中的标准分类名替换为文档实际章节名，信息更直观
			
 
				-                if level == '三级' and chapter_name and ' > ' in location:
			
 
				-                    sec_part = location.split(' > ', 1)[1]
			
 
				-                    new_location = f"{chapter_name} > {sec_part}"
			
 
				-                    issue_point = issue_point.replace(location, new_location, 1)
			
 
				-                    location = new_location
			
 
				+                # location 已从 completeness_reviewer 获取实际章节名，无需额外处理
			
 
				                 # 按顺序构建响应字段（first_seq -> second_seq -> third_seq 相邻）
			
 
				                 response_item = {
			
 
				                     "check_item": "completeness_check",
			
@@ -901,14 +896,16 @@ class AIReviewEngine(BaseReviewer):
 
				         logger.info(f"[{name}] 开始LLM目录完整性检查")
			
 
				 
			
 
				         try:
			
 
				-            # 获取 catalog 的标准格式文本
			
 
				+            # 获取 catalog 的标准格式文本和目录页页码
			
 
				             formatted_text = ""
			
 
				+            toc_page_range = None
			
 
				 
			
 
				-            # 优先从 catalog.formatted_text 获取
			
 
				+            # 优先从 catalog 获取
			
 
				             if outline_data and isinstance(outline_data, dict):
			
 
				                 catalog_raw = outline_data.get('catalog')
			
 
				                 if catalog_raw and isinstance(catalog_raw, dict):
			
 
				                     formatted_text = catalog_raw.get('formatted_text', '')
			
 
				+                    toc_page_range = catalog_raw.get('toc_page_range')
			
 
				 
			
 
				             # 回退到从 state 获取
			
 
				             if not formatted_text and state and isinstance(state, dict):
			
@@ -916,6 +913,7 @@ class AIReviewEngine(BaseReviewer):
 
				                 catalog_raw = structured.get('catalog')
			
 
				                 if catalog_raw and isinstance(catalog_raw, dict):
			
 
				                     formatted_text = catalog_raw.get('formatted_text', '')
			
 
				+                    toc_page_range = catalog_raw.get('toc_page_range')
			
 
				 
			
 
				             # 如果没有标准格式，从 chapters 构建
			
 
				             if not formatted_text:
			
@@ -976,7 +974,7 @@ class AIReviewEngine(BaseReviewer):
 
				 
			
 
				             # 使用 CatalogReviewer 进行审查
			
 
				             reviewer = CatalogReviewer()
			
 
				-            result = await reviewer.review(formatted_text, trace_id_idx)
			
 
				+            result = await reviewer.review(formatted_text, trace_id_idx, toc_page_range)
			
 
				 
			
 
				             logger.info(f"[DEBUG][{name}] 检查完成，返回结果")
			
 
				             logger.info(f"[DEBUG][{name}] result type: {type(result)}")
			
--- a/core/construction_review/component/minimal_pipeline/catalog_reviewer.py
+++ b/core/construction_review/component/minimal_pipeline/catalog_reviewer.py
@@ -32,6 +32,7 @@ class CatalogReviewer:
 
				         "check_result": {
			
 
				           "issue_point": "【一级缺失】第四章 施工工艺技术",
			
 
				           "location": "目录页",
			
 
				+          "page": 3,
			
 
				           "suggestion": "建议补充'第四章 施工工艺技术'章节",
			
 
				           "reason": "目录页缺少该章节",
			
 
				           "risk_level": "高风险"
			
@@ -46,6 +47,7 @@ class CatalogReviewer:
 
				         "check_result": {
			
 
				           "issue_point": "【一级缺失】第十章 其他资料",
			
 
				           "location": "目录页",
			
 
				+          "page": 3,
			
 
				           "suggestion": "建议补充'第十章 其他资料'章节",
			
 
				           "reason": "目录页缺少该章节",
			
 
				           "risk_level": "高风险"
			
@@ -60,6 +62,7 @@ class CatalogReviewer:
 
				         "check_result": {
			
 
				           "issue_point": "【二级缺失】第一章 编制依据 - 四、编制原则",
			
 
				           "location": "第一章",
			
 
				+          "page": 3,
			
 
				           "suggestion": "建议补充'四、编制原则'",
			
 
				           "reason": "第一章缺少该二级目录",
			
 
				           "risk_level": "中风险"
			
@@ -157,13 +160,15 @@ class CatalogReviewer:
 
				 三、附图附表
			
 
				 四、编制及审核人员情况"""
			
 
				 
			
 
				-    async def review(self, actual_catalog_text: str, trace_id_idx: str = "") -> Dict[str, Any]:
			
 
				+    async def review(self, actual_catalog_text: str, trace_id_idx: str = "",
			
 
				+                      toc_page_range: Dict[str, int] = None) -> Dict[str, Any]:
			
 
				         """
			
 
				         审查目录完整性
			
 
				 
			
 
				         Args:
			
 
				             actual_catalog_text: 实际目录文本（标准格式）
			
 
				             trace_id_idx: 追踪ID索引
			
 
				+            toc_page_range: 目录页页码范围，如 {"start": 3, "end": 4}
			
 
				 
			
 
				         Returns:
			
 
				             对齐 completeness_check 格式的结果字典
			
@@ -174,7 +179,7 @@ class CatalogReviewer:
 
				         try:
			
 
				             from foundation.ai.agent.generate.model_generate import generate_model_client
			
 
				 
			
 
				-            prompt = self._build_prompt(actual_catalog_text)
			
 
				+            prompt = self._build_prompt(actual_catalog_text, toc_page_range)
			
 
				 
			
 
				             # 重试机制：最多3次
			
 
				             max_retries = 3
			
@@ -251,10 +256,21 @@ class CatalogReviewer:
 
				                 "execution_time": execution_time
			
 
				             }
			
 
				 
			
 
				-    def _build_prompt(self, actual_catalog_text: str) -> str:
			
 
				+    def _build_prompt(self, actual_catalog_text: str,
			
 
				+                       toc_page_range: Dict[str, int] = None) -> str:
			
 
				         """构建审查Prompt"""
			
 
				         json_example = self._JSON_EXAMPLE_TEMPLATE
			
 
				 
			
 
				+        # 构建页码信息说明
			
 
				+        page_info = ""
			
 
				+        if toc_page_range:
			
 
				+            start_page = toc_page_range.get('start', 3)
			
 
				+            end_page = toc_page_range.get('end', 3)
			
 
				+            if start_page == end_page:
			
 
				+                page_info = f"目录页位于第 {start_page} 页"
			
 
				+            else:
			
 
				+                page_info = f"目录页位于第 {start_page}-{end_page} 页"
			
 
				+
			
 
				         # 基础 JSON 模板（使用单引号字符串避免 f-string 转义问题）
			
 
				         base_template = '''{
			
 
				   "details": {
			
@@ -267,6 +283,7 @@ class CatalogReviewer:
 
				         "check_result": {
			
 
				           "issue_point": "【一级缺失】xxx",
			
 
				           "location": "目录页",
			
 
				+          "page": 3,
			
 
				           "suggestion": "建议补充'xxx'章节",
			
 
				           "reason": "简要说明",
			
 
				           "risk_level": "高风险"
			
@@ -281,6 +298,29 @@ class CatalogReviewer:
 
				   "success": true
			
 
				 }'''
			
 
				 
			
 
				+        page_instruction = f"""
			
 
				+## 页码信息
			
 
				+{page_info if page_info else "目录页页码未知，统一使用 page=3"}
			
 
				+
			
 
				+## 输出格式要求
			
 
				+check_result 中必须包含以下字段：
			
 
				+- issue_point: 问题描述
			
 
				+- location: 问题定位（一级缺失填"目录页"，二级缺失填对应的一级章节名）
			
 
				+- page: 页码数字（{toc_page_range.get('start', 3) if toc_page_range else 3}）
			
 
				+- suggestion: 补充建议
			
 
				+- reason: 原因说明
			
 
				+- risk_level: 风险等级（"高风险"或"中风险"）
			
 
				+""" if toc_page_range else """
			
 
				+## 输出格式要求
			
 
				+check_result 中必须包含以下字段：
			
 
				+- issue_point: 问题描述
			
 
				+- location: 问题定位（一级缺失填"目录页"，二级缺失填对应的一级章节名）
			
 
				+- page: 页码数字（统一使用 3）
			
 
				+- suggestion: 补充建议
			
 
				+- reason: 原因说明
			
 
				+- risk_level: 风险等级（"高风险"或"中风险"）
			
 
				+"""
			
 
				+
			
 
				         return f"""你是一位施工方案文档审查专家。请对比【实际目录】和【标准目录】，找出缺失项。
			
 
				 
			
 
				 ## 审查原则
			
@@ -329,6 +369,8 @@ class CatalogReviewer:
 
				 - 一级缺失：risk_level 为 "高风险", risk_info.risk_level 为 "high"
			
 
				 - 二级缺失：risk_level 为 "中风险", risk_info.risk_level 为 "medium"
			
 
				 - 如无缺失，response 中放一条 "issue_point": "【目录完整】一二级目录结构完整", "exist_issue": false
			
 
				+
			
 
				+{page_instruction}
			
 
				 """
			
 
				 
			
 
				     def _extract_json(self, content: str) -> Optional[Dict[str, Any]]:
			
--- a/core/construction_review/component/minimal_pipeline/toc_detector.py
+++ b/core/construction_review/component/minimal_pipeline/toc_detector.py
@@ -142,6 +142,13 @@ class TOCCatalogExtractor:
 
				 
			
 
				             catalog = self._parse_toc_text(toc_text)
			
 
				 
			
 
				+            # 添加目录页页码范围（1-based）
			
 
				+            if toc_pages:
			
 
				+                catalog["toc_page_range"] = {
			
 
				+                    "start": toc_pages[0] + 1,  # 转换为1-based页码
			
 
				+                    "end": toc_pages[-1] + 1
			
 
				+                }
			
 
				+
			
 
				             if progress_callback:
			
 
				                 progress_callback("目录识别", 100, f"目录提取完成，共{catalog['total_chapters']}章")
			
 
				 
			
--- a/core/construction_review/component/reviewers/completeness_reviewer.py
+++ b/core/construction_review/component/reviewers/completeness_reviewer.py
@@ -482,7 +482,8 @@ JSON输出："""
 
				         recommendations = await self._generate_recommendations(
			
 
				             tertiary_result, catalogue_result, outline_result,
			
 
				             actual_first, actual_secondary, actual_tertiary,
			
 
				-            chapter_classification
			
 
				+            chapter_classification,
			
 
				+            chunks  # 传入 chunks 用于获取实际章节名
			
 
				         )
			
 
				 
			
 
				         return LightweightCompletenessResult(
			
@@ -856,6 +857,62 @@ JSON输出："""
 
				         else:
			
 
				             return "incomplete"
			
 
				     
			
 
				+    def _build_section_label_map(self, chunks: List[Dict]) -> Dict[Tuple[str, str], str]:
				+        """
				+        Build a (first_code, second_code) -> section_label lookup from chunks.
				+
				+        For every chunk the two category codes and the label are read from
				+        ``chunk["metadata"]`` first and from the chunk itself second; the
				+        codes additionally fall back to the ``first_code`` / ``second_code``
				+        keys. A chunk missing any of the three values is skipped. When
				+        several chunks share the same key, the label of the last one wins.
				+
				+        Args:
				+            chunks: Chunk dicts. ``None`` or an empty list yields an empty map.
				+
				+        Returns:
				+            Mapping keyed by (first_code, second_code). Per the original
				+            author's note a label looks like "第一章编制依据->一、法律法规".
				+        """
				+        label_map: Dict[Tuple[str, str], str] = {}
				+        for chunk in chunks or []:
				+            # "metadata" can be present but None; treat that like a missing key
				+            # instead of crashing on None.get(...).
				+            metadata = chunk.get("metadata") or {}
				+            cat1 = (metadata.get("chapter_classification") or
				+                    chunk.get("chapter_classification") or
				+                    chunk.get("first_code"))
				+            cat2 = (metadata.get("secondary_category_code") or
				+                    chunk.get("secondary_category_code") or
				+                    chunk.get("second_code"))
				+            section_label = (metadata.get("section_label") or
				+                             chunk.get("section_label") or
				+                             "")
				+            if cat1 and cat2 and section_label:
				+                label_map[(cat1, cat2)] = section_label
				+        return label_map
			
 
				+
			
 
				+    def _get_actual_chapter_name(self, label_map: Dict[Tuple[str, str], str],
				+                                  first_code: str, second_code: Optional[str] = None) -> str:
				+        """
				+        Resolve the heading text to show for a (first_code, second_code) pair.
				+
				+        - Without ``second_code``: return the standard first-level name from
				+          ``self.spec_loader.first_names`` (or ``first_code`` if unknown).
				+        - With ``second_code`` and a label in ``label_map``: return the last
				+          non-empty "->"-separated segment of that label, i.e. the deepest
				+          heading the label actually carries.
				+        - With ``second_code`` but no usable label: fall back to
				+          "<first_cn> > <second_cn>" from ``self.secondary_specs``, or to
				+          "<first_code> > <second_code>" when the pair is not in the spec.
				+
				+        Args:
				+            label_map: (first_code, second_code) -> section_label mapping.
				+            first_code: First-level category code.
				+            second_code: Second-level category code, or None/empty.
				+
				+        Returns:
				+            The resolved heading string; never empty for a non-empty code pair.
				+        """
				+        if not second_code:
				+            return self.spec_loader.first_names.get(first_code, first_code)
				+
				+        section_label = label_map.get((first_code, second_code)) or ""
				+        # Drop empty segments so a label such as "第一章->" does not produce
				+        # an empty location string.
				+        segments = [seg.strip() for seg in section_label.split("->")]
				+        segments = [seg for seg in segments if seg]
				+        if segments:
				+            return segments[-1]
				+
				+        # No usable label: fall back to the standard names.
				+        sec_item = self.secondary_specs.get((first_code, second_code))
				+        if sec_item:
				+            return f"{sec_item.first_cn} > {sec_item.second_cn}"
				+        return f"{first_code} > {second_code}"
			
 
				+
			
 
				+    def _get_actual_first_name(self, label_map: Dict[Tuple[str, str], str],
				+                                first_code: str) -> str:
				+        """
				+        Return the first-level chapter title for ``first_code``.
				+
				+        The title is the text before the first "->" of the first label in
				+        ``label_map`` whose key starts with ``first_code`` and whose text
				+        contains "->". If no such label exists, the standard name from
				+        ``self.spec_loader.first_names`` is returned, or ``first_code``
				+        itself when that lookup also misses.
				+        """
				+        chapter_titles = (
				+            text.partition("->")[0].strip()
				+            for (code1, _code2), text in label_map.items()
				+            if code1 == first_code and "->" in text
				+        )
				+        found = next(chapter_titles, None)
				+        if found is not None:
				+            return found
				+        # No label under this first-level code: use the standard name.
				+        return self.spec_loader.first_names.get(first_code, first_code)
			
 
				+
			
 
				     async def _generate_recommendations(
			
 
				         self,
			
 
				         tertiary_result: Dict,
			
@@ -864,7 +921,8 @@ JSON输出："""
 
				         actual_first: Set[str],
			
 
				         actual_secondary: Set[Tuple[str, str]],
			
 
				         actual_tertiary: Set[Tuple[str, str, str]],
			
 
				-        chapter_classification: Optional[str] = None
			
 
				+        chapter_classification: Optional[str] = None,
			
 
				+        chunks: List[Dict] = None
			
 
				     ) -> List[Dict[str, Any]]:
			
 
				         """
			
 
				         生成结构化分级改进建议。
			
@@ -872,12 +930,15 @@ JSON输出："""
 
				         每条建议包含：
			
 
				           level        : 缺失级别（一级 / 二级 / 三级 / 一致性）
			
 
				           issue_point  : 问题摘要（含级别标识）
			
 
				-          location     : 问题定位路径
			
 
				+          location     : 问题定位路径（使用实际章节名）
			
 
				           suggestion   : 补充建议（使用LLM生成）
			
 
				           reason       : 规范依据说明（使用LLM生成）
			
 
				         """
			
 
				         recommendations: List[Dict[str, Any]] = []
			
 
				 
			
 
				+        # 构建 section_label 映射，用于获取实际章节名
			
 
				+        label_map = self._build_section_label_map(chunks or [])
			
 
				+
			
 
				         # 确定需要检查的一级分类范围
			
 
				         if chapter_classification:
			
 
				             required_first = (
			
@@ -939,15 +1000,18 @@ JSON输出："""
 
				 
			
 
				                 # ── 二级缺失 ──────────────────────────────────────────
			
 
				                 if (cat1, cat2) not in actual_secondary:
			
 
				+                    # 获取实际一级章节名
			
 
				+                    actual_first_name = self._get_actual_first_name(label_map, cat1)
			
 
				+
			
 
				                     # issue_point 和 reason 使用简单拼接
			
 
				-                    issue_point = f"【二级章节缺失】{first_name} > '{second_name}'整个章节不存在"
			
 
				-                    reason = f"依据《桥梁公司危险性较大工程管理实施细则（2025版）》规定，'{first_name}'下应包含'{second_name}'二级章节，当前正文中未发现该章节内容"
			
 
				+                    issue_point = f"【二级章节缺失】{actual_first_name} > '{second_name}'整个章节不存在"
			
 
				+                    reason = f"依据《桥梁公司危险性较大工程管理实施细则（2025版）》规定，'{actual_first_name}'下应包含'{second_name}'二级章节，当前正文中未发现该章节内容"
			
 
				 
			
 
				                     # 尝试使用LLM生成 suggestion
			
 
				                     llm_result = await self._generate_recommendation_with_llm(
			
 
				                         level="二级",
			
 
				                         first_code=cat1,
			
 
				-                        first_name=first_name,
			
 
				+                        first_name=actual_first_name,
			
 
				                         second_code=cat2,
			
 
				                         second_name=second_name,
			
 
				                         first_seq=first_seq,
			
@@ -958,12 +1022,12 @@ JSON输出："""
 
				                         suggestion = llm_result.get("suggestion")
			
 
				                     else:
			
 
				                         # 回退到简单拼接
			
 
				-                        suggestion = f"请在'{first_name}'下添加'{second_name}'章节内容"
			
 
				+                        suggestion = f"请在'{actual_first_name}'下添加'{second_name}'章节内容"
			
 
				 
			
 
				                     recommendations.append({
			
 
				                         "level": "二级",
			
 
				                         "issue_point": issue_point,
			
 
				-                        "location": f"{first_name} > {second_name}",
			
 
				+                        "location": actual_first_name,  # 二级缺失定位到一级章节
			
 
				                         "suggestion": suggestion,
			
 
				                         "reason": reason,
			
 
				                         "first_seq": first_seq,
			
@@ -986,6 +1050,9 @@ JSON输出："""
 
				                 if not missing_t_items:
			
 
				                     continue
			
 
				 
			
 
				+                # 获取实际二级小节名
			
 
				+                actual_second_name = self._get_actual_chapter_name(label_map, cat1, cat2)
			
 
				+
			
 
				                 # issue_point 和 reason 使用简单拼接（三级缺失）
			
 
				                 # 尝试使用LLM批量生成 suggestion
			
 
				                 llm_result = await self._generate_recommendation_with_llm(
			
@@ -1012,7 +1079,7 @@ JSON输出："""
 
				                     recommendations.append({
			
 
				                         "level": "三级",
			
 
				                         "issue_point": f"【三级内容缺失】{first_name} > {second_name} > '{t_item.third_cn}'",
			
 
				-                        "location": f"{first_name} > {second_name}",
			
 
				+                        "location": actual_second_name,  # 三级缺失定位到二级小节
			
 
				                         "suggestion": suggestion,
			
 
				                         "reason": f"依据《桥梁公司危险性较大工程管理实施细则（2025版）》规定，'{second_name}'下应包含'{t_item.third_cn}'内容要点",
			
 
				                         "first_seq": first_seq,