瀏覽代碼

fix(completeness_reviewer): 修复完整性审查 location 字段定位问题

- 二级缺失:location 指向实际一级章节名(如"第三章 施工计划")
- 三级缺失:location 指向实际二级小节名(如"3.2 施工准备")
- 新增 _build_section_label_map 辅助方法从 chunks 提取 section_label
- 新增 _get_actual_first_name 和 _get_actual_chapter_name 方法获取实际章节名
- 移除 ai_review_engine 中对 location 的额外处理逻辑
WangXuMing 4 天之前
父節點
當前提交
4c17a6d10e

+ 1 - 6
core/construction_review/component/ai_review_engine.py

@@ -756,12 +756,7 @@ class AIReviewEngine(BaseReviewer):
                 risk_level, risk_level_en = _level_risk.get(level, ("中风险", "medium"))
                 issue_point = rec.get('issue_point', '')
                 location = rec.get('location', '')
-                # 三级缺失:将 location 中的标准分类名替换为文档实际章节名,信息更直观
-                if level == '三级' and chapter_name and ' > ' in location:
-                    sec_part = location.split(' > ', 1)[1]
-                    new_location = f"{chapter_name} > {sec_part}"
-                    issue_point = issue_point.replace(location, new_location, 1)
-                    location = new_location
+                # location 已从 completeness_reviewer 获取实际章节名,无需额外处理
                 # 按顺序构建响应字段(first_seq -> second_seq -> third_seq 相邻)
                 response_item = {
                     "check_item": "completeness_check",

+ 76 - 9
core/construction_review/component/reviewers/completeness_reviewer.py

@@ -482,7 +482,8 @@ JSON输出:"""
         recommendations = await self._generate_recommendations(
             tertiary_result, catalogue_result, outline_result,
             actual_first, actual_secondary, actual_tertiary,
-            chapter_classification
+            chapter_classification,
+            chunks  # 传入 chunks 用于获取实际章节名
         )
 
         return LightweightCompletenessResult(
@@ -856,6 +857,62 @@ JSON输出:"""
         else:
             return "incomplete"
     
+    def _build_section_label_map(self, chunks: List[Dict]) -> Dict[Tuple[str, str], str]:
+        """
+        从 chunks 构建 (first_code, second_code) -> section_label 映射
+        section_label 格式:"第一章编制依据->一、法律法规"
+        """
+        label_map: Dict[Tuple[str, str], str] = {}
+        for chunk in chunks:
+            metadata = chunk.get("metadata", {})
+            cat1 = (metadata.get("chapter_classification") or
+                    chunk.get("chapter_classification") or
+                    chunk.get("first_code"))
+            cat2 = (metadata.get("secondary_category_code") or
+                    chunk.get("secondary_category_code") or
+                    chunk.get("second_code"))
+            section_label = (metadata.get("section_label") or
+                             chunk.get("section_label") or
+                             "")
+            if cat1 and cat2 and section_label:
+                label_map[(cat1, cat2)] = section_label
+        return label_map
+
+    def _get_actual_chapter_name(self, label_map: Dict[Tuple[str, str], str],
+                                  first_code: str, second_code: str = None) -> str:
+        """
+        获取实际章节名
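+        - 未提供 second_code:返回标准一级名称(spec_loader.first_names,查不到时返回 first_code 本身)
+        - 命中 label_map:返回二级小节名(section_label.split('->')[-1]);若不含 "->" 则返回整个 section_label
+        - 未命中 label_map:回退为标准名称 "一级名 > 二级名";无对应规范项时为 "first_code > second_code"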
+        """
+        if not second_code:
+            return self.spec_loader.first_names.get(first_code, first_code)
+
+        section_label = label_map.get((first_code, second_code), "")
+        if not section_label:
+            # 回退到标准名称
+            sec_item = self.secondary_specs.get((first_code, second_code))
+            if sec_item:
+                return f"{sec_item.first_cn} > {sec_item.second_cn}"
+            return f"{first_code} > {second_code}"
+
+        parts = section_label.split("->")
+        if len(parts) >= 2:
+            return parts[-1].strip()  # 返回二级小节名
+        return section_label.strip()
+
+    def _get_actual_first_name(self, label_map: Dict[Tuple[str, str], str],
+                                first_code: str) -> str:
+        """
+        获取实际一级章节名(取该一级下首个含 "->" 的 section_label 中 "->" 之前的部分;找不到则回退到标准名称)
+        """
+        for (fc, sc), label in label_map.items():
+            if fc == first_code and "->" in label:
+                return label.split("->")[0].strip()
+        # 回退到标准名称
+        return self.spec_loader.first_names.get(first_code, first_code)
+
     async def _generate_recommendations(
         self,
         tertiary_result: Dict,
@@ -864,7 +921,8 @@ JSON输出:"""
         actual_first: Set[str],
         actual_secondary: Set[Tuple[str, str]],
         actual_tertiary: Set[Tuple[str, str, str]],
-        chapter_classification: Optional[str] = None
+        chapter_classification: Optional[str] = None,
+        chunks: List[Dict] = None
     ) -> List[Dict[str, Any]]:
         """
         生成结构化分级改进建议。
@@ -872,12 +930,15 @@ JSON输出:"""
         每条建议包含:
           level        : 缺失级别(一级 / 二级 / 三级 / 一致性)
           issue_point  : 问题摘要(含级别标识)
-          location     : 问题定位路径
+          location     : 问题定位路径(使用实际章节名)
           suggestion   : 补充建议(使用LLM生成)
           reason       : 规范依据说明(使用LLM生成)
         """
         recommendations: List[Dict[str, Any]] = []
 
+        # 构建 section_label 映射,用于获取实际章节名
+        label_map = self._build_section_label_map(chunks or [])
+
         # 确定需要检查的一级分类范围
         if chapter_classification:
             required_first = (
@@ -939,15 +1000,18 @@ JSON输出:"""
 
                 # ── 二级缺失 ──────────────────────────────────────────
                 if (cat1, cat2) not in actual_secondary:
+                    # 获取实际一级章节名
+                    actual_first_name = self._get_actual_first_name(label_map, cat1)
+
                     # issue_point 和 reason 使用简单拼接
-                    issue_point = f"【二级章节缺失】{first_name} > '{second_name}'整个章节不存在"
-                    reason = f"依据《桥梁公司危险性较大工程管理实施细则(2025版)》规定,'{first_name}'下应包含'{second_name}'二级章节,当前正文中未发现该章节内容"
+                    issue_point = f"【二级章节缺失】{actual_first_name} > '{second_name}'整个章节不存在"
+                    reason = f"依据《桥梁公司危险性较大工程管理实施细则(2025版)》规定,'{actual_first_name}'下应包含'{second_name}'二级章节,当前正文中未发现该章节内容"
 
                     # 尝试使用LLM生成 suggestion
                     llm_result = await self._generate_recommendation_with_llm(
                         level="二级",
                         first_code=cat1,
-                        first_name=first_name,
+                        first_name=actual_first_name,
                         second_code=cat2,
                         second_name=second_name,
                         first_seq=first_seq,
@@ -958,12 +1022,12 @@ JSON输出:"""
                         suggestion = llm_result.get("suggestion")
                     else:
                         # 回退到简单拼接
-                        suggestion = f"请在'{first_name}'下添加'{second_name}'章节内容"
+                        suggestion = f"请在'{actual_first_name}'下添加'{second_name}'章节内容"
 
                     recommendations.append({
                         "level": "二级",
                         "issue_point": issue_point,
-                        "location": f"{first_name} > {second_name}",
+                        "location": actual_first_name,  # 二级缺失定位到一级章节
                         "suggestion": suggestion,
                         "reason": reason,
                         "first_seq": first_seq,
@@ -986,6 +1050,9 @@ JSON输出:"""
                 if not missing_t_items:
                     continue
 
+                # 获取实际二级小节名
+                actual_second_name = self._get_actual_chapter_name(label_map, cat1, cat2)
+
                 # issue_point 和 reason 使用简单拼接(三级缺失)
                 # 尝试使用LLM批量生成 suggestion
                 llm_result = await self._generate_recommendation_with_llm(
@@ -1012,7 +1079,7 @@ JSON输出:"""
                     recommendations.append({
                         "level": "三级",
                         "issue_point": f"【三级内容缺失】{first_name} > {second_name} > '{t_item.third_cn}'",
-                        "location": f"{first_name} > {second_name}",
+                        "location": actual_second_name,  # 三级缺失定位到二级小节
                         "suggestion": suggestion,
                         "reason": f"依据《桥梁公司危险性较大工程管理实施细则(2025版)》规定,'{second_name}'下应包含'{t_item.third_cn}'内容要点",
                         "first_seq": first_seq,