فهرست منبع

fix(sgsc-时效性审查模型-xth): 修复编号识别错误bug

suhua31 2 هفته پیش
والد
کامیت
2f79340223

+ 6 - 2
core/construction_review/component/ai_review_engine.py

@@ -678,8 +678,12 @@ class AIReviewEngine(BaseReviewer):
                 'StandardCategoryTable.csv'
                 'StandardCategoryTable.csv'
             )
             )
             
             
-            # 创建轻量级审查器
-            checker = LightweightCompletenessChecker(csv_path)
+            # 创建轻量级审查器(传入model_client用于LLM生成建议)
+            # self.model_client 是从 BaseReviewer 继承的
+            checker = LightweightCompletenessChecker(
+                csv_path,
+                model_client=getattr(self, 'model_client', None)
+            )
             
             
             # 从state获取outline和原始chunks(如果有)
             # 从state获取outline和原始chunks(如果有)
             outline = None
             outline = None

+ 373 - 64
core/construction_review/component/reviewers/completeness_reviewer.py

@@ -15,6 +15,9 @@ from typing import Dict, List, Optional, Set, Tuple, Any
 from dataclasses import dataclass, field
 from dataclasses import dataclass, field
 from collections import defaultdict
 from collections import defaultdict
 from pathlib import Path
 from pathlib import Path
+import json
+
+from foundation.observability.logger.loggering import review_logger as logger
 
 
 
 
 @dataclass
 @dataclass
@@ -180,18 +183,42 @@ class TertiarySpecLoader:
 
 
 class LightweightCompletenessChecker:
 class LightweightCompletenessChecker:
     """轻量级完整性检查器"""
     """轻量级完整性检查器"""
-    
-    def __init__(self, standard_csv_path: str):
+
+    def __init__(self, standard_csv_path: str, model_client=None, prompt_loader=None):
         """
         """
         初始化检查器
         初始化检查器
-        
+
         Args:
         Args:
             standard_csv_path: StandardCategoryTable.csv 文件路径
             standard_csv_path: StandardCategoryTable.csv 文件路径
+            model_client: 模型客户端(可选),用于生成智能建议
+            prompt_loader: 提示词加载器(可选)
         """
         """
         self.spec_loader = TertiarySpecLoader(standard_csv_path)
         self.spec_loader = TertiarySpecLoader(standard_csv_path)
         self.tertiary_specs = self.spec_loader.get_tertiary_items()
         self.tertiary_specs = self.spec_loader.get_tertiary_items()
         self.secondary_specs = self.spec_loader.get_secondary_items()
         self.secondary_specs = self.spec_loader.get_secondary_items()
         self.secondary_names = self.spec_loader.get_secondary_names()
         self.secondary_names = self.spec_loader.get_secondary_names()
+
+        # 大模型客户端和提示词加载器(用于生成智能建议)
+        self.model_client = model_client
+        self.prompt_loader = prompt_loader
+
+        # 如果没有提供model_client,尝试从foundation导入
+        if self.model_client is None:
+            try:
+                from foundation.ai.agent.generate.model_generate import generate_model_client
+                self.model_client = generate_model_client
+            except ImportError:
+                logger.warning("无法导入generate_model_client,建议生成功能将使用简单拼接模式")
+                self.model_client = None
+
+        # 如果没有提供prompt_loader,尝试从当前模块导入
+        if self.prompt_loader is None:
+            try:
+                from .utils.prompt_loader import prompt_loader
+                self.prompt_loader = prompt_loader
+            except ImportError:
+                logger.warning("无法导入prompt_loader,建议生成功能将使用简单拼接模式")
+                self.prompt_loader = None
     
     
     def _normalize_chapter_code(self, code: str) -> str:
     def _normalize_chapter_code(self, code: str) -> str:
         """将章节分类码大小写归一化为与CSV一致(如 'management' -> 'management')"""
         """将章节分类码大小写归一化为与CSV一致(如 'management' -> 'management')"""
@@ -202,6 +229,198 @@ class LightweightCompletenessChecker:
                 return k
                 return k
         return code
         return code
 
 
+    def _build_llm_prompt_for_recommendation(
+        self,
+        level: str,
+        first_code: str,
+        first_name: str,
+        second_code: str = None,
+        second_name: str = None,
+        tertiary_items: List[TertiaryItem] = None,
+        outline_title: str = None
+    ) -> str:
+        """
+        Build the prompt used to ask the LLM for a review recommendation.
+
+        The prompt is assembled from a level-specific problem context, a
+        level-specific block of reference information, and a fixed instruction
+        asking for a JSON answer with issue_point / suggestion / reason.
+
+        Args:
+            level: Missing level. "一级", "二级", "三级" and "一致性" are handled;
+                any other value produces an "unknown" context with no reference.
+            first_code: First-level category code.
+            first_name: First-level category name.
+            second_code: Second-level category code (optional).
+            second_name: Second-level category name (optional).
+            tertiary_items: Missing third-level items (optional); only read
+                when level is "三级".
+            outline_title: Title as listed in the outline; only read when
+                level is "一致性", falling back to second_name when empty.
+
+        Returns:
+            str: The assembled prompt.
+        """
+        # Build the problem context and the reference block for the given level
+        if level == "一级":
+            context = f"""
+【问题类型】一级章节缺失
+【缺失章节】{first_name} ({first_code})
+【问题描述】文档中缺少'{first_name}'整个章节,这是专项施工方案中必须包含的一级章节。"""
+            # Collect every second-level spec under this first-level code, each
+            # with its third-level items, to serve as reference material
+            related_specs = []
+            for (fc, sc), sec_item in self.secondary_specs.items():
+                if fc == first_code:
+                    # All third-level items under this second-level category
+                    tertiary_list = self.spec_loader.get_tertiary_by_secondary(fc, sc)
+                    tertiary_info = []
+                    for t_item in tertiary_list:
+                        tertiary_info.append(f"      - {t_item.third_cn}: {t_item.third_focus}")
+                    # chr(10) is a newline; a backslash is not allowed inside an
+                    # f-string expression before Python 3.12
+                    related_specs.append(f"""
+  【二级分类】{sec_item.second_cn}
+    【包含的三级内容要点】
+{chr(10).join(tertiary_info)}""")
+
+            reference = f"""
+【规范参考信息】
+根据《桥梁公司危险性较大工程管理实施细则(2025版)》,'{first_name}'章节应包含以下内容:
+{chr(10).join(related_specs)}
+"""
+
+        elif level == "二级":
+            context = f"""
+【问题类型】二级章节缺失
+【所属一级】{first_name} ({first_code})
+【缺失章节】{second_name} ({second_code})
+【问题描述】'{first_name}'下缺少'{second_name}'二级章节。"""
+            # Collect the third-level items under this second-level category
+            tertiary_list = self.spec_loader.get_tertiary_by_secondary(first_code, second_code)
+            tertiary_info = []
+            for t_item in tertiary_list:
+                tertiary_info.append(f"    - {t_item.third_cn}: {t_item.third_focus}")
+
+            reference = f"""
+【规范参考信息】
+根据《桥梁公司危险性较大工程管理实施细则(2025版)》,'{second_name}'章节应包含以下三级内容要点:
+{chr(10).join(tertiary_info)}
+"""
+
+        elif level == "三级":
+            context = f"""
+【问题类型】三级内容缺失
+【所属一级】{first_name} ({first_code})
+【所属二级】{second_name} ({second_code})
+【缺失内容】"""
+            # List each missing third-level item; a None list is treated as empty
+            missing_contents = []
+            for item in tertiary_items or []:
+                missing_contents.append(f"    - {item.third_cn}: {item.third_focus}")
+            context += "\n" + "\n".join(missing_contents)
+
+            reference = f"""
+【规范参考信息】
+以上缺失的内容要点是'{second_name}'章节下的标准内容要求,具体包括:
+{chr(10).join([f'  - {t.third_cn}: 应包含{t.third_focus}' for t in (tertiary_items or [])])}
+"""
+
+        elif level == "一致性":
+            context = f"""
+【问题类型】目录与正文不一致
+【涉及章节】{outline_title or second_name}
+【问题描述】目录页列有该章节,但正文中未发现对应内容。"""
+            reference = """
+【规范参考信息】
+根据文档一致性要求,目录中列出的章节应在正文中有对应的内容描述。若该章节确实不需要,应从目录中移除;若需要保留,则必须补充正文内容。
+"""
+        else:
+            # Unrecognised level: minimal context, no reference material
+            context = "【问题类型】未知"
+            reference = ""
+
+        prompt = f"""你是一位资深的工程施工方案审查专家。请根据以下问题上下文和规范参考信息,生成专业的审查建议。
+
+{context}
+
+{reference}
+
+请用JSON格式输出审查建议,包含以下字段:
+- issue_point: 问题摘要(简洁明了,50字以内)
+- suggestion: 具体补充建议(详细可行,100-200字,包含具体应该补充的内容要点)
+- reason: 规范依据说明(引用具体规范要求,说明为什么需要补充)
+
+注意:
+1. suggestion应该具体、可操作,引用规范中的具体内容要求
+2. 使用专业的工程术语
+3. 语气应该是指导性的,帮助编制人员理解需要补充什么内容
+
+JSON输出:"""
+        return prompt
+
+    async def _generate_recommendation_with_llm(
+        self,
+        level: str,
+        first_code: str,
+        first_name: str,
+        second_code: str = None,
+        second_name: str = None,
+        tertiary_items: List[TertiaryItem] = None,
+        outline_title: str = None,
+        timeout: int = 30
+    ) -> Dict[str, str]:
+        """
+        使用大模型生成建议
+
+        Returns:
+            Dict[str, str]: 包含 issue_point, suggestion, reason 的字典
+        """
+        if not self.model_client:
+            return None
+
+        try:
+            prompt = self._build_llm_prompt_for_recommendation(
+                level=level,
+                first_code=first_code,
+                first_name=first_name,
+                second_code=second_code,
+                second_name=second_name,
+                tertiary_items=tertiary_items,
+                outline_title=outline_title
+            )
+
+            # 调用大模型
+            task_prompt_info = {
+                "task_prompt": prompt,
+                "task_name": f"completeness_suggestion_{level}"
+            }
+
+            # 生成唯一trace_id
+            import uuid
+            trace_id = f"completeness_llm_{uuid.uuid4().hex[:8]}"
+
+            model_response = await self.model_client.get_model_generate_invoke(
+                trace_id=trace_id,
+                task_prompt_info=task_prompt_info,
+                timeout=timeout,
+                model_name="qwen"  # 使用默认模型,可根据需要调整
+            )
+
+            # 解析模型返回的JSON
+            try:
+                # 尝试从返回文本中提取JSON
+                response_text = model_response.strip()
+                # 查找JSON块
+                if "```json" in response_text:
+                    json_str = response_text.split("```json")[1].split("```")[0].strip()
+                elif "```" in response_text:
+                    json_str = response_text.split("```")[1].split("```")[0].strip()
+                else:
+                    json_str = response_text
+
+                result = json.loads(json_str)
+                return {
+                    "issue_point": result.get("issue_point", ""),
+                    "suggestion": result.get("suggestion", ""),
+                    "reason": result.get("reason", "")
+                }
+            except (json.JSONDecodeError, IndexError) as e:
+                logger.warning(f"LLM建议生成结果解析失败: {e},返回: {model_response[:200]}")
+                return None
+
+        except Exception as e:
+            logger.warning(f"LLM建议生成失败: {e}")
+            return None
+
     async def check(
     async def check(
         self,
         self,
         chunks: List[Dict],
         chunks: List[Dict],
@@ -259,7 +478,7 @@ class LightweightCompletenessChecker:
 
 
         # 7. 生成分级建议
         # 7. 生成分级建议
         actual_first = {cat1 for cat1, _ in actual_secondary}
         actual_first = {cat1 for cat1, _ in actual_secondary}
-        recommendations = self._generate_recommendations(
+        recommendations = await self._generate_recommendations(
             tertiary_result, catalogue_result, outline_result,
             tertiary_result, catalogue_result, outline_result,
             actual_first, actual_secondary, actual_tertiary,
             actual_first, actual_secondary, actual_tertiary,
             chapter_classification
             chapter_classification
@@ -636,7 +855,7 @@ class LightweightCompletenessChecker:
         else:
         else:
             return "incomplete"
             return "incomplete"
     
     
-    def _generate_recommendations(
+    async def _generate_recommendations(
         self,
         self,
         tertiary_result: Dict,
         tertiary_result: Dict,
         catalogue_result: Dict,
         catalogue_result: Dict,
@@ -653,8 +872,8 @@ class LightweightCompletenessChecker:
           level        : 缺失级别(一级 / 二级 / 三级 / 一致性)
           level        : 缺失级别(一级 / 二级 / 三级 / 一致性)
           issue_point  : 问题摘要(含级别标识)
           issue_point  : 问题摘要(含级别标识)
           location     : 问题定位路径
           location     : 问题定位路径
-          suggestion   : 补充建议
-          reason       : 规范依据说明
+          suggestion   : 补充建议(使用LLM生成)
+          reason       : 规范依据说明(使用LLM生成)
         """
         """
         recommendations: List[Dict[str, Any]] = []
         recommendations: List[Dict[str, Any]] = []
 
 
@@ -679,17 +898,36 @@ class LightweightCompletenessChecker:
 
 
             # ── 一级缺失 ──────────────────────────────────────────────
             # ── 一级缺失 ──────────────────────────────────────────────
             if first_code not in actual_first:
             if first_code not in actual_first:
-                recommendations.append({
-                    "level": "一级",
-                    "issue_point": f"【一级章节缺失】'{first_name}'整个章节不存在",
-                    "location": first_name,
-                    "suggestion": f"请添加'{first_name}'章节及其下全部子章节内容",
-                    "reason": (
-                        f"根据规范要求,文档必须包含'{first_name}'一级章节,"
-                        f"当前正文中未发现该章节任何内容"
-                    ),
-                    "first_seq": first_seq,
-                })
+                # 尝试使用LLM生成建议
+                llm_result = await self._generate_recommendation_with_llm(
+                    level="一级",
+                    first_code=first_code,
+                    first_name=first_name,
+                    first_seq=first_seq
+                )
+
+                if llm_result:
+                    recommendations.append({
+                        "level": "一级",
+                        "issue_point": llm_result.get("issue_point", f"【一级章节缺失】'{first_name}'整个章节不存在"),
+                        "location": first_name,
+                        "suggestion": llm_result.get("suggestion", f"请添加'{first_name}'章节及其下全部子章节内容"),
+                        "reason": llm_result.get("reason", f"根据规范要求,文档必须包含'{first_name}'一级章节,当前正文中未发现该章节任何内容"),
+                        "first_seq": first_seq,
+                    })
+                else:
+                    # 回退到简单拼接
+                    recommendations.append({
+                        "level": "一级",
+                        "issue_point": f"【一级章节缺失】'{first_name}'整个章节不存在",
+                        "location": first_name,
+                        "suggestion": f"请添加'{first_name}'章节及其下全部子章节内容",
+                        "reason": (
+                            f"根据规范要求,文档必须包含'{first_name}'一级章节,"
+                            f"当前正文中未发现该章节任何内容"
+                        ),
+                        "first_seq": first_seq,
+                    })
                 continue
                 continue
 
 
             # ── 一级存在,检查二级 ─────────────────────────────────────
             # ── 一级存在,检查二级 ─────────────────────────────────────
@@ -703,20 +941,41 @@ class LightweightCompletenessChecker:
 
 
                 # ── 二级缺失 ──────────────────────────────────────────
                 # ── 二级缺失 ──────────────────────────────────────────
                 if (cat1, cat2) not in actual_secondary:
                 if (cat1, cat2) not in actual_secondary:
-                    recommendations.append({
-                        "level": "二级",
-                        "issue_point": (
-                            f"【二级章节缺失】{first_name} > '{second_name}'整个章节不存在"
-                        ),
-                        "location": f"{first_name} > {second_name}",
-                        "suggestion": f"请在'{first_name}'下添加'{second_name}'章节内容",
-                        "reason": (
-                            f"根据规范要求,'{first_name}'下应包含'{second_name}'二级章节,"
-                            f"当前正文中未发现该章节内容"
-                        ),
-                        "first_seq": first_seq,
-                        "second_seq": second_seq,
-                    })
+                    # 尝试使用LLM生成建议
+                    llm_result = await self._generate_recommendation_with_llm(
+                        level="二级",
+                        first_code=cat1,
+                        first_name=first_name,
+                        second_code=cat2,
+                        second_name=second_name
+                    )
+
+                    if llm_result:
+                        recommendations.append({
+                            "level": "二级",
+                            "issue_point": llm_result.get("issue_point", f"【二级章节缺失】{first_name} > '{second_name}'整个章节不存在"),
+                            "location": f"{first_name} > {second_name}",
+                            "suggestion": llm_result.get("suggestion", f"请在'{first_name}'下添加'{second_name}'章节内容"),
+                            "reason": llm_result.get("reason", f"根据规范要求,'{first_name}'下应包含'{second_name}'二级章节,当前正文中未发现该章节内容"),
+                            "first_seq": first_seq,
+                            "second_seq": second_seq,
+                        })
+                    else:
+                        # 回退到简单拼接
+                        recommendations.append({
+                            "level": "二级",
+                            "issue_point": (
+                                f"【二级章节缺失】{first_name} > '{second_name}'整个章节不存在"
+                            ),
+                            "location": f"{first_name} > {second_name}",
+                            "suggestion": f"请在'{first_name}'下添加'{second_name}'章节内容",
+                            "reason": (
+                                f"根据规范要求,'{first_name}'下应包含'{second_name}'二级章节,"
+                                f"当前正文中未发现该章节内容"
+                            ),
+                            "first_seq": first_seq,
+                            "second_seq": second_seq,
+                        })
                     continue
                     continue
 
 
                 # ── 二级存在,检查三级缺失 ────────────────────────────
                 # ── 二级存在,检查三级缺失 ────────────────────────────
@@ -734,40 +993,82 @@ class LightweightCompletenessChecker:
                 if not missing_t_items:
                 if not missing_t_items:
                     continue
                     continue
 
 
-                # 为每个缺失的三级项创建单独的 recommendation
-                for t_item in missing_t_items:
-                    recommendations.append({
-                        "level": "三级",
-                        "issue_point": (
-                            f"【三级内容缺失】{first_name} > {second_name} > '{t_item.third_cn}'"
-                        ),
-                        "location": f"{first_name} > {second_name}",
-                        "suggestion": f"请补充'{second_name}'下的'{t_item.third_cn}'内容",
-                        "reason": f"'{second_name}'下缺失规范要求的'{t_item.third_cn}'内容要点",
-                        "first_seq": first_seq,
-                        "second_seq": second_seq,
-                        "third_seq": t_item.third_seq,
-                    })
+                # 尝试使用LLM批量生成三级缺失建议
+                llm_result = await self._generate_recommendation_with_llm(
+                    level="三级",
+                    first_code=cat1,
+                    first_name=first_name,
+                    second_code=cat2,
+                    second_name=second_name,
+                    tertiary_items=missing_t_items
+                )
+
+                if llm_result:
+                    # LLM生成了整体建议,为每个缺失项添加相同建议(但位置不同)
+                    for t_item in missing_t_items:
+                        recommendations.append({
+                            "level": "三级",
+                            "issue_point": f"【三级内容缺失】{first_name} > {second_name} > '{t_item.third_cn}'",
+                            "location": f"{first_name} > {second_name}",
+                            "suggestion": llm_result.get("suggestion", f"请补充'{second_name}'下的'{t_item.third_cn}'内容"),
+                            "reason": llm_result.get("reason", f"'{second_name}'下缺失规范要求的'{t_item.third_cn}'内容要点"),
+                            "first_seq": first_seq,
+                            "second_seq": second_seq,
+                            "third_seq": t_item.third_seq,
+                        })
+                else:
+                    # 回退到简单拼接
+                    for t_item in missing_t_items:
+                        recommendations.append({
+                            "level": "三级",
+                            "issue_point": (
+                                f"【三级内容缺失】{first_name} > {second_name} > '{t_item.third_cn}'"
+                            ),
+                            "location": f"{first_name} > {second_name}",
+                            "suggestion": f"请补充'{second_name}'下的'{t_item.third_cn}'内容",
+                            "reason": f"'{second_name}'下缺失规范要求的'{t_item.third_cn}'内容要点",
+                            "first_seq": first_seq,
+                            "second_seq": second_seq,
+                            "third_seq": t_item.third_seq,
+                        })
 
 
         # ── 一致性审查:目录有列但正文无内容 ─────────────────────────────
         # ── 一致性审查:目录有列但正文无内容 ─────────────────────────────
         if outline_result:
         if outline_result:
             for e in outline_result.get("empty_sections", []):
             for e in outline_result.get("empty_sections", []):
                 f_name = e.get("first_name", "")
                 f_name = e.get("first_name", "")
-                # 优先用目录页原始标题,回退到标准名称
                 sec_title = e.get("outline_title") or e.get("secondary_name", "")
                 sec_title = e.get("outline_title") or e.get("secondary_name", "")
                 location = f"{f_name} > {sec_title}" if f_name else sec_title
                 location = f"{f_name} > {sec_title}" if f_name else sec_title
-                recommendations.append({
-                    "level": "一致性",
-                    "issue_point": f"【目录正文不一致】'{location}'目录已列但正文无内容",
-                    "location": location,
-                    "suggestion": (
-                        f"请补充'{sec_title}'章节的正文内容,或从目录中移除该章节"
-                    ),
-                    "reason": (
-                        f"目录页列有'{sec_title}'章节,但正文中未发现对应内容,"
-                        f"存在目录与正文不一致的问题"
-                    ),
-                })
+
+                # 尝试使用LLM生成建议
+                llm_result = await self._generate_recommendation_with_llm(
+                    level="一致性",
+                    first_code="",
+                    first_name=f_name,
+                    second_name=sec_title,
+                    outline_title=sec_title
+                )
+
+                if llm_result:
+                    recommendations.append({
+                        "level": "一致性",
+                        "issue_point": llm_result.get("issue_point", f"【目录正文不一致】'{location}'目录已列但正文无内容"),
+                        "location": location,
+                        "suggestion": llm_result.get("suggestion", f"请补充'{sec_title}'章节的正文内容,或从目录中移除该章节"),
+                        "reason": llm_result.get("reason", f"目录页列有'{sec_title}'章节,但正文中未发现对应内容,存在目录与正文不一致的问题"),
+                    })
+                else:
+                    recommendations.append({
+                        "level": "一致性",
+                        "issue_point": f"【目录正文不一致】'{location}'目录已列但正文无内容",
+                        "location": location,
+                        "suggestion": (
+                            f"请补充'{sec_title}'章节的正文内容,或从目录中移除该章节"
+                        ),
+                        "reason": (
+                            f"目录页列有'{sec_title}'章节,但正文中未发现对应内容,"
+                            f"存在目录与正文不一致的问题"
+                        ),
+                    })
 
 
         if not recommendations:
         if not recommendations:
             recommendations.append({
             recommendations.append({
@@ -785,16 +1086,20 @@ class LightweightCompletenessChecker:
 async def check_completeness_lightweight(
 async def check_completeness_lightweight(
     chunks: List[Dict],
     chunks: List[Dict],
     outline: Optional[List[Dict]] = None,
     outline: Optional[List[Dict]] = None,
-    standard_csv_path: Optional[str] = None
+    standard_csv_path: Optional[str] = None,
+    model_client=None,
+    prompt_loader=None
 ) -> LightweightCompletenessResult:
 ) -> LightweightCompletenessResult:
     """
     """
     轻量级完整性审查入口函数
     轻量级完整性审查入口函数
-    
+
     Args:
     Args:
         chunks: 文档分块列表,每个chunk需包含tertiary_category_code
         chunks: 文档分块列表,每个chunk需包含tertiary_category_code
         outline: 目录结构(可选)
         outline: 目录结构(可选)
         standard_csv_path: 三级标准CSV文件路径,默认为doc_worker/config/StandardCategoryTable.csv
         standard_csv_path: 三级标准CSV文件路径,默认为doc_worker/config/StandardCategoryTable.csv
-    
+        model_client: 模型客户端(可选),用于生成智能建议
+        prompt_loader: 提示词加载器(可选)
+
     Returns:
     Returns:
         LightweightCompletenessResult
         LightweightCompletenessResult
     """
     """
@@ -802,8 +1107,12 @@ async def check_completeness_lightweight(
         # 默认路径
         # 默认路径
         default_path = Path(__file__).parent.parent.parent.parent.parent / "doc_worker" / "config" / "StandardCategoryTable.csv"
         default_path = Path(__file__).parent.parent.parent.parent.parent / "doc_worker" / "config" / "StandardCategoryTable.csv"
         standard_csv_path = str(default_path)
         standard_csv_path = str(default_path)
-    
-    checker = LightweightCompletenessChecker(standard_csv_path)
+
+    checker = LightweightCompletenessChecker(
+        standard_csv_path,
+        model_client=model_client,
+        prompt_loader=prompt_loader
+    )
     return await checker.check(chunks=chunks, outline=outline)
     return await checker.check(chunks=chunks, outline=outline)
 
 
 
 

+ 5 - 5
core/construction_review/component/reviewers/timeliness_content_reviewer.py

@@ -46,14 +46,14 @@ class StandardExtractor:
 
 
     # 规范编号正则模式(匹配类似 GB 50010-2010、JTG B01-2014、GB/T 50502-2020 等格式)
     # 规范编号正则模式(匹配类似 GB 50010-2010、JTG B01-2014、GB/T 50502-2020 等格式)
     STANDARD_NUMBER_PATTERNS = [
     STANDARD_NUMBER_PATTERNS = [
-        # 中国国家标准:GB 50010-2010、GB/T 50502-2020
-        r'GB(?:/T)?\s*\d{4,5}(?:\.\d+)?\s*-\s*\d{4}',
+        # 中国国家标准:GB 50010-2010、GB/T 50502-2020、GB 51-2001
+        r'GB(?:/T)?\s*\d{1,5}(?:\.\d+)?\s*-\s*\d{4}',
         # 中国行业标准:JTG B01-2014、JTG D60-2015、JTG/T 3650-2020
         # 中国行业标准:JTG B01-2014、JTG D60-2015、JTG/T 3650-2020
-        r'[A-Z]{2,3}(?:/T)?\s*[A-Z]?\s*\d{2,4}(?:\.\d+)?\s*-\s*\d{4}',
+        r'[A-Z]{2,3}(?:/T)?\s*[A-Z]?\s*\d{1,5}(?:\.\d+)?\s*-\s*\d{4}',
         # 地方标准:DB11/T 1234-2020
         # 地方标准:DB11/T 1234-2020
-        r'DB\d{2}(?:/T)?\s*\d{4,5}\s*-\s*\d{4}',
+        r'DB\d{2}(?:/T)?\s*\d{1,5}\s*-\s*\d{4}',
         # 团体标准:T/CECS 123-2020
         # 团体标准:T/CECS 123-2020
-        r'T/\w+\s*\d{3,5}\s*-\s*\d{4}',
+        r'T/\w+\s*\d{1,5}\s*-\s*\d{4}',
     ]
     ]
 
 
     # 规范名称与编号组合的正则模式
     # 规范名称与编号组合的正则模式

+ 87 - 28
core/construction_review/component/reviewers/utils/reference_matcher.py

@@ -283,13 +283,16 @@ async def validate_and_generate_number(
     if existing_number:
     if existing_number:
         logger.info(f"[时效性验证] 验证编号: 《{regulation_name}》 {existing_number}")
         logger.info(f"[时效性验证] 验证编号: 《{regulation_name}》 {existing_number}")
         
         
-        # 先进行本地标准化比较:检查参考候选中是否有编号完全匹配(忽略括号差异)的
-        normalized_existing = _normalize_text(existing_number)
+        # 先进行本地标准化比较:检查参考候选中是否有名称和编号都完全匹配(忽略括号差异)的
+        normalized_existing_number = _normalize_text(existing_number)
+        normalized_regulation_name = _normalize_text(regulation_name)
         for candidate in reference_candidates:
         for candidate in reference_candidates:
-            # 从候选中提取编号
-            _, candidate_number = _extract_regulation_info(candidate)
-            if candidate_number and _normalize_text(candidate_number) == normalized_existing:
-                logger.info(f"[时效性验证] 本地验证通过(编号匹配): 《{regulation_name}》 {existing_number}")
+            # 从候选中提取名称和编号
+            candidate_name, candidate_number = _extract_regulation_info(candidate)
+            if (candidate_name and candidate_number and
+                _normalize_text(candidate_name) == normalized_regulation_name and
+                _normalize_text(candidate_number) == normalized_existing_number):
+                logger.info(f"[时效性验证] 本地验证通过(名称和编号都匹配): 《{regulation_name}》 {existing_number}")
                 return ValidationMatchResult(
                 return ValidationMatchResult(
                     review_item=review_item,
                     review_item=review_item,
                     reference_candidates=reference_candidates,
                     reference_candidates=reference_candidates,
@@ -297,6 +300,21 @@ async def validate_and_generate_number(
                     validated_number=existing_number,
                     validated_number=existing_number,
                     status="验证通过"
                     status="验证通过"
                 )
                 )
+
+        # 【关键】检查是否有编号相同但名称不同的情况(规范名称错误)
+        for candidate in reference_candidates:
+            candidate_name, candidate_number = _extract_regulation_info(candidate)
+            if (candidate_name and candidate_number and
+                _normalize_text(candidate_number) == normalized_existing_number and
+                _normalize_text(candidate_name) != normalized_regulation_name):
+                logger.info(f"[时效性验证] 编号相同但名称不同: 《{regulation_name}》-> 应为《{candidate_name}》")
+                return ValidationMatchResult(
+                    review_item=review_item,
+                    reference_candidates=reference_candidates,
+                    is_valid=False,
+                    validated_number=existing_number,
+                    status="规范名称错误"
+                )
         
         
         # 调用3模型验证
         # 调用3模型验证
         validation = await validate_reference_number(
         validation = await validate_reference_number(
@@ -432,28 +450,34 @@ async def match_reference_files(reference_text: str, review_text: str) -> str:
         exact_info = raw_item.get("exact_match_info", "")
         exact_info = raw_item.get("exact_match_info", "")
         same_name_current = raw_item.get("same_name_current", "")
         same_name_current = raw_item.get("same_name_current", "")
         
         
-        # 【校正逻辑】如果LLM判断has_exact_match=false,但本地比较发现编号相同(忽略括号差异),则校正为true
+        # 【校正逻辑】如果LLM判断has_exact_match=false,但本地比较发现名称和编号相同(忽略括号差异),则校正为true
         if not has_exact and exact_info:
         if not has_exact and exact_info:
-            _, review_number = _extract_regulation_info(review_item)
-            _, exact_number = _extract_regulation_info(exact_info)
-            if review_number and exact_number and _normalize_text(review_number) == _normalize_text(exact_number):
-                logger.info(f"[规范匹配校正] review_item='{review_item}' 编号实质相同,校正has_exact_match为true")
+            review_name, review_number = _extract_regulation_info(review_item)
+            exact_name, exact_number = _extract_regulation_info(exact_info)
+            if (review_name and exact_name and
+                _normalize_text(review_name) == _normalize_text(exact_name) and
+                review_number and exact_number and
+                _normalize_text(review_number) == _normalize_text(exact_number)):
+                logger.info(f"[规范匹配校正] review_item='{review_item}' 名称和编号都相同,校正has_exact_match为true")
                 has_exact = True
                 has_exact = True
         
         
-        # 【第一步】先检查向量搜索候选中是否有精确匹配(编号完全相同)
+        # 【第一步】检查向量搜索候选中的匹配情况
         # ref_candidates 是 List[List[str]],需要获取当前项对应的候选列表
         # ref_candidates 是 List[List[str]],需要获取当前项对应的候选列表
         current_candidates = ref_candidates[i] if i < len(ref_candidates) else []
         current_candidates = ref_candidates[i] if i < len(ref_candidates) else []
-        _, review_number = _extract_regulation_info(review_item)
-        
-        if review_number and current_candidates:
+        review_name, review_number = _extract_regulation_info(review_item)
+
+        if review_name and review_number and current_candidates:
+            normalized_review_name = _normalize_text(review_name)
             normalized_review_number = _normalize_text(review_number)
             normalized_review_number = _normalize_text(review_number)
-            exact_match_found = False
-            
+
+            # 先检查是否有完全匹配(名称和编号都相同)
             for candidate in current_candidates:
             for candidate in current_candidates:
                 if isinstance(candidate, str):
                 if isinstance(candidate, str):
-                    _, candidate_number = _extract_regulation_info(candidate)
-                    if candidate_number and _normalize_text(candidate_number) == normalized_review_number:
-                        # 向量库中找到精确匹配,直接使用,不需要AI投票
+                    candidate_name, candidate_number = _extract_regulation_info(candidate)
+                    if (candidate_name and candidate_number and
+                        _normalize_text(candidate_name) == normalized_review_name and
+                        _normalize_text(candidate_number) == normalized_review_number):
+                        # 向量库中找到精确匹配(名称和编号都相同)
                         logger.info(f"[规范匹配] 向量库中找到精确匹配: '{review_item}' -> '{candidate}'")
                         logger.info(f"[规范匹配] 向量库中找到精确匹配: '{review_item}' -> '{candidate}'")
                         final_results.append({
                         final_results.append({
                             "review_item": review_item,
                             "review_item": review_item,
@@ -462,11 +486,34 @@ async def match_reference_files(reference_text: str, review_text: str) -> str:
                             "exact_match_info": candidate,
                             "exact_match_info": candidate,
                             "same_name_current": candidate
                             "same_name_current": candidate
                         })
                         })
-                        exact_match_found = True
+                        has_exact = True
                         break
                         break
-            
-            # 如果找到了精确匹配,跳过本次循环
-            if exact_match_found:
+
+            if has_exact:
+                continue
+
+            # 【关键】检查是否有编号相同但名称不同的情况(规范名称错误)
+            for candidate in current_candidates:
+                if isinstance(candidate, str):
+                    candidate_name, candidate_number = _extract_regulation_info(candidate)
+                    if (candidate_name and candidate_number and
+                        _normalize_text(candidate_number) == normalized_review_number and
+                        _normalize_text(candidate_name) != normalized_review_name):
+                        # 编号相同但名称不同 - 判定为规范名称错误
+                        logger.info(f"[规范匹配] 编号相同但名称不同: '{review_item}' -> '{candidate}'")
+                        final_results.append({
+                            "review_item": review_item,
+                            "has_related_file": True,
+                            "has_exact_match": False,
+                            "exact_match_info": "",
+                            "same_name_current": candidate,
+                            "name_mismatch": True,  # 标记为名称不匹配
+                            "correct_name": candidate_name  # 正确的名称
+                        })
+                        has_exact = True  # 标记为已处理,跳过后续逻辑
+                        break
+
+            if has_exact:
                 continue
                 continue
         
         
         # 如果有精确匹配(由LLM判断),直接接受
         # 如果有精确匹配(由LLM判断),直接接受
@@ -492,12 +539,24 @@ async def match_reference_files(reference_text: str, review_text: str) -> str:
                 if validation_result.validated_number:
                 if validation_result.validated_number:
                     # 【关键逻辑】检查生成的编号与原始编号是否属于同一规范家族
                     # 【关键逻辑】检查生成的编号与原始编号是否属于同一规范家族
                     is_same_family = _is_same_regulation_family(
                     is_same_family = _is_same_regulation_family(
-                        review_number or "", 
+                        review_number or "",
                         validation_result.validated_number
                         validation_result.validated_number
                     )
                     )
-                    
-                    if not is_same_family:
-                        # 生成的编号与原始编号完全不同,说明参考库中找到的文件实际上不相关
+
+                    # 【特殊处理】检查参考候选中是否有名称完全匹配的文件
+                    # 如果名称相同但编号不同(如 GB 51-2001 vs GB 50021-2001),应接受生成的编号
+                    has_same_name_in_candidates = False
+                    for candidate in current_candidates:
+                        if isinstance(candidate, str):
+                            candidate_name, _ = _extract_regulation_info(candidate)
+                            if (candidate_name and
+                                _normalize_text(candidate_name) == _normalize_text(review_name)):
+                                has_same_name_in_candidates = True
+                                break
+
+                    if not is_same_family and not has_same_name_in_candidates:
+                        # 生成的编号与原始编号完全不同,且参考库中没有名称匹配的文件
+                        # 说明参考库中找到的文件实际上不相关
                         logger.info(f"[规范匹配] '{review_item}' 生成的编号({validation_result.validated_number})"
                         logger.info(f"[规范匹配] '{review_item}' 生成的编号({validation_result.validated_number})"
                                   f"与原始编号({review_number})不属于同一规范家族,判定为无相关文件")
                                   f"与原始编号({review_number})不属于同一规范家族,判定为无相关文件")
                         final_results.append({
                         final_results.append({

+ 11 - 5
core/construction_review/component/reviewers/utils/timeliness_determiner.py

@@ -55,22 +55,28 @@ HUMAN = """
    - 原因:在参考规范库中完全找不到相关文件
    - 原因:在参考规范库中完全找不到相关文件
    - 建议:当前引用未在参考规范库中发现,建议人工核实其有效性
    - 建议:当前引用未在参考规范库中发现,建议人工核实其有效性
 
 
-2. **规范编号错误**(高风险)
-   - 条件:has_related_file = true 且 has_exact_match = false
+2. **规范名称错误**(高风险)
+   - 条件:name_mismatch = true(编号相同但名称不同)
+   - 原因:规范编号正确,但规范名称错误。审查引用的是《错误名称》(编号),参考库中应为《正确名称》(编号)
+   - 建议:建议将规范名称更正为《正确名称》(编号)
+   - **重要**:必须从 correct_name 字段获取正确的规范名称
+
+3. **规范编号错误**(高风险)
+   - 条件:has_related_file = true 且 has_exact_match = false 且 name_mismatch 不存在或不为true
    - 原因:与参考文件XXX编号不一致(注意:仅当编号实质性不同时才算不一致,忽略括号格式差异)
    - 原因:与参考文件XXX编号不一致(注意:仅当编号实质性不同时才算不一致,忽略括号格式差异)
    - 建议:建议核实并更正为参考库中的正确编号XXX
    - 建议:建议核实并更正为参考库中的正确编号XXX
 
 
-3. **规范编号正确**(无风险)
+4. **规范编号正确**(无风险)
    - 条件:has_exact_match = true 且 exact_match_info 中状态为"现行"
    - 条件:has_exact_match = true 且 exact_match_info 中状态为"现行"
    - 原因:与参考文件XXX名称编号一致,且文件状态为现行
    - 原因:与参考文件XXX名称编号一致,且文件状态为现行
    - 建议:引用规范为现行有效版本,无需调整
    - 建议:引用规范为现行有效版本,无需调整
 
 
-4. **引用已废止的规范**(高风险)
+5. **引用已废止的规范**(高风险)
    - 条件:has_exact_match = true 且 exact_match_info 中状态为"废止" 且 same_name_current 为空
    - 条件:has_exact_match = true 且 exact_match_info 中状态为"废止" 且 same_name_current 为空
    - 原因:参考文件显示XXX已废止,且无明确替代版本
    - 原因:参考文件显示XXX已废止,且无明确替代版本
    - 建议:建议删除该引用或咨询最新替代规范
    - 建议:建议删除该引用或咨询最新替代规范
 
 
-5. **引用已被替代的规范**(高风险)
+6. **引用已被替代的规范**(高风险)
    - 条件:has_exact_match = true 且 exact_match_info 中状态为"废止" 且 same_name_current 不为空
    - 条件:has_exact_match = true 且 exact_match_info 中状态为"废止" 且 same_name_current 不为空
    - 原因:参考文件显示《规范名称》(原编号)已废止,存在现行版本《规范名称》(新编号)
    - 原因:参考文件显示《规范名称》(原编号)已废止,存在现行版本《规范名称》(新编号)
    - 建议:建议更新为现行版本《规范名称》(新编号),并核实其适用性
    - 建议:建议更新为现行版本《规范名称》(新编号),并核实其适用性