Эх сурвалжийг харах

Merge branch 'dev_sgsc_mjp' of CRBC-MaaS-Platform-Project/LQAgentPlatform into dev

WangXuMing 4 өдөр өмнө
parent
commit
eb865689a1

+ 255 - 1
core/construction_review/component/reviewers/standard_timeliness_reviewer.py

@@ -34,6 +34,7 @@ from typing import List, Dict, Any, Optional
 from dataclasses import dataclass, asdict
 
 from foundation.observability.logger.loggering import review_logger as logger
+from foundation.ai.agent.generate.model_generate import generate_model_client
 from core.construction_review.component.standard_matching import (
     StandardMatchingService,
     StandardMatchResult,
@@ -56,6 +57,7 @@ class TimelinessReviewResult:
     risk_level: str = "low"                  # 风险等级(与原有逻辑一致:low/high)
     replacement_name: Optional[str] = None   # 替代标准名称
     replacement_number: Optional[str] = None # 替代标准号
+    mismatch_analysis: Optional[str] = None  # MISMATCH 具体差异分析
     final_result: Optional[str] = None       # 最终结果描述
 
     def to_dict(self) -> Dict[str, Any]:
@@ -93,6 +95,7 @@ class StandardTimelinessReviewer:
         self._own_service = False  # 标记是否由本实例创建 service
         self.callback_task_id = callback_task_id
         self._log_lock = threading.Lock()
+        self._mismatch_analysis_semaphore = asyncio.Semaphore(3)
 
     async def __aenter__(self):
         """异步上下文管理器入口"""
@@ -164,6 +167,13 @@ class StandardTimelinessReviewer:
         for match_result in match_results:
             # 跳过 match 返回 None 的情况(文件名为空)
             if match_result is not None:
+                logger.info(
+                    "[时效性审查变量] "
+                    f"提取standard_name={match_result.raw_name}, "
+                    f"提取standard_number={match_result.raw_number}, "
+                    f"数据库standard_name={match_result.matched_name or ''}, "
+                    f"数据库standard_number={match_result.matched_number or ''}"
+                )
                 review_result = self._convert_match_to_review_result(match_result)
                 review_results.append(review_result)
 
@@ -190,6 +200,13 @@ class StandardTimelinessReviewer:
         # 如果 match 返回 None(文件名为空),则返回 None
         if match_result is None:
             return None
+        logger.info(
+            "[时效性审查变量-单条] "
+            f"提取standard_name={match_result.raw_name}, "
+            f"提取standard_number={match_result.raw_number}, "
+            f"数据库standard_name={match_result.matched_name or ''}, "
+            f"数据库standard_number={match_result.matched_number or ''}"
+        )
         review_result = self._convert_match_to_review_result(match_result)
         self._log_determination_results([review_result])
         return review_result
@@ -269,6 +286,7 @@ class StandardTimelinessReviewer:
                 risk_level="high",
                 replacement_name=match_result.substitute_name,
                 replacement_number=match_result.substitute_number,
+                mismatch_analysis=None,
                 final_result=match_result.final_result
             )
 
@@ -301,6 +319,238 @@ class StandardTimelinessReviewer:
                 final_result=match_result.final_result
             )
 
+    async def enrich_mismatch_details(
+        self,
+        review_results: List[TimelinessReviewResult]
+    ) -> List[TimelinessReviewResult]:
+        """
+        Use the LLM to attach a concrete difference explanation to MISMATCH results.
+
+        Design principles:
+        1. Only MISMATCH results are enriched; the original verdict is untouched.
+        2. A failed enrichment is logged and the original suggestion is kept.
+        3. The analysis is appended to ``suggestion`` so it can be displayed directly.
+
+        Only results that are MISMATCH, flagged ``has_issue`` and carry both a
+        replacement name and a replacement number are processed. Results are
+        mutated in place.
+
+        Args:
+            review_results: Review results to enrich.
+
+        Returns:
+            The same ``review_results`` list that was passed in.
+        """
+        mismatch_results = [
+            result for result in review_results
+            if result.status_code == MatchResultCode.MISMATCH.value
+            and result.has_issue
+            and result.replacement_name
+            and result.replacement_number
+        ]
+        if not mismatch_results:
+            return review_results
+
+        async def _enrich_single(result: TimelinessReviewResult) -> None:
+            # The semaphore only needs to bound the model call itself.
+            async with self._mismatch_analysis_semaphore:
+                analysis = await self._generate_mismatch_analysis(result)
+            if not analysis:
+                return
+
+            result.mismatch_analysis = analysis
+            # ``suggestion`` may be None or empty; never render it as the
+            # literal text "None" and never start the suggestion with a newline.
+            current_suggestion = result.suggestion or ""
+            if analysis in current_suggestion:
+                return
+            result.suggestion = (
+                f"{current_suggestion}\n{analysis}" if current_suggestion else analysis
+            )
+
+        outcomes = await asyncio.gather(
+            *(_enrich_single(result) for result in mismatch_results),
+            return_exceptions=True
+        )
+        # gather() preserves input order, so outcomes line up with mismatch_results.
+        for result, outcome in zip(mismatch_results, outcomes):
+            if isinstance(outcome, Exception):
+                logger.warning(
+                    f"MISMATCH 细化分析失败,保留原建议。seq_no={result.seq_no}, "
+                    f"error={outcome}"
+                )
+
+        return review_results
+
+    async def _generate_mismatch_analysis(self, result: TimelinessReviewResult) -> Optional[str]:
+        """
+        Ask the LLM for a user-facing improvement suggestion for one MISMATCH result.
+
+        The cited name/number and the replacement name/number are stripped of
+        their wrapping punctuation and placed into a prompt that requests a
+        JSON object with a single ``improvement_suggestion`` field.
+
+        Args:
+            result: The MISMATCH review result to analyse.
+
+        Returns:
+            The model's ``improvement_suggestion`` text when it is non-empty.
+            If the call or the parsing fails, or the text is empty, the
+            rule-based suggestion from ``_build_fallback_mismatch_analysis``.
+        """
+        input_name = self._strip_standard_name_wrapper(result.standard_name)
+        input_number = self._strip_standard_number_wrapper(result.standard_number)
+        actual_name = self._strip_standard_name_wrapper(result.replacement_name)
+        actual_number = self._strip_standard_number_wrapper(result.replacement_number)
+
+        system_prompt = (
+            "你是规范引用差异分析助手。"
+            "你的任务是比较用户引用的标准信息与标准库中的实际标准信息,"
+            "输出必须是可直接展示给用户的改进建议,严格使用指定句式。"
+        )
+        # NOTE(review): requirement 3 and the output format embed a literal
+        # backslash-n ("\\n"), while requirements 5 and 7 embed a real newline
+        # ("\n") — confirm which form the model is meant to see.
+        user_prompt = f"""
+请根据以下两组标准信息,输出一条可直接展示给用户的“改进建议”。
+
+【用户引用】
+- 标准名称:{input_name}
+- 标准编号:{input_number}
+
+【标准库实际记录】
+- 标准名称:{actual_name}
+- 标准编号:{actual_number}
+
+【要求】
+1. 输出必须严格为 JSON 对象,不要添加任何额外说明。
+2. JSON 中只保留一个字段:`improvement_suggestion`。
+3. `improvement_suggestion` 必须严格以 `改进建议:\\n` 开头。
+4. 你必须先判断应该是“修改”“删除”还是“补充”,并明确指出具体的词或片段,不能把所有情况都写成“修改”:
+   - 如果用户内容有多余片段,而标准库没有,该动作应为“删除”,只写出最小多余的片段。
+   - 如果用户内容缺少片段,而标准库有,该动作应为“补充”,只写出最小缺失的片段。
+   - 如果用户内容与标准库是错词替换关系,该动作应为“修改”,只写出最小差异片段。
+5. 如果是“标准号正确、名称错误”,推荐句式如下,但动作要根据第4条自行判断:
+   改进建议:\n标准号(正确标准号)对应的规范名称应为《正确规范名称》,请将“错误内容”修改为“正确内容”。
+   或:改进建议:\n标准号(正确标准号)对应的规范名称应为《正确规范名称》,请删除“多余内容”。
+   或:改进建议:\n标准号(正确标准号)对应的规范名称应为《正确规范名称》,请补充“缺失内容”。
+6. 如果是“规范名称正确、标准号错误”,也要根据第4条自行判断是修改、删除还是补充,并指出具体标准号片段。
+7. 如果名称和标准号都不一致,优先按更便于用户直接修改的方式输出一句建议,仍必须以“改进建议:\n”开头。
+8. 不要输出“编号一致,问题在名称”这类分析性描述,要直接输出修改建议。
+9. 引号内容必须尽量精确指出需要修改、删除、补充的片段。
+
+输出示例:
+改进建议:
+标准号 (GB 50021-2001)对应的规范名称应为《岩土工程勘察报告》,请修改"规范"为"报告"。
+
+改进建议:
+标准号(JTG D60-2015)对应的规范名称应为《公路桥涵设计通用规范》,请删除"通用"。
+
+改进建议:
+《铁路工程抗震设计规范》对应的标准号应为(GB 50111-2009),请将标准号中的"(2009 年版)"修改为"(GB 50111-2006)"。
+
+输出格式:
+{{
+  "improvement_suggestion": "改进建议:\\n..."
+}}
+/no_think
+""".strip()
+
+        # Broad catch is deliberate: any model or parsing failure must degrade
+        # to the rule-based suggestion rather than fail the whole review.
+        try:
+            raw = await generate_model_client.get_model_generate_invoke(
+                trace_id=f"timeliness_mismatch_{self.callback_task_id or 'default'}_{result.seq_no}",
+                system_prompt=system_prompt,
+                user_prompt=user_prompt,
+                model_name="shutian_qwen3_5_122b",
+                enable_thinking=False
+            )
+            payload = self._extract_first_json_object(raw)
+            suggestion_text = str(payload.get("improvement_suggestion", "")).strip()
+            if suggestion_text:
+                return suggestion_text
+        except Exception as e:
+            logger.warning(
+                f"MISMATCH LLM 细化分析失败,使用原始建议。seq_no={result.seq_no}, error={e}"
+            )
+
+        return self._build_fallback_mismatch_analysis(result)
+
+    def _extract_first_json_object(self, text: str) -> Dict[str, Any]:
+        """
+        Extract the first parseable JSON object from model output.
+
+        Each ``{`` in ``text`` is tried in turn as the start of an object. The
+        JSON decoder decides where the object ends, so braces inside string
+        values and trailing text after the object do not break extraction.
+
+        Args:
+            text: Raw model output, possibly with text around the JSON.
+
+        Returns:
+            The decoded JSON object.
+
+        Raises:
+            ValueError: If ``text`` is empty, contains no ``{``, or no ``{``
+                starts a valid JSON object.
+        """
+        if not text:
+            raise ValueError("模型返回为空")
+
+        start = text.find("{")
+        if start == -1:
+            raise ValueError("未找到 JSON 起始符")
+
+        decoder = json.JSONDecoder()
+        last_error: Optional[Exception] = None
+        while start != -1:
+            try:
+                # raw_decode parses one value starting at ``start`` and ignores
+                # whatever follows it.
+                payload, _end = decoder.raw_decode(text, start)
+            except json.JSONDecodeError as exc:
+                last_error = exc
+                start = text.find("{", start + 1)
+            else:
+                return payload
+
+        raise ValueError(f"未找到可解析的 JSON 对象: {last_error}") from last_error
+
+    def _build_fallback_mismatch_analysis(self, result: TimelinessReviewResult) -> str:
+        """
+        Build a rule-based improvement suggestion for when the LLM is unavailable.
+
+        The cited and replacement name/number are compared after their wrapping
+        punctuation is stripped. The wording depends on which of them differ.
+
+        Args:
+            result: The MISMATCH review result to describe.
+
+        Returns:
+            A suggestion string that starts with "改进建议:" and a newline.
+        """
+        cited_name = self._strip_standard_name_wrapper(result.standard_name)
+        cited_number = self._strip_standard_number_wrapper(result.standard_number)
+        actual_name = self._strip_standard_name_wrapper(result.replacement_name)
+        actual_number = self._strip_standard_number_wrapper(result.replacement_number)
+
+        name_differs = cited_name != actual_name
+        number_differs = cited_number != actual_number
+
+        # Nothing differs after stripping: fall back to a generic check request.
+        if not name_differs and not number_differs:
+            return f"改进建议:\n请将当前标准信息核对并修改为《{actual_name}》({actual_number})。"
+
+        # Only the number is wrong.
+        if not name_differs:
+            number_fix = self._build_edit_instruction(
+                cited_number, actual_number, target_label='标准号中的'
+            )
+            return f"改进建议:\n《{actual_name}》对应的标准号应为({actual_number}),{number_fix}"
+
+        wrong_fragment, correct_fragment = self._find_name_diff_fragment(cited_name, actual_name)
+
+        # Only the name is wrong.
+        if not number_differs:
+            name_fix = self._build_edit_instruction(wrong_fragment, correct_fragment)
+            return f"改进建议:\n标准号({actual_number})对应的规范名称应为《{actual_name}》,{name_fix}"
+
+        # Both name and number are wrong.
+        name_fix = self._build_edit_instruction(
+            wrong_fragment, correct_fragment, target_label='名称中的'
+        )
+        number_fix = self._build_edit_instruction(
+            cited_number, actual_number, target_label='标准号中的', with_prefix=False
+        )
+        return (
+            f"改进建议:\n《{cited_name}》对应的标准信息应调整为《{actual_name}》({actual_number}),"
+            f"{name_fix},并{number_fix}"
+        )
+
+    def _strip_standard_name_wrapper(self, name: Optional[str]) -> str:
+        """
+        Remove surrounding whitespace and outer book-title marks from a standard name.
+
+        Args:
+            name: Standard name, possibly wrapped in 《》; may be None or empty.
+
+        Returns:
+            The bare name, or an empty string for a falsy input.
+        """
+        if name:
+            trimmed = str(name).strip()
+            return trimmed.strip("《》")
+        return ""
+
+    def _strip_standard_number_wrapper(self, number: Optional[str]) -> str:
+        """
+        Remove brackets that wrap a standard number, keeping brackets inside it.
+
+        Half-width and full-width round brackets are both recognised. A
+        bracket is removed only when it wraps the whole value or is a dangling
+        unmatched bracket at either end. A number such as
+        "GB 50111-2006(2009年版)" therefore keeps its trailing bracket group,
+        which a plain character strip would cut.
+
+        Args:
+            number: Standard number, possibly wrapped in brackets; may be None or empty.
+
+        Returns:
+            The bare number, or an empty string for a falsy input.
+        """
+        if not number:
+            return ""
+
+        openers, closers = "((", "))"
+        text = str(number).strip()
+        while text:
+            starts_open = text[0] in openers
+            ends_closed = text[-1] in closers
+
+            if starts_open and ends_closed:
+                # Find the bracket that closes the leading one; it wraps the
+                # whole value only if it is the final character.
+                depth = 0
+                match_idx = -1
+                for idx, char in enumerate(text):
+                    if char in openers:
+                        depth += 1
+                    elif char in closers:
+                        depth -= 1
+                        if depth == 0:
+                            match_idx = idx
+                            break
+                if match_idx == len(text) - 1:
+                    text = text[1:-1].strip()
+                    continue
+
+            open_count = sum(char in openers for char in text)
+            close_count = sum(char in closers for char in text)
+            if starts_open and open_count > close_count:
+                # Dangling opening bracket with no partner.
+                text = text[1:].strip()
+                continue
+            if ends_closed and close_count > open_count:
+                # Dangling closing bracket with no partner.
+                text = text[:-1].strip()
+                continue
+            break
+
+        return text
+
+    def _find_name_diff_fragment(self, wrong_name: str, correct_name: str) -> tuple[str, str]:
+        """
+        Isolate the differing middle part of two names.
+
+        The longest common prefix is removed first, then the longest common
+        suffix of what remains. What is left of each name is its fragment.
+
+        Args:
+            wrong_name: The name as cited by the user.
+            correct_name: The name recorded in the standard library.
+
+        Returns:
+            ``(wrong_fragment, correct_fragment)``; either may be empty.
+        """
+        cited = wrong_name or ""
+        actual = correct_name or ""
+
+        # Length of the shared leading run.
+        head = 0
+        for cited_char, actual_char in zip(cited, actual):
+            if cited_char != actual_char:
+                break
+            head += 1
+
+        # Length of the shared trailing run, measured on the remainders so it
+        # can never overlap the prefix.
+        tail = 0
+        for cited_char, actual_char in zip(reversed(cited[head:]), reversed(actual[head:])):
+            if cited_char != actual_char:
+                break
+            tail += 1
+
+        return cited[head:len(cited) - tail], actual[head:len(actual) - tail]
+
+    def _build_edit_instruction(
+        self,
+        wrong_fragment: str,
+        correct_fragment: str,
+        target_label: str = "",
+        with_prefix: bool = True
+    ) -> str:
+        """
+        Turn a pair of difference fragments into a modify/delete/add instruction.
+
+        Args:
+            wrong_fragment: Text present in the cited value; may be empty.
+            correct_fragment: Text expected in its place; may be empty.
+            target_label: Optional label inserted before the quoted fragment.
+            with_prefix: Whether the instruction starts with "请".
+
+        Returns:
+            A modify instruction when both fragments are non-empty, a delete
+            instruction when only the wrong one is, an add instruction when
+            only the correct one is, and a generic check request otherwise.
+        """
+        wrong = (wrong_fragment or "").strip()
+        correct = (correct_fragment or "").strip()
+        lead = "请" if with_prefix else ""
+
+        if wrong:
+            if correct:
+                return f"{lead}将{target_label}“{wrong}”修改为“{correct}”"
+            return f"{lead}删除{target_label}“{wrong}”"
+        if correct:
+            return f"{lead}补充{target_label}“{correct}”"
+        return f"{lead}核对{target_label}相关内容"
+
     def _normalize_text(self, text: str) -> str:
         """
         规范化文本用于比较(与 StandardRepository._normalize_for_matching 保持一致)
@@ -391,6 +641,7 @@ class StandardTimelinessReviewer:
                     "standard_number": result.standard_number,
                     "replacement_name": result.replacement_name,
                     "replacement_number": result.replacement_number,
+                    "mismatch_analysis": result.mismatch_analysis,
                 },
                 "exist_issue": True,
                 "risk_info": {"risk_level": result.risk_level}
@@ -427,7 +678,9 @@ async def review_standards_timeliness(
         )
     """
     async with StandardTimelinessReviewer(db_pool=db_pool, standard_service=standard_service) as reviewer:
-        return reviewer.review_standards(standards_list)
+        review_results = reviewer.review_standards(standards_list)
+        await reviewer.enrich_mismatch_details(review_results)
+        return review_results
 
 
 async def review_standard_timeliness_with_standardized_output(
@@ -454,6 +707,7 @@ async def review_standard_timeliness_with_standardized_output(
     """
     async with StandardTimelinessReviewer(db_pool=db_pool, standard_service=standard_service) as reviewer:
         review_results = reviewer.review_standards(standards_list)
+        await reviewer.enrich_mismatch_details(review_results)
         return reviewer.convert_to_standardized_format(
             review_results, check_item, chapter_code, check_item_code
         )

+ 29 - 4
core/construction_review/component/reviewers/timeliness_basis_reviewer.py

@@ -226,19 +226,31 @@ class BasisReviewService:
         pattern1 = r'《([^《》]+)》\s*(([^)]+))'
         match = re.search(pattern1, basis_text)
         if match:
-            return {
+            result = {
                 "standard_name": match.group(1).strip(),
                 "standard_number": match.group(2).strip()
             }
+            logger.info(
+                "[编制依据提取变量] "
+                f"提取standard_name={result['standard_name']}, "
+                f"提取standard_number={result['standard_number']}"
+            )
+            return result
 
         # 模式2: 《名称》(编号) - 半角括号
         pattern2 = r'《([^《》]+)》\s*\(([^)]+)\)'
         match = re.search(pattern2, basis_text)
         if match:
-            return {
+            result = {
                 "standard_name": match.group(1).strip(),
                 "standard_number": match.group(2).strip()
             }
+            logger.info(
+                "[编制依据提取变量] "
+                f"提取standard_name={result['standard_name']}, "
+                f"提取standard_number={result['standard_number']}"
+            )
+            return result
 
         # 模式3: 尝试匹配标准号格式(如 GB 1234-2020)
         standard_pattern = r'([A-Z]{2,6}(?:/[A-Z])?\s*\d{1,6}(?:\.\d)?(?:-\d{4})?)'
@@ -248,15 +260,27 @@ class BasisReviewService:
             # 尝试提取名称(在编号前的书名号内)
             name_match = re.search(r'《([^《》]+)》', basis_text)
             if name_match:
-                return {
+                result = {
                     "standard_name": name_match.group(1).strip(),
                     "standard_number": standard_number
                 }
+                logger.info(
+                    "[编制依据提取变量] "
+                    f"提取standard_name={result['standard_name']}, "
+                    f"提取standard_number={result['standard_number']}"
+                )
+                return result
             # 如果没有书名号,使用空名称
-            return {
+            result = {
                 "standard_name": "",
                 "standard_number": standard_number
             }
+            logger.info(
+                "[编制依据提取变量] "
+                f"提取standard_name={result['standard_name']}, "
+                f"提取standard_number={result['standard_number']}"
+            )
+            return result
 
         return None
 
@@ -296,6 +320,7 @@ class BasisReviewService:
                     raise RuntimeError("时效性审查器未初始化,请使用异步上下文管理器")
 
                 review_results = self._timeliness_reviewer.review_standards(standards_list)
+                await self._timeliness_reviewer.enrich_mismatch_details(review_results)
 
                 # 转换为标准格式
                 standardized_results = self._timeliness_reviewer.convert_to_standardized_format(

+ 1 - 0
core/construction_review/component/reviewers/timeliness_content_reviewer.py

@@ -281,6 +281,7 @@ class ContentTimelinessReviewer:
                     self._timeliness_reviewer.callback_task_id = callback_task_id
                 # 执行规则匹配审查
                 review_results = self._timeliness_reviewer.review_standards(standards_list)
+                await self._timeliness_reviewer.enrich_mismatch_details(review_results)
 
                 # 转换为标准格式
                 standardized_results = self._timeliness_reviewer.convert_to_standardized_format(

+ 18 - 0
core/construction_review/component/standard_matching/standard_service.py

@@ -307,6 +307,24 @@ class StandardMatcher:
 
         # 5. 使用规范化数据进行匹配
         match_by_number = self.repo.find_by_normalized_number(normalized_number)
+        if match_by_number:
+            logger.info(
+                "[standard_number_exact_match] "
+                f"seq_no={seq_no}, "
+                f"raw_number={raw_number}, "
+                f"normalized_number={normalized_number}, "
+                f"matched_db_number={match_by_number.standard_number}, "
+                f"matched_db_name={match_by_number.standard_name}, "
+                f"validity={match_by_number.validity}"
+            )
+        else:
+            logger.info(
+                "[standard_number_exact_match] "
+                f"seq_no={seq_no}, "
+                f"raw_number={raw_number}, "
+                f"normalized_number={normalized_number}, "
+                "matched=None"
+            )
 
         if match_by_number:
             # 分支A: 标准号匹配成功