|
|
@@ -34,6 +34,7 @@ from typing import List, Dict, Any, Optional
|
|
|
from dataclasses import dataclass, asdict
|
|
|
|
|
|
from foundation.observability.logger.loggering import review_logger as logger
|
|
|
+from foundation.ai.agent.generate.model_generate import generate_model_client
|
|
|
from core.construction_review.component.standard_matching import (
|
|
|
StandardMatchingService,
|
|
|
StandardMatchResult,
|
|
|
@@ -56,6 +57,7 @@ class TimelinessReviewResult:
|
|
|
risk_level: str = "low" # 风险等级(与原有逻辑一致:low/high)
|
|
|
replacement_name: Optional[str] = None # 替代标准名称
|
|
|
replacement_number: Optional[str] = None # 替代标准号
|
|
|
+ mismatch_analysis: Optional[str] = None # MISMATCH 具体差异分析
|
|
|
final_result: Optional[str] = None # 最终结果描述
|
|
|
|
|
|
def to_dict(self) -> Dict[str, Any]:
|
|
|
@@ -93,6 +95,7 @@ class StandardTimelinessReviewer:
|
|
|
self._own_service = False # 标记是否由本实例创建 service
|
|
|
self.callback_task_id = callback_task_id
|
|
|
self._log_lock = threading.Lock()
|
|
|
+ self._mismatch_analysis_semaphore = asyncio.Semaphore(3)
|
|
|
|
|
|
async def __aenter__(self):
|
|
|
"""异步上下文管理器入口"""
|
|
|
@@ -164,6 +167,13 @@ class StandardTimelinessReviewer:
|
|
|
for match_result in match_results:
|
|
|
# 跳过 match 返回 None 的情况(文件名为空)
|
|
|
if match_result is not None:
|
|
|
+ logger.info(
|
|
|
+ "[时效性审查变量] "
|
|
|
+ f"提取standard_name={match_result.raw_name}, "
|
|
|
+ f"提取standard_number={match_result.raw_number}, "
|
|
|
+ f"数据库standard_name={match_result.matched_name or ''}, "
|
|
|
+ f"数据库standard_number={match_result.matched_number or ''}"
|
|
|
+ )
|
|
|
review_result = self._convert_match_to_review_result(match_result)
|
|
|
review_results.append(review_result)
|
|
|
|
|
|
@@ -190,6 +200,13 @@ class StandardTimelinessReviewer:
|
|
|
# 如果 match 返回 None(文件名为空),则返回 None
|
|
|
if match_result is None:
|
|
|
return None
|
|
|
+ logger.info(
|
|
|
+ "[时效性审查变量-单条] "
|
|
|
+ f"提取standard_name={match_result.raw_name}, "
|
|
|
+ f"提取standard_number={match_result.raw_number}, "
|
|
|
+ f"数据库standard_name={match_result.matched_name or ''}, "
|
|
|
+ f"数据库standard_number={match_result.matched_number or ''}"
|
|
|
+ )
|
|
|
review_result = self._convert_match_to_review_result(match_result)
|
|
|
self._log_determination_results([review_result])
|
|
|
return review_result
|
|
|
@@ -269,6 +286,7 @@ class StandardTimelinessReviewer:
|
|
|
risk_level="high",
|
|
|
replacement_name=match_result.substitute_name,
|
|
|
replacement_number=match_result.substitute_number,
|
|
|
+ mismatch_analysis=None,
|
|
|
final_result=match_result.final_result
|
|
|
)
|
|
|
|
|
|
@@ -301,6 +319,238 @@ class StandardTimelinessReviewer:
|
|
|
final_result=match_result.final_result
|
|
|
)
|
|
|
|
|
|
async def enrich_mismatch_details(
    self,
    review_results: List[TimelinessReviewResult]
) -> List[TimelinessReviewResult]:
    """
    Enrich MISMATCH review results with a concrete, LLM-generated difference note.

    Design principles:
    1. Only MISMATCH results are touched; the original verdict is never changed.
    2. A failed enrichment degrades quietly and the original suggestion is kept.
    3. The generated text is appended to ``suggestion`` so a front end can
       display it without extra work.

    Args:
        review_results: Review results to enrich. Matching items are mutated
            in place (``mismatch_analysis`` and ``suggestion``).

    Returns:
        The same ``review_results`` list that was passed in.
    """
    # Only results that carry both replacement fields can be compared.
    mismatch_results = [
        result for result in review_results
        if result.status_code == MatchResultCode.MISMATCH.value
        and result.has_issue
        and result.replacement_name
        and result.replacement_number
    ]
    if not mismatch_results:
        return review_results

    async def _enrich_single(result: TimelinessReviewResult) -> None:
        # The semaphore bounds the number of concurrent analysis calls.
        async with self._mismatch_analysis_semaphore:
            analysis = await self._generate_mismatch_analysis(result)
        if not analysis:
            return
        result.mismatch_analysis = analysis

        # Treat a missing suggestion as empty so the text "None" is never
        # rendered into the user-facing suggestion.
        existing = result.suggestion or ""
        if analysis in existing:
            return
        result.suggestion = f"{existing}\n{analysis}" if existing else analysis

    enrich_results = await asyncio.gather(
        *(_enrich_single(result) for result in mismatch_results),
        return_exceptions=True,
    )
    # gather() preserves input order, so outcomes line up with their results.
    for result, enrich_result in zip(mismatch_results, enrich_results):
        if isinstance(enrich_result, Exception):
            logger.warning(
                f"MISMATCH 细化分析失败,保留原建议。seq_no={result.seq_no}, "
                f"error={enrich_result}"
            )

    return review_results
|
|
|
+
|
|
|
async def _generate_mismatch_analysis(self, result: TimelinessReviewResult) -> Optional[str]:
    """
    Ask the LLM for a user-facing improvement suggestion for one MISMATCH result.

    The cited name/number and the replacement name/number are stripped of
    their wrappers and placed into a prompt that requests a JSON object with
    a single ``improvement_suggestion`` field.

    Args:
        result: The MISMATCH review result to analyse.

    Returns:
        The model's suggestion text when it is a non-empty string; otherwise
        the value of ``_build_fallback_mismatch_analysis(result)``.
    """
    input_name = self._strip_standard_name_wrapper(result.standard_name)
    input_number = self._strip_standard_number_wrapper(result.standard_number)
    actual_name = self._strip_standard_name_wrapper(result.replacement_name)
    actual_number = self._strip_standard_number_wrapper(result.replacement_number)

    system_prompt = (
        "你是规范引用差异分析助手。"
        "你的任务是比较用户引用的标准信息与标准库中的实际标准信息,"
        "输出必须是可直接展示给用户的改进建议,严格使用指定句式。"
    )
    # NOTE(review): the few-shot examples below look internally inconsistent
    # (example 2 deletes a fragment that is part of the stated correct name;
    # example 3 states one number as correct and then substitutes another).
    # They are left unchanged here - confirm the intended wording with the
    # prompt owner.
    user_prompt = f"""
请根据以下两组标准信息,输出一条可直接展示给用户的“改进建议”。

【用户引用】
- 标准名称:{input_name}
- 标准编号:{input_number}

【标准库实际记录】
- 标准名称:{actual_name}
- 标准编号:{actual_number}

【要求】
1. 输出必须严格为 JSON 对象,不要添加任何额外说明。
2. JSON 中只保留一个字段:`improvement_suggestion`。
3. `improvement_suggestion` 必须严格以 `改进建议:\\n` 开头。
4. 你必须先判断应该是“修改”“删除”还是“补充”,并明确指出具体的词或片段,不能把所有情况都写成“修改”:
   - 如果用户内容有多余片段,而标准库没有,该动作应为“删除”,只写出最小多余的片段。
   - 如果用户内容缺少片段,而标准库有,该动作应为“补充”,只写出最小缺失的片段。
   - 如果用户内容与标准库是错词替换关系,该动作应为“修改”,只写出最小差异片段。
5. 如果是“标准号正确、名称错误”,推荐句式如下,但动作要根据第4条自行判断:
   改进建议:\n标准号(正确标准号)对应的规范名称应为《正确规范名称》,请将“错误内容”修改为“正确内容”。
   或:改进建议:\n标准号(正确标准号)对应的规范名称应为《正确规范名称》,请删除“多余内容”。
   或:改进建议:\n标准号(正确标准号)对应的规范名称应为《正确规范名称》,请补充“缺失内容”。
6. 如果是“规范名称正确、标准号错误”,也要根据第4条自行判断是修改、删除还是补充,并指出具体标准号片段。
7. 如果名称和标准号都不一致,优先按更便于用户直接修改的方式输出一句建议,仍必须以“改进建议:\n”开头。
8. 不要输出“编号一致,问题在名称”这类分析性描述,要直接输出修改建议。
9. 引号内容必须尽量精确指出需要修改、删除、补充的片段。

输出示例:
    改进建议:
    标准号 (GB 50021-2001)对应的规范名称应为《岩土工程勘察报告》,请修改"规范"为"报告"。

    改进建议:
    标准号(JTG D60-2015)对应的规范名称应为《公路桥涵设计通用规范》,请删除"通用"。

    改进建议:
    《铁路工程抗震设计规范》对应的标准号应为(GB 50111-2009),请将标准号中的"(2009 年版)"修改为"(GB 50111-2006)"。

输出格式:
{{
    "improvement_suggestion": "改进建议:\\n..."
}}
/no_think
""".strip()

    try:
        raw = await generate_model_client.get_model_generate_invoke(
            trace_id=f"timeliness_mismatch_{self.callback_task_id or 'default'}_{result.seq_no}",
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            model_name="shutian_qwen3_5_122b",
            enable_thinking=False
        )
        payload = self._extract_first_json_object(raw)
        suggestion = (
            payload.get("improvement_suggestion")
            if isinstance(payload, dict) else None
        )
        # Accept only a real, non-empty string: str() on a JSON null would
        # otherwise surface the literal text "None" to the user.
        if isinstance(suggestion, str):
            suggestion_text = suggestion.strip()
            if suggestion_text:
                return suggestion_text
    except Exception as e:
        # Deliberately broad: enrichment is best-effort, so any failure of
        # the model call or of the parsing falls through to the fallback.
        logger.warning(
            f"MISMATCH LLM 细化分析失败,使用原始建议。seq_no={result.seq_no}, error={e}"
        )

    return self._build_fallback_mismatch_analysis(result)
|
|
|
+
|
|
|
+ def _extract_first_json_object(self, text: str) -> Dict[str, Any]:
|
|
|
+ """从模型输出中提取第一个 JSON 对象。"""
|
|
|
+ if not text:
|
|
|
+ raise ValueError("模型返回为空")
|
|
|
+
|
|
|
+ start = text.find("{")
|
|
|
+ if start == -1:
|
|
|
+ raise ValueError("未找到 JSON 起始符")
|
|
|
+
|
|
|
+ depth = 0
|
|
|
+ for idx in range(start, len(text)):
|
|
|
+ char = text[idx]
|
|
|
+ if char == "{":
|
|
|
+ depth += 1
|
|
|
+ elif char == "}":
|
|
|
+ depth -= 1
|
|
|
+ if depth == 0:
|
|
|
+ return json.loads(text[start:idx + 1])
|
|
|
+
|
|
|
+ raise ValueError("JSON 对象未闭合")
|
|
|
+
|
|
|
def _build_fallback_mismatch_analysis(self, result: TimelinessReviewResult) -> str:
    """
    Build a rule-based improvement suggestion for when the LLM is unavailable.

    The cited and replacement name/number are compared after stripping their
    wrappers, and the wording is chosen by which of the two differs.

    Args:
        result: The MISMATCH review result to describe.

    Returns:
        A suggestion string that starts with "改进建议:" followed by a newline.
    """
    cited_name = self._strip_standard_name_wrapper(result.standard_name)
    cited_number = self._strip_standard_number_wrapper(result.standard_number)
    library_name = self._strip_standard_name_wrapper(result.replacement_name)
    library_number = self._strip_standard_number_wrapper(result.replacement_number)

    name_differs = cited_name != library_name
    number_differs = cited_number != library_number

    # Nothing differs after stripping: give a generic "please verify" hint.
    if not name_differs and not number_differs:
        return f"改进建议:\n请将当前标准信息核对并修改为《{library_name}》({library_number})。"

    # Only the number differs.
    if not name_differs:
        number_fix = self._build_edit_instruction(
            cited_number, library_number, target_label='标准号中的'
        )
        return f"改进建议:\n《{library_name}》对应的标准号应为({library_number})," + number_fix

    stale_part, fresh_part = self._find_name_diff_fragment(cited_name, library_name)

    # Only the name differs.
    if not number_differs:
        name_fix = self._build_edit_instruction(stale_part, fresh_part)
        return f"改进建议:\n标准号({library_number})对应的规范名称应为《{library_name}》," + name_fix

    # Both differ: one instruction for the name, chained with one for the number.
    name_fix = self._build_edit_instruction(
        stale_part, fresh_part, target_label='名称中的'
    )
    number_fix = self._build_edit_instruction(
        cited_number, library_number, target_label='标准号中的', with_prefix=False
    )
    headline = f"改进建议:\n《{cited_name}》对应的标准信息应调整为《{library_name}》({library_number}),"
    return headline + name_fix + ",并" + number_fix
|
|
|
+
|
|
|
+ def _strip_standard_name_wrapper(self, name: Optional[str]) -> str:
|
|
|
+ """去除标准名称外围书名号,便于拼接提示词。"""
|
|
|
+ if not name:
|
|
|
+ return ""
|
|
|
+ return str(name).strip().strip("《》")
|
|
|
+
|
|
|
+ def _strip_standard_number_wrapper(self, number: Optional[str]) -> str:
|
|
|
+ """去除标准编号外围括号,便于拼接提示词。"""
|
|
|
+ if not number:
|
|
|
+ return ""
|
|
|
+ return str(number).strip().strip("()()")
|
|
|
+
|
|
|
+ def _find_name_diff_fragment(self, wrong_name: str, correct_name: str) -> tuple[str, str]:
|
|
|
+ """提取名称中的主要差异片段,便于生成可执行的修改建议。"""
|
|
|
+ wrong_name = wrong_name or ""
|
|
|
+ correct_name = correct_name or ""
|
|
|
+
|
|
|
+ prefix_len = 0
|
|
|
+ min_len = min(len(wrong_name), len(correct_name))
|
|
|
+ while prefix_len < min_len and wrong_name[prefix_len] == correct_name[prefix_len]:
|
|
|
+ prefix_len += 1
|
|
|
+
|
|
|
+ suffix_len = 0
|
|
|
+ wrong_remain = wrong_name[prefix_len:]
|
|
|
+ correct_remain = correct_name[prefix_len:]
|
|
|
+ min_suffix_len = min(len(wrong_remain), len(correct_remain))
|
|
|
+ while (
|
|
|
+ suffix_len < min_suffix_len
|
|
|
+ and wrong_remain[-(suffix_len + 1)] == correct_remain[-(suffix_len + 1)]
|
|
|
+ ):
|
|
|
+ suffix_len += 1
|
|
|
+
|
|
|
+ if suffix_len > 0:
|
|
|
+ wrong_fragment = wrong_name[prefix_len:len(wrong_name) - suffix_len]
|
|
|
+ correct_fragment = correct_name[prefix_len:len(correct_name) - suffix_len]
|
|
|
+ else:
|
|
|
+ wrong_fragment = wrong_name[prefix_len:]
|
|
|
+ correct_fragment = correct_name[prefix_len:]
|
|
|
+
|
|
|
+ return wrong_fragment, correct_fragment
|
|
|
+
|
|
|
+ def _build_edit_instruction(
|
|
|
+ self,
|
|
|
+ wrong_fragment: str,
|
|
|
+ correct_fragment: str,
|
|
|
+ target_label: str = "",
|
|
|
+ with_prefix: bool = True
|
|
|
+ ) -> str:
|
|
|
+ """根据差异片段生成“修改/删除/补充”指令。"""
|
|
|
+ wrong_fragment = (wrong_fragment or "").strip()
|
|
|
+ correct_fragment = (correct_fragment or "").strip()
|
|
|
+ prefix = "请" if with_prefix else ""
|
|
|
+
|
|
|
+ if wrong_fragment and correct_fragment:
|
|
|
+ return f"{prefix}将{target_label}“{wrong_fragment}”修改为“{correct_fragment}”"
|
|
|
+ if wrong_fragment and not correct_fragment:
|
|
|
+ return f"{prefix}删除{target_label}“{wrong_fragment}”"
|
|
|
+ if not wrong_fragment and correct_fragment:
|
|
|
+ return f"{prefix}补充{target_label}“{correct_fragment}”"
|
|
|
+ return f"{prefix}核对{target_label}相关内容"
|
|
|
+
|
|
|
def _normalize_text(self, text: str) -> str:
|
|
|
"""
|
|
|
规范化文本用于比较(与 StandardRepository._normalize_for_matching 保持一致)
|
|
|
@@ -391,6 +641,7 @@ class StandardTimelinessReviewer:
|
|
|
"standard_number": result.standard_number,
|
|
|
"replacement_name": result.replacement_name,
|
|
|
"replacement_number": result.replacement_number,
|
|
|
+ "mismatch_analysis": result.mismatch_analysis,
|
|
|
},
|
|
|
"exist_issue": True,
|
|
|
"risk_info": {"risk_level": result.risk_level}
|
|
|
@@ -427,7 +678,9 @@ async def review_standards_timeliness(
|
|
|
)
|
|
|
"""
|
|
|
async with StandardTimelinessReviewer(db_pool=db_pool, standard_service=standard_service) as reviewer:
|
|
|
- return reviewer.review_standards(standards_list)
|
|
|
+ review_results = reviewer.review_standards(standards_list)
|
|
|
+ await reviewer.enrich_mismatch_details(review_results)
|
|
|
+ return review_results
|
|
|
|
|
|
|
|
|
async def review_standard_timeliness_with_standardized_output(
|
|
|
@@ -454,6 +707,7 @@ async def review_standard_timeliness_with_standardized_output(
|
|
|
"""
|
|
|
async with StandardTimelinessReviewer(db_pool=db_pool, standard_service=standard_service) as reviewer:
|
|
|
review_results = reviewer.review_standards(standards_list)
|
|
|
+ await reviewer.enrich_mismatch_details(review_results)
|
|
|
return reviewer.convert_to_standardized_format(
|
|
|
review_results, check_item, chapter_code, check_item_code
|
|
|
)
|