|
@@ -15,6 +15,9 @@ from typing import Dict, List, Optional, Set, Tuple, Any
|
|
|
from dataclasses import dataclass, field
|
|
from dataclasses import dataclass, field
|
|
|
from collections import defaultdict
|
|
from collections import defaultdict
|
|
|
from pathlib import Path
|
|
from pathlib import Path
|
|
|
|
|
+import json
|
|
|
|
|
+
|
|
|
|
|
+from foundation.observability.logger.loggering import review_logger as logger
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
|
|
@dataclass
|
|
@@ -180,18 +183,42 @@ class TertiarySpecLoader:
|
|
|
|
|
|
|
|
class LightweightCompletenessChecker:
|
|
class LightweightCompletenessChecker:
|
|
|
"""轻量级完整性检查器"""
|
|
"""轻量级完整性检查器"""
|
|
|
-
|
|
|
|
|
- def __init__(self, standard_csv_path: str):
|
|
|
|
|
|
|
+
|
|
|
|
|
def __init__(self, standard_csv_path: str, model_client=None, prompt_loader=None):
    """
    Set up the checker: load the category specs and resolve optional helpers.

    Args:
        standard_csv_path: Path to the StandardCategoryTable.csv file.
        model_client: Optional model client used to generate smart
            recommendations. When omitted, ``generate_model_client`` is
            imported from ``foundation.ai.agent.generate.model_generate``.
        prompt_loader: Optional prompt loader. When omitted, the
            ``prompt_loader`` object from ``.utils.prompt_loader`` is used.

    If either fallback import fails, a warning is logged and the matching
    attribute stays ``None``.
    """
    loader = TertiarySpecLoader(standard_csv_path)
    self.spec_loader = loader
    self.tertiary_specs = loader.get_tertiary_items()
    self.secondary_specs = loader.get_secondary_items()
    self.secondary_names = loader.get_secondary_names()

    # Model client used for generating smart recommendations: take the
    # caller's object, otherwise try the project-wide default.
    if model_client is None:
        try:
            from foundation.ai.agent.generate.model_generate import generate_model_client
        except ImportError:
            logger.warning("无法导入generate_model_client,建议生成功能将使用简单拼接模式")
        else:
            model_client = generate_model_client
    self.model_client = model_client

    # Prompt loader: same pattern. On success the import rebinds the local
    # name; on failure it is left as None.
    if prompt_loader is None:
        try:
            from .utils.prompt_loader import prompt_loader
        except ImportError:
            logger.warning("无法导入prompt_loader,建议生成功能将使用简单拼接模式")
    self.prompt_loader = prompt_loader
|
|
|
|
|
|
|
|
def _normalize_chapter_code(self, code: str) -> str:
|
|
def _normalize_chapter_code(self, code: str) -> str:
|
|
|
"""将章节分类码大小写归一化为与CSV一致(如 'management' -> 'management')"""
|
|
"""将章节分类码大小写归一化为与CSV一致(如 'management' -> 'management')"""
|
|
@@ -202,6 +229,198 @@ class LightweightCompletenessChecker:
|
|
|
return k
|
|
return k
|
|
|
return code
|
|
return code
|
|
|
|
|
|
|
|
|
|
def _build_llm_prompt_for_recommendation(
    self,
    level: str,
    first_code: str,
    first_name: str,
    second_code: str = None,
    second_name: str = None,
    tertiary_items: List[TertiaryItem] = None,
    outline_title: str = None
) -> str:
    """
    Assemble the prompt that asks the LLM for a review recommendation.

    The prompt is made of a problem-context section and a specification
    reference section, both chosen by ``level``, followed by fixed
    instructions requesting a JSON answer with ``issue_point``,
    ``suggestion`` and ``reason``.

    Args:
        level: Kind of finding: "一级", "二级", "三级" or "一致性". Any other
            value produces an "unknown" context and an empty reference.
        first_code: First-level category code.
        first_name: First-level category name.
        second_code: Second-level category code (optional).
        second_name: Second-level category name (optional).
        tertiary_items: Missing third-level items (optional, used for "三级").
        outline_title: Title as listed in the outline (used for "一致性").

    Returns:
        str: The complete prompt text.
    """
    # NOTE(review): leading whitespace inside the string literals below is
    # reproduced as it appears in the reviewed diff rendering - confirm
    # against the real file before merging.

    def bullet_lines(entries):
        # One " - <name>: <focus>" line per third-level item.
        return [f" - {entry.third_cn}: {entry.third_focus}" for entry in entries]

    # Defaults used when `level` matches none of the known kinds.
    context = "【问题类型】未知"
    reference = ""

    if level == "一级":
        context = f"""
【问题类型】一级章节缺失
【缺失章节】{first_name} ({first_code})
【问题描述】文档中缺少'{first_name}'整个章节,这是专项施工方案中必须包含的一级章节。"""
        # Gather every second-level section under this first-level code,
        # each with its third-level points, as reference material.
        secondary_sections = []
        for (spec_first, spec_second), secondary in self.secondary_specs.items():
            if spec_first != first_code:
                continue
            points = "\n".join(
                bullet_lines(self.spec_loader.get_tertiary_by_secondary(spec_first, spec_second))
            )
            secondary_sections.append(f"""
 【二级分类】{secondary.second_cn}
 【包含的三级内容要点】
{points}""")

        joined_sections = "\n".join(secondary_sections)
        reference = f"""
【规范参考信息】
根据《桥梁公司危险性较大工程管理实施细则(2025版)》,'{first_name}'章节应包含以下内容:
{joined_sections}
"""

    elif level == "二级":
        context = f"""
【问题类型】二级章节缺失
【所属一级】{first_name} ({first_code})
【缺失章节】{second_name} ({second_code})
【问题描述】'{first_name}'下缺少'{second_name}'二级章节。"""
        # Third-level points that belong to the missing second-level section.
        points = "\n".join(
            bullet_lines(self.spec_loader.get_tertiary_by_secondary(first_code, second_code))
        )
        reference = f"""
【规范参考信息】
根据《桥梁公司危险性较大工程管理实施细则(2025版)》,'{second_name}'章节应包含以下三级内容要点:
{points}
"""

    elif level == "三级":
        missing = tertiary_items or []
        heading = f"""
【问题类型】三级内容缺失
【所属一级】{first_name} ({first_code})
【所属二级】{second_name} ({second_code})
【缺失内容】"""
        context = heading + "\n" + "\n".join(bullet_lines(missing))

        requirements = "\n".join(
            [f' - {entry.third_cn}: 应包含{entry.third_focus}' for entry in missing]
        )
        reference = f"""
【规范参考信息】
以上缺失的内容要点是'{second_name}'章节下的标准内容要求,具体包括:
{requirements}
"""

    elif level == "一致性":
        # Prefer the title as written in the outline; otherwise use the
        # second-level name passed by the caller.
        section_title = outline_title or second_name
        context = f"""
【问题类型】目录与正文不一致
【涉及章节】{section_title}
【问题描述】目录页列有该章节,但正文中未发现对应内容。"""
        reference = """
【规范参考信息】
根据文档一致性要求,目录中列出的章节应在正文中有对应的内容描述。若该章节确实不需要,应从目录中移除;若需要保留,则必须补充正文内容。
"""

    return f"""你是一位资深的工程施工方案审查专家。请根据以下问题上下文和规范参考信息,生成专业的审查建议。

{context}

{reference}

请用JSON格式输出审查建议,包含以下字段:
- issue_point: 问题摘要(简洁明了,50字以内)
- suggestion: 具体补充建议(详细可行,100-200字,包含具体应该补充的内容要点)
- reason: 规范依据说明(引用具体规范要求,说明为什么需要补充)

注意:
1. suggestion应该具体、可操作,引用规范中的具体内容要求
2. 使用专业的工程术语
3. 语气应该是指导性的,帮助编制人员理解需要补充什么内容

JSON输出:"""
|
|
|
|
|
+
|
|
|
|
|
+ async def _generate_recommendation_with_llm(
|
|
|
|
|
+ self,
|
|
|
|
|
+ level: str,
|
|
|
|
|
+ first_code: str,
|
|
|
|
|
+ first_name: str,
|
|
|
|
|
+ second_code: str = None,
|
|
|
|
|
+ second_name: str = None,
|
|
|
|
|
+ tertiary_items: List[TertiaryItem] = None,
|
|
|
|
|
+ outline_title: str = None,
|
|
|
|
|
+ timeout: int = 30
|
|
|
|
|
+ ) -> Dict[str, str]:
|
|
|
|
|
+ """
|
|
|
|
|
+ 使用大模型生成建议
|
|
|
|
|
+
|
|
|
|
|
+ Returns:
|
|
|
|
|
+ Dict[str, str]: 包含 issue_point, suggestion, reason 的字典
|
|
|
|
|
+ """
|
|
|
|
|
+ if not self.model_client:
|
|
|
|
|
+ return None
|
|
|
|
|
+
|
|
|
|
|
+ try:
|
|
|
|
|
+ prompt = self._build_llm_prompt_for_recommendation(
|
|
|
|
|
+ level=level,
|
|
|
|
|
+ first_code=first_code,
|
|
|
|
|
+ first_name=first_name,
|
|
|
|
|
+ second_code=second_code,
|
|
|
|
|
+ second_name=second_name,
|
|
|
|
|
+ tertiary_items=tertiary_items,
|
|
|
|
|
+ outline_title=outline_title
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 调用大模型
|
|
|
|
|
+ task_prompt_info = {
|
|
|
|
|
+ "task_prompt": prompt,
|
|
|
|
|
+ "task_name": f"completeness_suggestion_{level}"
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ # 生成唯一trace_id
|
|
|
|
|
+ import uuid
|
|
|
|
|
+ trace_id = f"completeness_llm_{uuid.uuid4().hex[:8]}"
|
|
|
|
|
+
|
|
|
|
|
+ model_response = await self.model_client.get_model_generate_invoke(
|
|
|
|
|
+ trace_id=trace_id,
|
|
|
|
|
+ task_prompt_info=task_prompt_info,
|
|
|
|
|
+ timeout=timeout,
|
|
|
|
|
+ model_name="qwen" # 使用默认模型,可根据需要调整
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ # 解析模型返回的JSON
|
|
|
|
|
+ try:
|
|
|
|
|
+ # 尝试从返回文本中提取JSON
|
|
|
|
|
+ response_text = model_response.strip()
|
|
|
|
|
+ # 查找JSON块
|
|
|
|
|
+ if "```json" in response_text:
|
|
|
|
|
+ json_str = response_text.split("```json")[1].split("```")[0].strip()
|
|
|
|
|
+ elif "```" in response_text:
|
|
|
|
|
+ json_str = response_text.split("```")[1].split("```")[0].strip()
|
|
|
|
|
+ else:
|
|
|
|
|
+ json_str = response_text
|
|
|
|
|
+
|
|
|
|
|
+ result = json.loads(json_str)
|
|
|
|
|
+ return {
|
|
|
|
|
+ "issue_point": result.get("issue_point", ""),
|
|
|
|
|
+ "suggestion": result.get("suggestion", ""),
|
|
|
|
|
+ "reason": result.get("reason", "")
|
|
|
|
|
+ }
|
|
|
|
|
+ except (json.JSONDecodeError, IndexError) as e:
|
|
|
|
|
+ logger.warning(f"LLM建议生成结果解析失败: {e},返回: {model_response[:200]}")
|
|
|
|
|
+ return None
|
|
|
|
|
+
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ logger.warning(f"LLM建议生成失败: {e}")
|
|
|
|
|
+ return None
|
|
|
|
|
+
|
|
|
async def check(
|
|
async def check(
|
|
|
self,
|
|
self,
|
|
|
chunks: List[Dict],
|
|
chunks: List[Dict],
|
|
@@ -259,7 +478,7 @@ class LightweightCompletenessChecker:
|
|
|
|
|
|
|
|
# 7. 生成分级建议
|
|
# 7. 生成分级建议
|
|
|
actual_first = {cat1 for cat1, _ in actual_secondary}
|
|
actual_first = {cat1 for cat1, _ in actual_secondary}
|
|
|
- recommendations = self._generate_recommendations(
|
|
|
|
|
|
|
+ recommendations = await self._generate_recommendations(
|
|
|
tertiary_result, catalogue_result, outline_result,
|
|
tertiary_result, catalogue_result, outline_result,
|
|
|
actual_first, actual_secondary, actual_tertiary,
|
|
actual_first, actual_secondary, actual_tertiary,
|
|
|
chapter_classification
|
|
chapter_classification
|
|
@@ -636,7 +855,7 @@ class LightweightCompletenessChecker:
|
|
|
else:
|
|
else:
|
|
|
return "incomplete"
|
|
return "incomplete"
|
|
|
|
|
|
|
|
- def _generate_recommendations(
|
|
|
|
|
|
|
+ async def _generate_recommendations(
|
|
|
self,
|
|
self,
|
|
|
tertiary_result: Dict,
|
|
tertiary_result: Dict,
|
|
|
catalogue_result: Dict,
|
|
catalogue_result: Dict,
|
|
@@ -653,8 +872,8 @@ class LightweightCompletenessChecker:
|
|
|
level : 缺失级别(一级 / 二级 / 三级 / 一致性)
|
|
level : 缺失级别(一级 / 二级 / 三级 / 一致性)
|
|
|
issue_point : 问题摘要(含级别标识)
|
|
issue_point : 问题摘要(含级别标识)
|
|
|
location : 问题定位路径
|
|
location : 问题定位路径
|
|
|
- suggestion : 补充建议
|
|
|
|
|
- reason : 规范依据说明
|
|
|
|
|
|
|
+ suggestion : 补充建议(使用LLM生成)
|
|
|
|
|
+ reason : 规范依据说明(使用LLM生成)
|
|
|
"""
|
|
"""
|
|
|
recommendations: List[Dict[str, Any]] = []
|
|
recommendations: List[Dict[str, Any]] = []
|
|
|
|
|
|
|
@@ -679,17 +898,36 @@ class LightweightCompletenessChecker:
|
|
|
|
|
|
|
|
# ── 一级缺失 ──────────────────────────────────────────────
|
|
# ── 一级缺失 ──────────────────────────────────────────────
|
|
|
if first_code not in actual_first:
|
|
if first_code not in actual_first:
|
|
|
- recommendations.append({
|
|
|
|
|
- "level": "一级",
|
|
|
|
|
- "issue_point": f"【一级章节缺失】'{first_name}'整个章节不存在",
|
|
|
|
|
- "location": first_name,
|
|
|
|
|
- "suggestion": f"请添加'{first_name}'章节及其下全部子章节内容",
|
|
|
|
|
- "reason": (
|
|
|
|
|
- f"根据规范要求,文档必须包含'{first_name}'一级章节,"
|
|
|
|
|
- f"当前正文中未发现该章节任何内容"
|
|
|
|
|
- ),
|
|
|
|
|
- "first_seq": first_seq,
|
|
|
|
|
- })
|
|
|
|
|
|
|
+ # 尝试使用LLM生成建议
|
|
|
|
|
+ llm_result = await self._generate_recommendation_with_llm(
|
|
|
|
|
+ level="一级",
|
|
|
|
|
+ first_code=first_code,
|
|
|
|
|
+ first_name=first_name,
|
|
|
|
|
+ first_seq=first_seq
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if llm_result:
|
|
|
|
|
+ recommendations.append({
|
|
|
|
|
+ "level": "一级",
|
|
|
|
|
+ "issue_point": llm_result.get("issue_point", f"【一级章节缺失】'{first_name}'整个章节不存在"),
|
|
|
|
|
+ "location": first_name,
|
|
|
|
|
+ "suggestion": llm_result.get("suggestion", f"请添加'{first_name}'章节及其下全部子章节内容"),
|
|
|
|
|
+ "reason": llm_result.get("reason", f"根据规范要求,文档必须包含'{first_name}'一级章节,当前正文中未发现该章节任何内容"),
|
|
|
|
|
+ "first_seq": first_seq,
|
|
|
|
|
+ })
|
|
|
|
|
+ else:
|
|
|
|
|
+ # 回退到简单拼接
|
|
|
|
|
+ recommendations.append({
|
|
|
|
|
+ "level": "一级",
|
|
|
|
|
+ "issue_point": f"【一级章节缺失】'{first_name}'整个章节不存在",
|
|
|
|
|
+ "location": first_name,
|
|
|
|
|
+ "suggestion": f"请添加'{first_name}'章节及其下全部子章节内容",
|
|
|
|
|
+ "reason": (
|
|
|
|
|
+ f"根据规范要求,文档必须包含'{first_name}'一级章节,"
|
|
|
|
|
+ f"当前正文中未发现该章节任何内容"
|
|
|
|
|
+ ),
|
|
|
|
|
+ "first_seq": first_seq,
|
|
|
|
|
+ })
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
# ── 一级存在,检查二级 ─────────────────────────────────────
|
|
# ── 一级存在,检查二级 ─────────────────────────────────────
|
|
@@ -703,20 +941,41 @@ class LightweightCompletenessChecker:
|
|
|
|
|
|
|
|
# ── 二级缺失 ──────────────────────────────────────────
|
|
# ── 二级缺失 ──────────────────────────────────────────
|
|
|
if (cat1, cat2) not in actual_secondary:
|
|
if (cat1, cat2) not in actual_secondary:
|
|
|
- recommendations.append({
|
|
|
|
|
- "level": "二级",
|
|
|
|
|
- "issue_point": (
|
|
|
|
|
- f"【二级章节缺失】{first_name} > '{second_name}'整个章节不存在"
|
|
|
|
|
- ),
|
|
|
|
|
- "location": f"{first_name} > {second_name}",
|
|
|
|
|
- "suggestion": f"请在'{first_name}'下添加'{second_name}'章节内容",
|
|
|
|
|
- "reason": (
|
|
|
|
|
- f"根据规范要求,'{first_name}'下应包含'{second_name}'二级章节,"
|
|
|
|
|
- f"当前正文中未发现该章节内容"
|
|
|
|
|
- ),
|
|
|
|
|
- "first_seq": first_seq,
|
|
|
|
|
- "second_seq": second_seq,
|
|
|
|
|
- })
|
|
|
|
|
|
|
+ # 尝试使用LLM生成建议
|
|
|
|
|
+ llm_result = await self._generate_recommendation_with_llm(
|
|
|
|
|
+ level="二级",
|
|
|
|
|
+ first_code=cat1,
|
|
|
|
|
+ first_name=first_name,
|
|
|
|
|
+ second_code=cat2,
|
|
|
|
|
+ second_name=second_name
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if llm_result:
|
|
|
|
|
+ recommendations.append({
|
|
|
|
|
+ "level": "二级",
|
|
|
|
|
+ "issue_point": llm_result.get("issue_point", f"【二级章节缺失】{first_name} > '{second_name}'整个章节不存在"),
|
|
|
|
|
+ "location": f"{first_name} > {second_name}",
|
|
|
|
|
+ "suggestion": llm_result.get("suggestion", f"请在'{first_name}'下添加'{second_name}'章节内容"),
|
|
|
|
|
+ "reason": llm_result.get("reason", f"根据规范要求,'{first_name}'下应包含'{second_name}'二级章节,当前正文中未发现该章节内容"),
|
|
|
|
|
+ "first_seq": first_seq,
|
|
|
|
|
+ "second_seq": second_seq,
|
|
|
|
|
+ })
|
|
|
|
|
+ else:
|
|
|
|
|
+ # 回退到简单拼接
|
|
|
|
|
+ recommendations.append({
|
|
|
|
|
+ "level": "二级",
|
|
|
|
|
+ "issue_point": (
|
|
|
|
|
+ f"【二级章节缺失】{first_name} > '{second_name}'整个章节不存在"
|
|
|
|
|
+ ),
|
|
|
|
|
+ "location": f"{first_name} > {second_name}",
|
|
|
|
|
+ "suggestion": f"请在'{first_name}'下添加'{second_name}'章节内容",
|
|
|
|
|
+ "reason": (
|
|
|
|
|
+ f"根据规范要求,'{first_name}'下应包含'{second_name}'二级章节,"
|
|
|
|
|
+ f"当前正文中未发现该章节内容"
|
|
|
|
|
+ ),
|
|
|
|
|
+ "first_seq": first_seq,
|
|
|
|
|
+ "second_seq": second_seq,
|
|
|
|
|
+ })
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
# ── 二级存在,检查三级缺失 ────────────────────────────
|
|
# ── 二级存在,检查三级缺失 ────────────────────────────
|
|
@@ -734,40 +993,82 @@ class LightweightCompletenessChecker:
|
|
|
if not missing_t_items:
|
|
if not missing_t_items:
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
- # 为每个缺失的三级项创建单独的 recommendation
|
|
|
|
|
- for t_item in missing_t_items:
|
|
|
|
|
- recommendations.append({
|
|
|
|
|
- "level": "三级",
|
|
|
|
|
- "issue_point": (
|
|
|
|
|
- f"【三级内容缺失】{first_name} > {second_name} > '{t_item.third_cn}'"
|
|
|
|
|
- ),
|
|
|
|
|
- "location": f"{first_name} > {second_name}",
|
|
|
|
|
- "suggestion": f"请补充'{second_name}'下的'{t_item.third_cn}'内容",
|
|
|
|
|
- "reason": f"'{second_name}'下缺失规范要求的'{t_item.third_cn}'内容要点",
|
|
|
|
|
- "first_seq": first_seq,
|
|
|
|
|
- "second_seq": second_seq,
|
|
|
|
|
- "third_seq": t_item.third_seq,
|
|
|
|
|
- })
|
|
|
|
|
|
|
+ # 尝试使用LLM批量生成三级缺失建议
|
|
|
|
|
+ llm_result = await self._generate_recommendation_with_llm(
|
|
|
|
|
+ level="三级",
|
|
|
|
|
+ first_code=cat1,
|
|
|
|
|
+ first_name=first_name,
|
|
|
|
|
+ second_code=cat2,
|
|
|
|
|
+ second_name=second_name,
|
|
|
|
|
+ tertiary_items=missing_t_items
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if llm_result:
|
|
|
|
|
+ # LLM生成了整体建议,为每个缺失项添加相同建议(但位置不同)
|
|
|
|
|
+ for t_item in missing_t_items:
|
|
|
|
|
+ recommendations.append({
|
|
|
|
|
+ "level": "三级",
|
|
|
|
|
+ "issue_point": f"【三级内容缺失】{first_name} > {second_name} > '{t_item.third_cn}'",
|
|
|
|
|
+ "location": f"{first_name} > {second_name}",
|
|
|
|
|
+ "suggestion": llm_result.get("suggestion", f"请补充'{second_name}'下的'{t_item.third_cn}'内容"),
|
|
|
|
|
+ "reason": llm_result.get("reason", f"'{second_name}'下缺失规范要求的'{t_item.third_cn}'内容要点"),
|
|
|
|
|
+ "first_seq": first_seq,
|
|
|
|
|
+ "second_seq": second_seq,
|
|
|
|
|
+ "third_seq": t_item.third_seq,
|
|
|
|
|
+ })
|
|
|
|
|
+ else:
|
|
|
|
|
+ # 回退到简单拼接
|
|
|
|
|
+ for t_item in missing_t_items:
|
|
|
|
|
+ recommendations.append({
|
|
|
|
|
+ "level": "三级",
|
|
|
|
|
+ "issue_point": (
|
|
|
|
|
+ f"【三级内容缺失】{first_name} > {second_name} > '{t_item.third_cn}'"
|
|
|
|
|
+ ),
|
|
|
|
|
+ "location": f"{first_name} > {second_name}",
|
|
|
|
|
+ "suggestion": f"请补充'{second_name}'下的'{t_item.third_cn}'内容",
|
|
|
|
|
+ "reason": f"'{second_name}'下缺失规范要求的'{t_item.third_cn}'内容要点",
|
|
|
|
|
+ "first_seq": first_seq,
|
|
|
|
|
+ "second_seq": second_seq,
|
|
|
|
|
+ "third_seq": t_item.third_seq,
|
|
|
|
|
+ })
|
|
|
|
|
|
|
|
# ── 一致性审查:目录有列但正文无内容 ─────────────────────────────
|
|
# ── 一致性审查:目录有列但正文无内容 ─────────────────────────────
|
|
|
if outline_result:
|
|
if outline_result:
|
|
|
for e in outline_result.get("empty_sections", []):
|
|
for e in outline_result.get("empty_sections", []):
|
|
|
f_name = e.get("first_name", "")
|
|
f_name = e.get("first_name", "")
|
|
|
- # 优先用目录页原始标题,回退到标准名称
|
|
|
|
|
sec_title = e.get("outline_title") or e.get("secondary_name", "")
|
|
sec_title = e.get("outline_title") or e.get("secondary_name", "")
|
|
|
location = f"{f_name} > {sec_title}" if f_name else sec_title
|
|
location = f"{f_name} > {sec_title}" if f_name else sec_title
|
|
|
- recommendations.append({
|
|
|
|
|
- "level": "一致性",
|
|
|
|
|
- "issue_point": f"【目录正文不一致】'{location}'目录已列但正文无内容",
|
|
|
|
|
- "location": location,
|
|
|
|
|
- "suggestion": (
|
|
|
|
|
- f"请补充'{sec_title}'章节的正文内容,或从目录中移除该章节"
|
|
|
|
|
- ),
|
|
|
|
|
- "reason": (
|
|
|
|
|
- f"目录页列有'{sec_title}'章节,但正文中未发现对应内容,"
|
|
|
|
|
- f"存在目录与正文不一致的问题"
|
|
|
|
|
- ),
|
|
|
|
|
- })
|
|
|
|
|
|
|
+
|
|
|
|
|
+ # 尝试使用LLM生成建议
|
|
|
|
|
+ llm_result = await self._generate_recommendation_with_llm(
|
|
|
|
|
+ level="一致性",
|
|
|
|
|
+ first_code="",
|
|
|
|
|
+ first_name=f_name,
|
|
|
|
|
+ second_name=sec_title,
|
|
|
|
|
+ outline_title=sec_title
|
|
|
|
|
+ )
|
|
|
|
|
+
|
|
|
|
|
+ if llm_result:
|
|
|
|
|
+ recommendations.append({
|
|
|
|
|
+ "level": "一致性",
|
|
|
|
|
+ "issue_point": llm_result.get("issue_point", f"【目录正文不一致】'{location}'目录已列但正文无内容"),
|
|
|
|
|
+ "location": location,
|
|
|
|
|
+ "suggestion": llm_result.get("suggestion", f"请补充'{sec_title}'章节的正文内容,或从目录中移除该章节"),
|
|
|
|
|
+ "reason": llm_result.get("reason", f"目录页列有'{sec_title}'章节,但正文中未发现对应内容,存在目录与正文不一致的问题"),
|
|
|
|
|
+ })
|
|
|
|
|
+ else:
|
|
|
|
|
+ recommendations.append({
|
|
|
|
|
+ "level": "一致性",
|
|
|
|
|
+ "issue_point": f"【目录正文不一致】'{location}'目录已列但正文无内容",
|
|
|
|
|
+ "location": location,
|
|
|
|
|
+ "suggestion": (
|
|
|
|
|
+ f"请补充'{sec_title}'章节的正文内容,或从目录中移除该章节"
|
|
|
|
|
+ ),
|
|
|
|
|
+ "reason": (
|
|
|
|
|
+ f"目录页列有'{sec_title}'章节,但正文中未发现对应内容,"
|
|
|
|
|
+ f"存在目录与正文不一致的问题"
|
|
|
|
|
+ ),
|
|
|
|
|
+ })
|
|
|
|
|
|
|
|
if not recommendations:
|
|
if not recommendations:
|
|
|
recommendations.append({
|
|
recommendations.append({
|
|
@@ -785,16 +1086,20 @@ class LightweightCompletenessChecker:
|
|
|
async def check_completeness_lightweight(
|
|
async def check_completeness_lightweight(
|
|
|
chunks: List[Dict],
|
|
chunks: List[Dict],
|
|
|
outline: Optional[List[Dict]] = None,
|
|
outline: Optional[List[Dict]] = None,
|
|
|
- standard_csv_path: Optional[str] = None
|
|
|
|
|
|
|
+ standard_csv_path: Optional[str] = None,
|
|
|
|
|
+ model_client=None,
|
|
|
|
|
+ prompt_loader=None
|
|
|
) -> LightweightCompletenessResult:
|
|
) -> LightweightCompletenessResult:
|
|
|
"""
|
|
"""
|
|
|
轻量级完整性审查入口函数
|
|
轻量级完整性审查入口函数
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
Args:
|
|
Args:
|
|
|
chunks: 文档分块列表,每个chunk需包含tertiary_category_code
|
|
chunks: 文档分块列表,每个chunk需包含tertiary_category_code
|
|
|
outline: 目录结构(可选)
|
|
outline: 目录结构(可选)
|
|
|
standard_csv_path: 三级标准CSV文件路径,默认为doc_worker/config/StandardCategoryTable.csv
|
|
standard_csv_path: 三级标准CSV文件路径,默认为doc_worker/config/StandardCategoryTable.csv
|
|
|
-
|
|
|
|
|
|
|
+ model_client: 模型客户端(可选),用于生成智能建议
|
|
|
|
|
+ prompt_loader: 提示词加载器(可选)
|
|
|
|
|
+
|
|
|
Returns:
|
|
Returns:
|
|
|
LightweightCompletenessResult
|
|
LightweightCompletenessResult
|
|
|
"""
|
|
"""
|
|
@@ -802,8 +1107,12 @@ async def check_completeness_lightweight(
|
|
|
# 默认路径
|
|
# 默认路径
|
|
|
default_path = Path(__file__).parent.parent.parent.parent.parent / "doc_worker" / "config" / "StandardCategoryTable.csv"
|
|
default_path = Path(__file__).parent.parent.parent.parent.parent / "doc_worker" / "config" / "StandardCategoryTable.csv"
|
|
|
standard_csv_path = str(default_path)
|
|
standard_csv_path = str(default_path)
|
|
|
-
|
|
|
|
|
- checker = LightweightCompletenessChecker(standard_csv_path)
|
|
|
|
|
|
|
+
|
|
|
|
|
+ checker = LightweightCompletenessChecker(
|
|
|
|
|
+ standard_csv_path,
|
|
|
|
|
+ model_client=model_client,
|
|
|
|
|
+ prompt_loader=prompt_loader
|
|
|
|
|
+ )
|
|
|
return await checker.check(chunks=chunks, outline=outline)
|
|
return await checker.check(chunks=chunks, outline=outline)
|
|
|
|
|
|
|
|
|
|
|