|
|
@@ -1,52 +1,19 @@
|
|
|
"""
|
|
|
-轻量级完整性审查模块
|
|
|
+完整性审查模块(方案B:直接LLM解释法)
|
|
|
|
|
|
-特点:
|
|
|
-- 目录审查:二级粒度(检查二级章节是否齐全)
|
|
|
-- 完整性审查:三级粒度(基于分类结果,无LLM)
|
|
|
-- 大纲审查:二级粒度(检查二级章节一致性)
|
|
|
-
|
|
|
-完全依赖分类器输出的三级分类结果,无需LLM参与。
|
|
|
+直接将文档原文与标准要求送交LLM,逐条判断是否覆盖,
|
|
|
+并输出证据原文和判断理由。
|
|
|
"""
|
|
|
|
|
|
import pandas as pd
|
|
|
-import asyncio
|
|
|
import re
|
|
|
import json
|
|
|
-from typing import Dict, List, Optional, Set, Tuple, Any
|
|
|
+from typing import Dict, List, Optional, Tuple, Any
|
|
|
from dataclasses import dataclass, field
|
|
|
from collections import defaultdict
|
|
|
-from pathlib import Path
|
|
|
|
|
|
from foundation.observability.logger.loggering import review_logger as logger
|
|
|
-from ..doc_worker.classification.hierarchy_classifier import is_secondary_in_whitelist
|
|
|
-
|
|
|
-
|
|
|
-# 方案B:直接LLM完整性审查 System Prompt
|
|
|
-DIRECT_CHECK_SYSTEM_PROMPT = """你是专业的施工方案完整性审查专家。
|
|
|
-
|
|
|
-【任务】
|
|
|
-给你一组施工方案文档片段和一组标准要求(来自《公路水运危险性较大工程专项施工方案编制审查规程》JT/T 1495—2024),请逐条判断文档是否覆盖了每条标准要求。
|
|
|
-
|
|
|
-【输出格式】
|
|
|
-对每条标准要求,输出一个JSON对象:
|
|
|
-- standard_code: 标准分类代码(原样传入)
|
|
|
-- standard_name: 标准分类名称(原样传入)
|
|
|
-- is_covered: true/false(文档是否包含该内容)
|
|
|
-- evidence: 如果覆盖,引用文档中的关键原文(50-150字);如果未覆盖,写"无"
|
|
|
-- reason: 判断原因(30-80字,说明为什么认为覆盖或未覆盖)
|
|
|
-- confidence: 置信度 0.0-1.0
|
|
|
-
|
|
|
-【判断原则】
|
|
|
-1. 只要文档中有相关内容(即使不完全匹配),就算覆盖
|
|
|
-2. 如果文档提到了相关概念但不够具体,is_covered=true 但 confidence 较低(0.3-0.6)
|
|
|
-3. 如果文档完全没有相关内容,is_covered=false
|
|
|
-4. 注意区分:文档可能在不同片段提到同一标准的不同方面
|
|
|
-
|
|
|
-【输出要求】
|
|
|
-- 只输出JSON数组,不要任何解释文字
|
|
|
-- 数组中每条标准要求对应一个对象
|
|
|
-- 保持 standard_code 与输入一致"""
|
|
|
+from .utils.prompt_loader import prompt_loader
|
|
|
|
|
|
|
|
|
@dataclass
|
|
|
@@ -75,18 +42,6 @@ class SecondaryItem:
|
|
|
second_seq: int = 0
|
|
|
|
|
|
|
|
|
-@dataclass
|
|
|
-class DirectCheckItem:
|
|
|
- """方案B:单条标准要求的直接LLM检查结果"""
|
|
|
- standard_code: str # 三级分类代码
|
|
|
- standard_name: str # 三级分类名称
|
|
|
- third_focus: str # 标准要求描述
|
|
|
- is_covered: bool # 是否覆盖
|
|
|
- evidence: str # LLM给出的证据(文档原文引用)
|
|
|
- reason: str # LLM给出的判断原因
|
|
|
- confidence: float # LLM置信度 0-1
|
|
|
-
|
|
|
-
|
|
|
@dataclass
|
|
|
class LightweightCompletenessResult:
|
|
|
"""轻量级完整性审查结果"""
|
|
|
@@ -230,41 +185,28 @@ class TertiarySpecLoader:
|
|
|
class LightweightCompletenessChecker:
|
|
|
"""轻量级完整性检查器"""
|
|
|
|
|
|
- def __init__(self, standard_csv_path: str, model_client=None, prompt_loader=None):
|
|
|
+ def __init__(self, standard_csv_path: str, model_client=None):
|
|
|
"""
|
|
|
初始化检查器
|
|
|
|
|
|
Args:
|
|
|
standard_csv_path: StandardCategoryTable.csv 文件路径
|
|
|
- model_client: 模型客户端(可选),用于生成智能建议
|
|
|
- prompt_loader: 提示词加载器(可选)
|
|
|
+ model_client: 模型客户端(可选)
|
|
|
"""
|
|
|
self.spec_loader = TertiarySpecLoader(standard_csv_path)
|
|
|
self.tertiary_specs = self.spec_loader.get_tertiary_items()
|
|
|
self.secondary_specs = self.spec_loader.get_secondary_items()
|
|
|
self.secondary_names = self.spec_loader.get_secondary_names()
|
|
|
|
|
|
- # 大模型客户端和提示词加载器(用于生成智能建议)
|
|
|
self.model_client = model_client
|
|
|
- self.prompt_loader = prompt_loader
|
|
|
|
|
|
- # 如果没有提供model_client,尝试从foundation导入
|
|
|
if self.model_client is None:
|
|
|
try:
|
|
|
from foundation.ai.agent.generate.model_generate import generate_model_client
|
|
|
self.model_client = generate_model_client
|
|
|
except ImportError:
|
|
|
- logger.warning("无法导入generate_model_client,建议生成功能将使用简单拼接模式")
|
|
|
+ logger.warning("无法导入generate_model_client")
|
|
|
self.model_client = None
|
|
|
-
|
|
|
- # 如果没有提供prompt_loader,尝试从当前模块导入
|
|
|
- if self.prompt_loader is None:
|
|
|
- try:
|
|
|
- from .utils.prompt_loader import prompt_loader
|
|
|
- self.prompt_loader = prompt_loader
|
|
|
- except ImportError:
|
|
|
- logger.warning("无法导入prompt_loader,建议生成功能将使用简单拼接模式")
|
|
|
- self.prompt_loader = None
|
|
|
|
|
|
def _normalize_chapter_code(self, code: str) -> str:
|
|
|
"""将章节分类码大小写归一化为与CSV一致(如 'management' -> 'management')"""
|
|
|
@@ -279,13 +221,13 @@ class LightweightCompletenessChecker:
|
|
|
# 方案B:直接LLM完整性检查方法
|
|
|
# ═══════════════════════════════════════════════════════════════
|
|
|
|
|
|
- def _build_direct_check_user_prompt(
|
|
|
+ def _build_direct_check_prompt_kwargs(
|
|
|
self,
|
|
|
chunks: List[Dict[str, Any]],
|
|
|
standard_items: List[Dict[str, Any]],
|
|
|
chapter_name: str = ""
|
|
|
- ) -> str:
|
|
|
- """构建方案B的用户Prompt"""
|
|
|
+ ) -> dict:
|
|
|
+ """构建方案B用户提示词变量"""
|
|
|
content_parts = []
|
|
|
for i, chunk in enumerate(chunks):
|
|
|
label = chunk.get("section_label", "")
|
|
|
@@ -302,18 +244,12 @@ class LightweightCompletenessChecker:
|
|
|
f" — {item['third_focus']}\n"
|
|
|
)
|
|
|
|
|
|
- prompt = f"""请审查以下施工方案文档是否覆盖了标准要求。
|
|
|
-
|
|
|
-【文档章节】{chapter_name}
|
|
|
-
|
|
|
-【文档内容】
|
|
|
-{document_text[:8000]}
|
|
|
-
|
|
|
-【标准要求(共{len(standard_items)}条)】
|
|
|
-{standards_text}
|
|
|
-
|
|
|
-请逐条判断文档是否覆盖了上述标准要求,输出JSON数组。"""
|
|
|
- return prompt
|
|
|
+ return {
|
|
|
+ "chapter_name": chapter_name,
|
|
|
+ "document_text": document_text[:8000],
|
|
|
+ "total_standards": str(len(standard_items)),
|
|
|
+ "standards_text": standards_text,
|
|
|
+ }
|
|
|
|
|
|
def _try_parse_json(self, text: str):
|
|
|
"""尝试直接解析JSON"""
|
|
|
@@ -520,12 +456,14 @@ class LightweightCompletenessChecker:
|
|
|
if not standard_items:
|
|
|
return [], 0
|
|
|
|
|
|
- user_prompt = self._build_direct_check_user_prompt(
|
|
|
+ prompt_kwargs = self._build_direct_check_prompt_kwargs(
|
|
|
chunks, standard_items, chapter_name
|
|
|
)
|
|
|
|
|
|
task_prompt_info = {
|
|
|
- "task_prompt": f"{DIRECT_CHECK_SYSTEM_PROMPT}\n\n{user_prompt}",
|
|
|
+ "task_prompt": prompt_loader.get_prompt_template(
|
|
|
+ "completeness", "completeness_direct_check", **prompt_kwargs
|
|
|
+ ),
|
|
|
"task_name": "completeness_direct_check"
|
|
|
}
|
|
|
|
|
|
@@ -553,7 +491,6 @@ class LightweightCompletenessChecker:
|
|
|
return items, attempt + 1
|
|
|
|
|
|
logger.warning(f"[完整性审查] 第{attempt+1}次尝试解析为空,准备重试")
|
|
|
- # 重试时加修复提示
|
|
|
if attempt < max_retries:
|
|
|
codes_str = ", ".join(
|
|
|
s["third_code"] for s in standard_items[:10]
|
|
|
@@ -571,7 +508,6 @@ class LightweightCompletenessChecker:
|
|
|
except Exception as e:
|
|
|
logger.error(f"[完整性审查] LLM调用失败 (第{attempt+1}次): {e}")
|
|
|
|
|
|
- # 全部失败,返回空
|
|
|
logger.error("[完整性审查] 所有LLM尝试均失败")
|
|
|
return [], max_retries + 1
|
|
|
|
|
|
@@ -640,7 +576,6 @@ class LightweightCompletenessChecker:
|
|
|
})
|
|
|
|
|
|
# 按二级分组统计
|
|
|
- from collections import defaultdict
|
|
|
secondary_stats = defaultdict(lambda: {"total": 0, "present": 0, "missing": 0})
|
|
|
for item in direct_items:
|
|
|
key = (item.get("first_code", ""), item.get("secondary_code", ""))
|
|
|
@@ -757,14 +692,6 @@ class LightweightCompletenessChecker:
|
|
|
|
|
|
return recommendations
|
|
|
|
|
|
- def _check_secondary_whitelist(self, cat1: str, second_name: str) -> bool:
|
|
|
- """检查二级章节是否在白名单中(同步版本,用于同步上下文)"""
|
|
|
- try:
|
|
|
- from ..doc_worker.classification.hierarchy_classifier import is_secondary_in_whitelist
|
|
|
- return is_secondary_in_whitelist(cat1, second_name)
|
|
|
- except ImportError:
|
|
|
- return False
|
|
|
-
|
|
|
async def check(
|
|
|
self,
|
|
|
chunks: List[Dict],
|
|
|
@@ -844,248 +771,10 @@ class LightweightCompletenessChecker:
|
|
|
"""检查一级分类代码是否有效(在标准分类中)"""
|
|
|
if not code:
|
|
|
return False
|
|
|
- # 排除已知的非章节键
|
|
|
if code in ("quality_check", "catalog", "metadata"):
|
|
|
return False
|
|
|
- # 检查是否在标准一级分类中
|
|
|
return code in self.spec_loader.first_names
|
|
|
|
|
|
- def _extract_first_from_chunks(self, chunks: List[Dict]) -> Set[str]:
|
|
|
- """
|
|
|
- 从chunks独立提取实际存在的一级分类(不依赖二级)。
|
|
|
-
|
|
|
- 解决场景:当一级存在但所有二级被过滤(如标记为non_standard)时,
|
|
|
- 避免误报"一级章节缺失"。
|
|
|
- """
|
|
|
- actual_first = set()
|
|
|
- for chunk in chunks:
|
|
|
- # 支持 metadata 嵌套格式和直接字段格式
|
|
|
- metadata = chunk.get("metadata", {})
|
|
|
- cat1 = (metadata.get("chapter_classification") or
|
|
|
- chunk.get("chapter_classification") or
|
|
|
- chunk.get("first_code"))
|
|
|
- # 归一化并验证
|
|
|
- cat1 = self._normalize_chapter_code(cat1)
|
|
|
- if self._is_valid_first_code(cat1):
|
|
|
- actual_first.add(cat1)
|
|
|
- return actual_first
|
|
|
-
|
|
|
- def _extract_secondary_from_chunks(self, chunks: List[Dict]) -> Set[Tuple[str, str]]:
|
|
|
- """从chunks提取实际存在的二级分类(支持 metadata 嵌套格式),跳过非标准项"""
|
|
|
- actual = set()
|
|
|
- for chunk in chunks:
|
|
|
- # 支持 metadata 嵌套格式和直接字段格式
|
|
|
- metadata = chunk.get("metadata", {})
|
|
|
- cat1 = (metadata.get("chapter_classification") or
|
|
|
- chunk.get("chapter_classification") or
|
|
|
- chunk.get("first_code"))
|
|
|
- cat2 = (metadata.get("secondary_category_code") or
|
|
|
- chunk.get("secondary_category_code") or
|
|
|
- chunk.get("second_code"))
|
|
|
- # 跳过非标准项
|
|
|
- if cat2 == "non_standard":
|
|
|
- continue
|
|
|
- # 跳过无效的一级分类代码
|
|
|
- if not self._is_valid_first_code(cat1):
|
|
|
- continue
|
|
|
- if cat1 and cat2:
|
|
|
- actual.add((cat1, cat2))
|
|
|
- return actual
|
|
|
-
|
|
|
- def _extract_from_outline(
|
|
|
- self, outline: List[Dict]
|
|
|
- ) -> Tuple[Set[str], Dict[Tuple[str, str], str]]:
|
|
|
- """
|
|
|
- 从目录页提取一级分类集合与二级分类映射(含原始标题)
|
|
|
-
|
|
|
- Returns:
|
|
|
- outline_first: {first_code, ...}
|
|
|
- outline_secondary: {(first_code, second_code): outline_title, ...}
|
|
|
- """
|
|
|
- outline_first: Set[str] = set()
|
|
|
- outline_secondary: Dict[Tuple[str, str], str] = {}
|
|
|
-
|
|
|
- if not isinstance(outline, list):
|
|
|
- return outline_first, outline_secondary
|
|
|
-
|
|
|
- for item in outline:
|
|
|
- if not isinstance(item, dict):
|
|
|
- continue
|
|
|
- cat1 = item.get("chapter_classification")
|
|
|
- cat2 = item.get("secondary_category_code")
|
|
|
- title = item.get("title", "")
|
|
|
- if cat1:
|
|
|
- outline_first.add(cat1)
|
|
|
- if cat1 and cat2 and (cat1, cat2) not in outline_secondary:
|
|
|
- outline_secondary[(cat1, cat2)] = title
|
|
|
-
|
|
|
- return outline_first, outline_secondary
|
|
|
-
|
|
|
- def _check_catalogue(self, outline_first: Set[str],
|
|
|
- outline_secondary: Dict[Tuple[str, str], str],
|
|
|
- chapter_classification: Optional[str] = None) -> Dict[str, Any]:
|
|
|
- """
|
|
|
- 目录结构审查(一级 + 二级粒度)
|
|
|
- 检查目录页是否列出了标准要求的所有一级和二级章节
|
|
|
-
|
|
|
- Args:
|
|
|
- outline_first: 从目录页提取的一级分类集合
|
|
|
- outline_secondary: 从目录页提取的二级分类映射 {(cat1,cat2): title}
|
|
|
- chapter_classification: 若提供则只检查该一级章节范围
|
|
|
- """
|
|
|
- outline_second_keys = set(outline_secondary.keys())
|
|
|
-
|
|
|
- # 确定检查范围
|
|
|
- if chapter_classification:
|
|
|
- required_first = (
|
|
|
- {chapter_classification}
|
|
|
- if any(k[0] == chapter_classification for k in self.secondary_specs)
|
|
|
- else set()
|
|
|
- )
|
|
|
- required_second = {
|
|
|
- (c1, c2) for (c1, c2) in self.secondary_specs
|
|
|
- if c1 == chapter_classification
|
|
|
- }
|
|
|
- actual_first = {c for c in outline_first if c == chapter_classification}
|
|
|
- actual_second_keys = {
|
|
|
- (c1, c2) for (c1, c2) in outline_second_keys if c1 == chapter_classification
|
|
|
- }
|
|
|
- else:
|
|
|
- required_first = {k[0] for k in self.secondary_specs}
|
|
|
- required_second = set(self.secondary_specs.keys())
|
|
|
- actual_first = outline_first
|
|
|
- actual_second_keys = outline_second_keys
|
|
|
-
|
|
|
- # 一级差异
|
|
|
- missing_first = required_first - actual_first
|
|
|
- extra_first = actual_first - required_first
|
|
|
-
|
|
|
- # 二级差异
|
|
|
- missing_second = required_second - actual_second_keys
|
|
|
- extra_second = actual_second_keys - required_second
|
|
|
-
|
|
|
- # 一级缺失详情
|
|
|
- missing_first_details = []
|
|
|
- for c in sorted(missing_first):
|
|
|
- # 从任意该一级下的二级获取 first_seq
|
|
|
- first_seq = 0
|
|
|
- for (fc, sc), item in self.secondary_specs.items():
|
|
|
- if fc == c:
|
|
|
- first_seq = item.first_seq
|
|
|
- break
|
|
|
- missing_first_details.append({
|
|
|
- "first_code": c,
|
|
|
- "first_name": self.spec_loader.first_names.get(c, c),
|
|
|
- "first_seq": first_seq
|
|
|
- })
|
|
|
-
|
|
|
- # 二级缺失详情
|
|
|
- missing_second_details = []
|
|
|
- for cat1, cat2 in sorted(missing_second):
|
|
|
- item = self.secondary_specs.get((cat1, cat2))
|
|
|
- missing_second_details.append({
|
|
|
- "first_code": cat1,
|
|
|
- "first_name": item.first_cn if item else self.spec_loader.first_names.get(cat1, cat1),
|
|
|
- "first_seq": item.first_seq if item else 0,
|
|
|
- "secondary_code": cat2,
|
|
|
- "secondary_name": item.second_cn if item else "未知",
|
|
|
- "second_seq": item.second_seq if item else 0
|
|
|
- })
|
|
|
-
|
|
|
- # 二级多余详情(目录有但标准无)
|
|
|
- extra_second_details = []
|
|
|
- for cat1, cat2 in sorted(extra_second):
|
|
|
- item = self.secondary_specs.get((cat1, cat2))
|
|
|
- extra_second_details.append({
|
|
|
- "first_code": cat1,
|
|
|
- "first_name": self.spec_loader.first_names.get(cat1, cat1),
|
|
|
- "first_seq": item.first_seq if item else 0,
|
|
|
- "secondary_code": cat2,
|
|
|
- "secondary_name": item.second_cn if item else "未知",
|
|
|
- "second_seq": item.second_seq if item else 0,
|
|
|
- "outline_title": outline_secondary.get((cat1, cat2), "")
|
|
|
- })
|
|
|
-
|
|
|
- present_second = len(required_second & actual_second_keys)
|
|
|
- second_rate = present_second / len(required_second) * 100 if required_second else 0
|
|
|
-
|
|
|
- return {
|
|
|
- "level": "primary_and_secondary",
|
|
|
- "is_complete": len(missing_first) == 0 and len(missing_second) == 0,
|
|
|
- "first_level": {
|
|
|
- "total_required": len(required_first),
|
|
|
- "actual_present": len(actual_first & required_first),
|
|
|
- "missing_count": len(missing_first),
|
|
|
- "extra_count": len(extra_first),
|
|
|
- "missing": missing_first_details,
|
|
|
- "extra": [
|
|
|
- {"first_code": c, "first_name": self.spec_loader.first_names.get(c, c)}
|
|
|
- for c in sorted(extra_first)
|
|
|
- ]
|
|
|
- },
|
|
|
- "second_level": {
|
|
|
- "total_required": len(required_second),
|
|
|
- "actual_present": present_second,
|
|
|
- "missing_count": len(missing_second),
|
|
|
- "extra_count": len(extra_second),
|
|
|
- "completeness_rate": f"{second_rate:.1f}%",
|
|
|
- "missing": missing_second_details,
|
|
|
- "extra": extra_second_details
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- def _check_outline(
|
|
|
- self,
|
|
|
- actual_secondary: Set[Tuple[str, str]],
|
|
|
- outline_secondary: Dict[Tuple[str, str], str]
|
|
|
- ) -> Dict[str, Any]:
|
|
|
- """
|
|
|
- 一致性审查(二级粒度)
|
|
|
- 对比目录页标题与正文实际内容的二级分类是否吻合
|
|
|
-
|
|
|
- Args:
|
|
|
- actual_secondary: 从正文 chunks 提取的二级分类集合
|
|
|
- outline_secondary: 从目录页提取的二级分类映射 {(cat1,cat2): outline_title}
|
|
|
- """
|
|
|
- outline_keys = set(outline_secondary.keys())
|
|
|
-
|
|
|
- # 空章节:目录页列了,但正文无对应内容
|
|
|
- empty_sections = []
|
|
|
- for (cat1, cat2) in sorted(outline_keys - actual_secondary):
|
|
|
- item = self.secondary_specs.get((cat1, cat2))
|
|
|
- empty_sections.append({
|
|
|
- "first_code": cat1,
|
|
|
- "first_name": item.first_cn if item else self.spec_loader.first_names.get(cat1, cat1),
|
|
|
- "secondary_code": cat2,
|
|
|
- "secondary_name": item.second_cn if item else "未知",
|
|
|
- "outline_title": outline_secondary.get((cat1, cat2), "") # 目录页原始标题
|
|
|
- })
|
|
|
-
|
|
|
- # 未归类内容:正文有内容,但目录页未列出
|
|
|
- unclassified_content = []
|
|
|
- for (cat1, cat2) in sorted(actual_secondary - outline_keys):
|
|
|
- item = self.secondary_specs.get((cat1, cat2))
|
|
|
- unclassified_content.append({
|
|
|
- "first_code": cat1,
|
|
|
- "first_name": item.first_cn if item else self.spec_loader.first_names.get(cat1, cat1),
|
|
|
- "secondary_code": cat2,
|
|
|
- "secondary_name": item.second_cn if item else "未知"
|
|
|
- })
|
|
|
-
|
|
|
- matched = outline_keys & actual_secondary
|
|
|
- match_rate = len(matched) / len(outline_keys) * 100 if outline_keys else 0
|
|
|
-
|
|
|
- return {
|
|
|
- "level": "secondary",
|
|
|
- "is_consistent": len(empty_sections) == 0 and len(unclassified_content) == 0,
|
|
|
- "outline_secondary_count": len(outline_keys),
|
|
|
- "content_secondary_count": len(actual_secondary),
|
|
|
- "matched_count": len(matched),
|
|
|
- "match_rate": f"{match_rate:.1f}%",
|
|
|
- "empty_sections": empty_sections, # 目录有,正文无
|
|
|
- "unclassified_content": unclassified_content # 正文有,目录无
|
|
|
- }
|
|
|
-
|
|
|
def _calc_overall_status(self, tertiary_result: Dict) -> str:
|
|
|
"""计算总体状态"""
|
|
|
rate_str = tertiary_result.get("completeness_rate", "0%").rstrip("%")
|
|
|
@@ -1149,52 +838,6 @@ class LightweightCompletenessChecker:
|
|
|
return parts[-1].strip() # 返回二级小节名
|
|
|
return section_label.strip()
|
|
|
|
|
|
- def _get_actual_first_name(self, label_map: Dict[Tuple[str, str], str],
|
|
|
- first_code: str) -> str:
|
|
|
- """
|
|
|
- 获取实际一级章节名(从任意一个该一级下的 section_label 提取)
|
|
|
- """
|
|
|
- for (fc, sc), label in label_map.items():
|
|
|
- if fc == first_code and "->" in label:
|
|
|
- return label.split("->")[0].strip()
|
|
|
- # 回退到标准名称
|
|
|
- return self.spec_loader.first_names.get(first_code, first_code)
|
|
|
-
|
|
|
-
|
|
|
-# 便捷函数
|
|
|
-async def check_completeness_lightweight(
|
|
|
- chunks: List[Dict],
|
|
|
- outline: Optional[List[Dict]] = None,
|
|
|
- standard_csv_path: Optional[str] = None,
|
|
|
- model_client=None,
|
|
|
- prompt_loader=None
|
|
|
-) -> LightweightCompletenessResult:
|
|
|
- """
|
|
|
- 轻量级完整性审查入口函数
|
|
|
-
|
|
|
- Args:
|
|
|
- chunks: 文档分块列表,每个chunk需包含tertiary_category_code
|
|
|
- outline: 目录结构(可选)
|
|
|
- standard_csv_path: 三级标准CSV文件路径,默认为doc_worker/config/StandardCategoryTable.csv
|
|
|
- model_client: 模型客户端(可选),用于生成智能建议
|
|
|
- prompt_loader: 提示词加载器(可选)
|
|
|
-
|
|
|
- Returns:
|
|
|
- LightweightCompletenessResult
|
|
|
- """
|
|
|
- if standard_csv_path is None:
|
|
|
- # 默认路径
|
|
|
- default_path = Path(__file__).parent.parent.parent.parent.parent / "doc_worker" / "config" / "StandardCategoryTable.csv"
|
|
|
- standard_csv_path = str(default_path)
|
|
|
-
|
|
|
- checker = LightweightCompletenessChecker(
|
|
|
- standard_csv_path,
|
|
|
- model_client=model_client,
|
|
|
- prompt_loader=prompt_loader
|
|
|
- )
|
|
|
- return await checker.check(chunks=chunks, outline=outline)
|
|
|
-
|
|
|
-
|
|
|
def result_to_dict(result: LightweightCompletenessResult) -> Dict[str, Any]:
|
|
|
"""将结果对象转换为字典"""
|
|
|
return {
|