|
|
@@ -482,7 +482,8 @@ JSON输出:"""
|
|
|
recommendations = await self._generate_recommendations(
|
|
|
tertiary_result, catalogue_result, outline_result,
|
|
|
actual_first, actual_secondary, actual_tertiary,
|
|
|
- chapter_classification
|
|
|
+ chapter_classification,
|
|
|
+ chunks # 传入 chunks 用于获取实际章节名
|
|
|
)
|
|
|
|
|
|
return LightweightCompletenessResult(
|
|
|
@@ -856,6 +857,62 @@ JSON输出:"""
|
|
|
else:
|
|
|
return "incomplete"
|
|
|
|
|
|
def _build_section_label_map(self, chunks: List[Dict]) -> Dict[Tuple[str, str], str]:
    """
    Build a ``(first_code, second_code) -> section_label`` lookup from chunks.

    For every chunk each value is looked up in ``chunk["metadata"]`` first
    and at the chunk's top level second:

    - first-level code:  ``chapter_classification``, then top-level ``first_code``
    - second-level code: ``secondary_category_code``, then top-level ``second_code``
    - label:             ``section_label``

    A chunk is recorded only when all three values are truthy.  When several
    chunks share the same code pair, the label of the last one wins.

    Example ``section_label``: "第一章编制依据->一、法律法规".

    Args:
        chunks: Chunk dictionaries.  ``None`` or an empty list yields an
            empty mapping.

    Returns:
        Mapping from ``(first_code, second_code)`` to the section label.
    """
    label_map: Dict[Tuple[str, str], str] = {}
    for chunk in chunks or []:
        # "metadata" may be present but None (or not a dict at all); a plain
        # chunk.get("metadata", {}) would hand back that value and crash on .get.
        metadata = chunk.get("metadata")
        if not isinstance(metadata, dict):
            metadata = {}

        cat1 = (
            metadata.get("chapter_classification")
            or chunk.get("chapter_classification")
            or chunk.get("first_code")
        )
        cat2 = (
            metadata.get("secondary_category_code")
            or chunk.get("secondary_category_code")
            or chunk.get("second_code")
        )
        section_label = (
            metadata.get("section_label")
            or chunk.get("section_label")
            or ""
        )

        if cat1 and cat2 and section_label:
            label_map[(cat1, cat2)] = section_label
    return label_map
|
|
|
+
|
|
|
def _get_actual_chapter_name(self, label_map: Dict[Tuple[str, str], str],
                             first_code: str, second_code: Optional[str] = None) -> str:
    """
    Resolve the display name used to locate a chapter in the document.

    - ``second_code`` empty: return the standard first-level name from
      ``self.spec_loader.first_names`` (or ``first_code`` if unknown).
    - ``second_code`` given: return the last ``"->"``-separated segment of
      the section label recorded for ``(first_code, second_code)``, i.e. the
      second-level section title as it appears in the document.
    - No usable label (missing, blank, or ending in ``"->"``): fall back to
      the standard ``"first_cn > second_cn"`` path from
      ``self.secondary_specs``, or to ``"first_code > second_code"`` when the
      pair is not in the spec either.

    Args:
        label_map: Mapping ``(first_code, second_code) -> section_label``.
        first_code: First-level category code.
        second_code: Second-level category code, or ``None``.

    Returns:
        The resolved name; never an empty string for non-empty codes.
    """
    if not second_code:
        return self.spec_loader.first_names.get(first_code, first_code)

    section_label = label_map.get((first_code, second_code)) or ""
    # str.split always yields at least one element, so [-1] covers both the
    # "a->b" case (last segment) and the arrow-free case (the whole label).
    actual_name = section_label.split("->")[-1].strip()
    if actual_name:
        return actual_name

    # Fall back to the standard names from the specification.
    sec_item = self.secondary_specs.get((first_code, second_code))
    if sec_item:
        return f"{sec_item.first_cn} > {sec_item.second_cn}"
    return f"{first_code} > {second_code}"
|
|
|
+
|
|
|
def _get_actual_first_name(self, label_map: Dict[Tuple[str, str], str],
                           first_code: str) -> str:
    """
    Resolve the first-level chapter title as it appears in the document.

    Scans ``label_map`` for entries under ``first_code`` whose label contains
    ``"->"`` and returns the text before the first arrow of the first entry
    where that text is non-blank.  If no entry qualifies, returns the
    standard name from ``self.spec_loader.first_names`` (or ``first_code``
    itself when the code is unknown there).

    Args:
        label_map: Mapping ``(first_code, second_code) -> section_label``.
        first_code: First-level category code to resolve.

    Returns:
        The first-level chapter name.
    """
    for (fc, _second_code), label in label_map.items():
        if fc != first_code:
            continue
        head, arrow, _rest = label.partition("->")
        chapter_name = head.strip()
        # Skip labels without an arrow and labels such as "->xxx" whose
        # first-level part is blank; another entry may carry a usable name.
        if arrow and chapter_name:
            return chapter_name
    # Fall back to the standard name from the specification.
    return self.spec_loader.first_names.get(first_code, first_code)
|
|
|
+
|
|
|
async def _generate_recommendations(
|
|
|
self,
|
|
|
tertiary_result: Dict,
|
|
|
@@ -864,7 +921,8 @@ JSON输出:"""
|
|
|
actual_first: Set[str],
|
|
|
actual_secondary: Set[Tuple[str, str]],
|
|
|
actual_tertiary: Set[Tuple[str, str, str]],
|
|
|
- chapter_classification: Optional[str] = None
|
|
|
+ chapter_classification: Optional[str] = None,
|
|
|
+ chunks: List[Dict] = None
|
|
|
) -> List[Dict[str, Any]]:
|
|
|
"""
|
|
|
生成结构化分级改进建议。
|
|
|
@@ -872,12 +930,15 @@ JSON输出:"""
|
|
|
每条建议包含:
|
|
|
level : 缺失级别(一级 / 二级 / 三级 / 一致性)
|
|
|
issue_point : 问题摘要(含级别标识)
|
|
|
- location : 问题定位路径
|
|
|
+ location : 问题定位路径(使用实际章节名)
|
|
|
suggestion : 补充建议(使用LLM生成)
|
|
|
reason : 规范依据说明(使用LLM生成)
|
|
|
"""
|
|
|
recommendations: List[Dict[str, Any]] = []
|
|
|
|
|
|
+ # 构建 section_label 映射,用于获取实际章节名
|
|
|
+ label_map = self._build_section_label_map(chunks or [])
|
|
|
+
|
|
|
# 确定需要检查的一级分类范围
|
|
|
if chapter_classification:
|
|
|
required_first = (
|
|
|
@@ -939,15 +1000,18 @@ JSON输出:"""
|
|
|
|
|
|
# ── 二级缺失 ──────────────────────────────────────────
|
|
|
if (cat1, cat2) not in actual_secondary:
|
|
|
+ # 获取实际一级章节名
|
|
|
+ actual_first_name = self._get_actual_first_name(label_map, cat1)
|
|
|
+
|
|
|
# issue_point 和 reason 使用简单拼接
|
|
|
- issue_point = f"【二级章节缺失】{first_name} > '{second_name}'整个章节不存在"
|
|
|
- reason = f"依据《桥梁公司危险性较大工程管理实施细则(2025版)》规定,'{first_name}'下应包含'{second_name}'二级章节,当前正文中未发现该章节内容"
|
|
|
+ issue_point = f"【二级章节缺失】{actual_first_name} > '{second_name}'整个章节不存在"
|
|
|
+ reason = f"依据《桥梁公司危险性较大工程管理实施细则(2025版)》规定,'{actual_first_name}'下应包含'{second_name}'二级章节,当前正文中未发现该章节内容"
|
|
|
|
|
|
# 尝试使用LLM生成 suggestion
|
|
|
llm_result = await self._generate_recommendation_with_llm(
|
|
|
level="二级",
|
|
|
first_code=cat1,
|
|
|
- first_name=first_name,
|
|
|
+ first_name=actual_first_name,
|
|
|
second_code=cat2,
|
|
|
second_name=second_name,
|
|
|
first_seq=first_seq,
|
|
|
@@ -958,12 +1022,12 @@ JSON输出:"""
|
|
|
suggestion = llm_result.get("suggestion")
|
|
|
else:
|
|
|
# 回退到简单拼接
|
|
|
- suggestion = f"请在'{first_name}'下添加'{second_name}'章节内容"
|
|
|
+ suggestion = f"请在'{actual_first_name}'下添加'{second_name}'章节内容"
|
|
|
|
|
|
recommendations.append({
|
|
|
"level": "二级",
|
|
|
"issue_point": issue_point,
|
|
|
- "location": f"{first_name} > {second_name}",
|
|
|
+ "location": actual_first_name, # 二级缺失定位到一级章节
|
|
|
"suggestion": suggestion,
|
|
|
"reason": reason,
|
|
|
"first_seq": first_seq,
|
|
|
@@ -986,6 +1050,9 @@ JSON输出:"""
|
|
|
if not missing_t_items:
|
|
|
continue
|
|
|
|
|
|
+ # 获取实际二级小节名
|
|
|
+ actual_second_name = self._get_actual_chapter_name(label_map, cat1, cat2)
|
|
|
+
|
|
|
# issue_point 和 reason 使用简单拼接(三级缺失)
|
|
|
# 尝试使用LLM批量生成 suggestion
|
|
|
llm_result = await self._generate_recommendation_with_llm(
|
|
|
@@ -1012,7 +1079,7 @@ JSON输出:"""
|
|
|
recommendations.append({
|
|
|
"level": "三级",
|
|
|
"issue_point": f"【三级内容缺失】{first_name} > {second_name} > '{t_item.third_cn}'",
|
|
|
- "location": f"{first_name} > {second_name}",
|
|
|
+ "location": actual_second_name, # 三级缺失定位到二级小节
|
|
|
"suggestion": suggestion,
|
|
|
"reason": f"依据《桥梁公司危险性较大工程管理实施细则(2025版)》规定,'{second_name}'下应包含'{t_item.third_cn}'内容要点",
|
|
|
"first_seq": first_seq,
|