|
@@ -1,484 +0,0 @@
|
|
|
-"""
|
|
|
|
|
-目录完整性审查模块
|
|
|
|
|
-
|
|
|
|
|
-使用LLM对比实际目录(OCR提取)和标准目录,找出缺失项。
|
|
|
|
|
-"""
|
|
|
|
|
-
|
|
|
|
|
-import json
|
|
|
|
|
-import re
|
|
|
|
|
-from pathlib import Path
|
|
|
|
|
-from typing import Dict, Any, List, Optional
|
|
|
|
|
-
|
|
|
|
|
-import yaml
|
|
|
|
|
-
|
|
|
|
|
-from foundation.observability.logger.loggering import review_logger as logger
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
class CatalogReviewer:
    """Check a document's table of contents against a standard catalogue.

    The reviewer builds a prompt containing the actual (OCR-extracted)
    catalogue and the standard catalogue, asks an LLM to list the missing
    first- and second-level entries, and parses the JSON answer.  When the
    answer cannot be parsed the LLM is asked to repair its own output, up to
    ``_MAX_ATTEMPTS`` calls in total.
    """

    # Default location of the standard catalogue template (YAML file).
    DEFAULT_TEMPLATE_PATH = (
        Path(__file__).parent.parent / 'doc_worker' / 'config' / 'StandardCatalogTemplate.yaml'
    )

    # Total number of LLM calls per review: the initial request plus the
    # format-repair retries.
    _MAX_ATTEMPTS = 3

    # Worked example of the expected JSON answer.  Kept as a plain string so
    # its braces never have to be escaped inside an f-string.
    # NOTE(review): the outer "chapter_code" here is "outline" while the
    # skeleton below and _fallback_result use "catalogue" - confirm which one
    # downstream consumers expect.
    _JSON_EXAMPLE_TEMPLATE = '''{
    "details": {
        "name": "outline_check",
        "response": [
            {
                "check_item": "completeness_check",
                "chapter_code": "catalogue",
                "check_item_code": "catalogue_completeness_check",
                "check_result": {
                    "issue_point": "【一级缺失】第四章 施工工艺技术",
                    "location": "目录页",
                    "page": 3,
                    "suggestion": "建议补充'第四章 施工工艺技术'章节",
                    "reason": "目录页缺少该章节",
                    "risk_level": "高风险"
                },
                "exist_issue": true,
                "risk_info": {"risk_level": "high"}
            },
            {
                "check_item": "completeness_check",
                "chapter_code": "catalogue",
                "check_item_code": "catalogue_completeness_check",
                "check_result": {
                    "issue_point": "【一级缺失】第十章 其他资料",
                    "location": "目录页",
                    "page": 3,
                    "suggestion": "建议补充'第十章 其他资料'章节",
                    "reason": "目录页缺少该章节",
                    "risk_level": "高风险"
                },
                "exist_issue": true,
                "risk_info": {"risk_level": "high"}
            },
            {
                "check_item": "completeness_check",
                "chapter_code": "catalogue",
                "check_item_code": "catalogue_completeness_check",
                "check_result": {
                    "issue_point": "【二级缺失】第一章 编制依据 - 四、编制原则",
                    "location": "第一章",
                    "page": 3,
                    "suggestion": "建议补充'四、编制原则'",
                    "reason": "第一章缺少该二级目录",
                    "risk_level": "中风险"
                },
                "exist_issue": true,
                "risk_info": {"risk_level": "medium"}
            }
        ],
        "review_location_label": "目录完整性审查",
        "chapter_code": "outline"
    },
    "success": true
}'''

    # Minimal skeleton of the JSON answer shown to the LLM (single entry).
    _BASE_JSON_TEMPLATE = '''{
    "details": {
        "name": "outline_check",
        "response": [
            {
                "check_item": "completeness_check",
                "chapter_code": "catalogue",
                "check_item_code": "catalogue_completeness_check",
                "check_result": {
                    "issue_point": "【一级缺失】xxx",
                    "location": "目录页",
                    "page": 3,
                    "suggestion": "建议补充'xxx'章节",
                    "reason": "简要说明",
                    "risk_level": "高风险"
                },
                "exist_issue": true,
                "risk_info": {"risk_level": "high"}
            }
        ],
        "review_location_label": "目录完整性审查",
        "chapter_code": "catalogue"
    },
    "success": true
}'''

    def __init__(self, template_path: Optional[Path] = None):
        """Create a reviewer.

        Args:
            template_path: YAML file holding the standard catalogue under the
                ``text_template`` key.  Falls back to
                ``DEFAULT_TEMPLATE_PATH`` when omitted.
        """
        self.template_path = template_path or self.DEFAULT_TEMPLATE_PATH
        self.standard_text = self._load_standard_template()

    def _load_standard_template(self) -> str:
        """Return the standard catalogue text.

        Reads ``text_template`` from the YAML file at ``self.template_path``.
        If the file cannot be read or parsed, or does not provide a non-empty
        ``text_template``, the built-in default template is returned instead
        so the prompt never contains an empty standard catalogue.
        """
        try:
            with open(self.template_path, 'r', encoding='utf-8') as f:
                template = yaml.safe_load(f)
        except Exception as e:
            logger.warning(f"[CatalogReviewer] 加载标准模板失败: {e}")
            return self._default_template()

        text = template.get('text_template', '') if isinstance(template, dict) else ''
        if not text:
            # An empty or missing template would make every review vacuous.
            logger.warning("[CatalogReviewer] 标准模板缺少 text_template,使用默认模板")
            return self._default_template()
        return text

    def _default_template(self) -> str:
        """Return the built-in ten-chapter standard catalogue."""
        return """第一章 编制依据
一、法律法规
二、标准规范
三、文件制度
四、编制原则
五、编制范围

第二章 工程概况
一、设计概况
二、工程地质与水文气象
三、周边环境
四、施工平面及立面布置
五、施工要求和技术保证条件
六、风险辨识与分级
七、参建各方责任主体单位

第三章 施工计划
一、施工进度计划
二、施工材料计划
三、施工设备计划
四、劳动力计划
五、安全生产费用使用计划

第四章 施工工艺技术
一、主要施工方法概述
二、技术参数
三、工艺流程
四、施工准备
五、施工方法及操作要求
六、检查要求

第五章 安全保证措施
一、安全保证体系
二、组织保证措施
三、技术保证措施
四、监测监控措施
五、应急处置措施

第六章 质量保证措施
一、质量保证体系
二、质量目标
三、工程创优规划
四、质量控制程序与具体措施

第七章 环境保证措施
一、环境保证体系
二、环境保护组织机构
三、环境保护及文明施工措施

第八章 施工管理及作业人员配备与分工
一、施工管理人员
二、专职安全生产管理人员
三、其他作业人员

第九章 验收要求
一、验收标准
二、验收程序
三、验收内容
四、验收时间
五、验收人员

第十章 其他资料
一、计算书
二、相关施工图纸
三、附图附表
四、编制及审核人员情况"""

    async def review(self, actual_catalog_text: str, trace_id_idx: str = "",
                     toc_page_range: Optional[Dict[str, int]] = None) -> Dict[str, Any]:
        """Run the completeness review and return the result dictionary.

        Args:
            actual_catalog_text: Catalogue text extracted from the document.
            trace_id_idx: Prefix for the trace id passed to the LLM client;
                ``'catalog_review'`` is used when empty.
            toc_page_range: Optional ``{'start': int, 'end': int}`` page range
                of the table of contents, forwarded into the prompt.

        Returns:
            ``{"details": ..., "success": ..., "execution_time": seconds}``.
            Any failure (LLM error, or output still unparseable after
            ``_MAX_ATTEMPTS`` calls) is logged and replaced by
            ``_fallback_result``; this method does not raise.
        """
        import time

        # perf_counter is monotonic, so the duration cannot go negative when
        # the wall clock is adjusted.
        started = time.perf_counter()

        try:
            from foundation.ai.agent.generate.model_generate import generate_model_client

            system_prompt = (
                "你是一位施工方案文档审查专家,负责对比实际目录和标准目录,找出缺失项。"
                "请严格按JSON格式输出最终结果,不要输出任何其他内容。"
            )
            prompt = self._build_prompt(actual_catalog_text, toc_page_range)

            content = ""
            err_msg = None
            for attempt in range(self._MAX_ATTEMPTS):
                if attempt:
                    # The previous attempt failed: log it and ask the model to
                    # repair its own output.
                    logger.warning(f"[CatalogReviewer] 第{attempt}次JSON解析失败: {err_msg}")
                    prompt = self._build_fix_prompt(content, err_msg)

                content = await self._call_llm(
                    generate_model_client, trace_id_idx, attempt, system_prompt, prompt
                )
                result, err_msg = self._try_parse_json(content)
                if result and "details" in result:
                    return {
                        "details": result["details"],
                        "success": result.get("success", True),
                        "execution_time": time.perf_counter() - started,
                    }
                if err_msg is None:
                    # The JSON parsed but is not the expected envelope; say so
                    # instead of passing "None" into the repair prompt.
                    err_msg = "JSON解析成功但缺少顶层 details 字段"

            raise ValueError(f"重试3次后JSON解析仍失败,最后错误: {err_msg}")

        except Exception as e:
            logger.error(f"[CatalogReviewer] LLM审查失败(已重试3次): {e}")
            return self._fallback_result(time.perf_counter() - started)

    async def _call_llm(self, client, trace_id_idx: str, attempt: int,
                        system_prompt: str, user_prompt: str) -> str:
        """Send one request to the LLM client and return its raw text answer.

        ``attempt`` is zero-based; it is appended to the trace id and logged
        one-based.
        """
        logger.info(f"[CatalogReviewer] 第 {attempt + 1} 次调用")
        content = await client.get_model_generate_invoke(
            trace_id=f"{trace_id_idx or 'catalog_review'}_attempt{attempt}",
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            function_name="catalog_integrity_review",
            timeout=120
        )
        logger.info(f"[CatalogReviewer] content length: {len(content)}")
        return content

    def _try_parse_json(self, content: str) -> tuple:
        """Parse ``content`` and describe the failure when parsing fails.

        Returns:
            ``(result_dict, None)`` on success, otherwise
            ``(None, error_message)`` where the message includes the first
            500 characters of ``content``.
        """
        result = self._extract_json(content)
        if result is not None:
            return result, None

        if not isinstance(content, str):
            # Slicing a non-string below would raise instead of reporting.
            return None, f"LLM返回内容不是字符串: {type(content).__name__}"

        # Re-parse the raw text only to obtain a concrete error message.
        preview = content[:500]
        try:
            json.loads(content)
        except json.JSONDecodeError as e:
            return None, f"JSONDecodeError: {e} | 内容前500字: {preview}"
        except Exception as e:
            return None, f"{type(e).__name__}: {e} | 内容前500字: {preview}"
        return None, "JSON结构异常但loads未报错"

    def _build_fix_prompt(self, malformed_content: str, parse_error: str = "") -> str:
        """Build the follow-up prompt asking the LLM to repair its JSON.

        Only the first 2000 characters of the malformed output are echoed
        back to the model.
        """
        preview = malformed_content[:2000]
        error_info = f"\n解析错误详情:{parse_error}\n" if parse_error else ""
        return (
            "你上次输出的JSON格式不正确,无法解析。请仔细检查以下问题并重新输出:\n\n"
            f"{error_info}"
            "1. 确保所有字符串键和值使用双引号\n"
            "2. 确保字符串值内没有未转义的换行符,如有请用\\n替代\n"
            "3. 确保所有括号、方括号正确闭合\n"
            "4. 不要使用markdown代码块包裹JSON\n"
            "5. 不要输出任何JSON之外的内容(包括思考过程)\n\n"
            f"以下是你的输出,请修正格式后重新输出完整的JSON结果:\n\n{preview}"
        )

    def _fallback_result(self, execution_time: float) -> Dict[str, Any]:
        """Return the placeholder result used when the review could not run.

        NOTE(review): this reports ``success=True`` with ``exist_issue=False``,
        so a failed review is indistinguishable from a clean catalogue for
        callers that only inspect the result.  Kept unchanged because callers
        may depend on it - confirm whether ``success`` should be ``False``.
        """
        return {
            "details": {
                "name": "outline_check",
                "response": [{
                    "check_item": "completeness_check",
                    "chapter_code": "catalogue",
                    "check_item_code": "catalogue_completeness_check",
                    "check_result": {
                        "issue_point": "无",
                        "location": "目录页",
                        "suggestion": "无",
                        "reason": "无",
                        "risk_level": "无风险"
                    },
                    "exist_issue": False,
                    "risk_info": {"risk_level": "none"}
                }],
                "review_location_label": "目录完整性审查",
                "chapter_code": "catalogue"
            },
            "success": True,
            "execution_time": execution_time
        }

    def _build_prompt(self, actual_catalog_text: str,
                      toc_page_range: Optional[Dict[str, int]] = None) -> str:
        """Build the review prompt sent to the LLM.

        Args:
            actual_catalog_text: Catalogue text extracted from the document.
            toc_page_range: Optional ``{'start': int, 'end': int}``; missing
                keys default to 3.  When omitted the prompt tells the model
                to use page 3 everywhere and has no page-information section.
        """
        json_example = self._JSON_EXAMPLE_TEMPLATE
        base_template = self._BASE_JSON_TEMPLATE

        if toc_page_range:
            start_page = toc_page_range.get('start', 3)
            end_page = toc_page_range.get('end', 3)
            if start_page == end_page:
                page_info = f"目录页位于第 {start_page} 页"
            else:
                page_info = f"目录页位于第 {start_page}-{end_page} 页"
            page_section = f"## 页码信息\n{page_info}\n\n"
            page_hint = f"{start_page}"
        else:
            page_section = ""
            page_hint = "统一使用 3"

        # The field specification is shared by both variants; only the page
        # hint and the optional page-information section differ.
        page_instruction = (
            f"\n{page_section}"
            '## 输出格式要求\n'
            'check_result 中必须包含以下字段:\n'
            '- issue_point: 问题描述\n'
            '- location: 问题定位(一级缺失填"目录页",二级缺失填对应的一级章节名)\n'
            f'- page: 页码数字({page_hint})\n'
            '- suggestion: 补充建议\n'
            '- reason: 原因说明\n'
            '- risk_level: 风险等级("高风险"或"中风险")\n'
        )

        return f"""你是一位施工方案文档审查专家。请对比【实际目录】和【标准目录】,找出缺失项。

## 审查原则
1. **语义匹配**:实际目录与标准目录含义相同即算匹配,不要求文字完全一致
2. **常见同义表述**(示例):
   - "编制依据" ≈ "方案编制依据" ≈ "编制原则及依据"
   - "工程概况" ≈ "工程基本情况" ≈ "项目概况"
   - "施工计划" ≈ "施工进度计划" ≈ "施工安排"
   - "法律法规" ≈ "相关法律" ≈ "法规依据"
3. **容错范围**:
   - 一级标题必须严格对应(如"编制依据"不能变成"引用标准")
   - 二级标题允许一定变通,但核心含义必须一致

## 实际目录(来自OCR识别)
```
{actual_catalog_text}
```

## 标准目录(必须包含的完整结构)
```
{self.standard_text}
```

## 输出规则
1. **一级缺失判定**:实际目录中完全没有对应的章,或章节标题完全不匹配
2. **二级缺失判定**:只有当父级一级目录**存在**时,才检查其下的二级目录是否缺失
3. **重要**:如果某个一级目录缺失,**不要报告**该章节下的二级目录缺失(避免重复提醒)

## 输出要求
**重要:最终答案只输出 JSON,不要添加 markdown 代码块标记(```json)。**

请直接输出 completeness_check 格式的 JSON 结果:
{base_template}

**重要输出规则**:
1. **每个缺失项必须单独输出**:一级缺失和二级缺失要分开,不同的缺失项也要分开
2. **禁止合并**:不要将多个缺失项写在一个 `issue_point` 里
3. **列表格式**:`response` 必须是一个列表,每个缺失项是列表中的一个独立对象

**正确示例**(多个缺失项分开):
```json
{json_example}
```

**风险等级规则**:
- 一级缺失:risk_level 为 "高风险", risk_info.risk_level 为 "high"
- 二级缺失:risk_level 为 "中风险", risk_info.risk_level 为 "medium"
- 如无缺失,response 中放一条 "issue_point": "【目录完整】一二级目录结构完整", "exist_issue": false

{page_instruction}
"""

    def _extract_json(self, content: str) -> Optional[Dict[str, Any]]:
        """Extract the JSON object embedded in an LLM answer.

        Takes the text between the first ``{`` and the last ``}`` (which also
        discards markdown fences and any surrounding chatter), parses it, and
        on failure retries once after ``_fix_json_content``.

        Returns:
            The parsed object, or ``None`` when nothing parseable was found.
        """
        if not isinstance(content, str):
            logger.error(f"[CatalogReviewer] JSON解析异常: 内容类型为 {type(content).__name__}")
            return None

        try:
            # Bare line breaks inside string values are flattened to spaces.
            content = content.strip().replace('\n', ' ').replace('\r', ' ')
            original_preview = content[:500]

            json_start = content.find('{')
            if json_start == -1:
                logger.warning("[CatalogReviewer] 未找到 JSON 开始标记 '{'")
                return None
            content = content[json_start:]

            json_end = content.rfind('}')
            if json_end == -1:
                logger.warning("[CatalogReviewer] 未找到 JSON 结束标记 '}'")
                return None
            content = content[:json_end + 1]

            # strict=False additionally accepts raw control characters (for
            # example tabs) inside string values.
            try:
                return json.loads(content, strict=False)
            except json.JSONDecodeError as e:
                logger.debug(f"[CatalogReviewer] 直接解析失败: {e}")

            # Second chance: repair common formatting mistakes and re-parse.
            fixed_content = self._fix_json_content(content)
            try:
                return json.loads(fixed_content, strict=False)
            except json.JSONDecodeError as e:
                logger.debug(f"[CatalogReviewer] 修复后解析失败: {e}")

            logger.error("[CatalogReviewer] JSON解析失败")
            logger.error(f"[CatalogReviewer] 原始内容: {original_preview}")
            return None

        except Exception as e:
            logger.error(f"[CatalogReviewer] JSON解析异常: {e}")
            logger.error(f"[CatalogReviewer] 内容前500字: {content[:500]}")
            return None

    def _fix_json_content(self, content: str) -> str:
        """Apply best-effort textual repairs to almost-valid JSON.

        The substitutions are regex heuristics that do not track whether a
        match lies inside a string value, so this is only used after a
        direct parse has already failed.
        """
        content = content.strip()

        # 1. Single-quoted keys and values become double-quoted.
        #    'key':  ->  "key":
        content = re.sub(r"'([a-zA-Z_][a-zA-Z0-9_]*)'\s*:", r'"\1":', content)
        #    : 'value'  ->  : "value"   (before a comma, } or ])
        content = re.sub(r":\s*'([^']*)'\s*([,}\]])", r': "\1"\2', content)
        #    : 'value' at the very end of the text
        content = re.sub(r":\s*'([^']*)'\s*$", r': "\1"', content)

        # 2. Bare property names get quoted:  { key:  ->  {"key":
        content = re.sub(r'(\{|,)\s*([a-zA-Z_][a-zA-Z0-9_]*)\s*:', r'\1"\2":', content)

        # 3. Trailing commas before a closing bracket are dropped.
        content = re.sub(r',\s*([}\]])', r'\1', content)

        return content
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
async def review_catalog_integrity(actual_catalog_text: str,
                                   template_path: Optional[Path] = None,
                                   trace_id_idx: str = "",
                                   toc_page_range: Optional[Dict[str, int]] = None) -> Dict[str, Any]:
    """Convenience wrapper: review the completeness of a catalogue.

    Creates a ``CatalogReviewer`` and awaits its ``review`` method.

    Args:
        actual_catalog_text: Catalogue text extracted from the document.
        template_path: Optional path to a custom standard-catalogue template.
        trace_id_idx: Optional trace id prefix forwarded to
            ``CatalogReviewer.review``.
        toc_page_range: Optional ``{'start': int, 'end': int}`` page range of
            the table of contents, forwarded to ``CatalogReviewer.review``.

    Returns:
        The review result dictionary produced by ``CatalogReviewer.review``.
    """
    reviewer = CatalogReviewer(template_path)
    return await reviewer.review(
        actual_catalog_text,
        trace_id_idx=trace_id_idx,
        toc_page_range=toc_page_range,
    )
|
|
|