Explorar el Código

v0.0.8-功能优化-新完整性审查-
- 待测试版本

WangXuMing hace 1 mes
padre
commit
626aabac8b
Se han modificado 41 ficheros con 4099 adiciones y 699 borrados
  1. 6 1
      core/base/workflow_manager.py
  2. 1 1
      core/construction_review/component/ai_review_engine.py
  3. 61 0
      core/construction_review/component/constants.py
  4. 1 1
      core/construction_review/component/doc_worker/config/Construction_Plan_Content_Specification.csv
  5. 257 320
      core/construction_review/component/document_processor.py
  6. 150 16
      core/construction_review/component/report_generator.py
  7. 10 10
      core/construction_review/component/reviewers/catalogues_check/catalogues_check.py
  8. 182 0
      core/construction_review/component/reviewers/check_completeness/__init__.py
  9. 184 0
      core/construction_review/component/reviewers/check_completeness/adapter.py
  10. 838 0
      core/construction_review/component/reviewers/check_completeness/completeness_checker.py
  11. 1 1
      core/construction_review/component/reviewers/check_completeness/components/data_loader.py
  12. 14 6
      core/construction_review/component/reviewers/check_completeness/components/result_analyzer.py
  13. 1 0
      core/construction_review/component/reviewers/check_completeness/components/review_pipeline.py
  14. BIN
      core/construction_review/component/reviewers/check_completeness/config/Construction_Plan_Content_Specification.csv
  15. 551 0
      core/construction_review/component/reviewers/check_completeness/lightweight_completeness_checker.py
  16. 698 0
      core/construction_review/component/reviewers/check_completeness/tertiary_completeness_checker.py
  17. 290 0
      core/construction_review/component/reviewers/check_completeness/test_completeness_checker.py
  18. 12 14
      core/construction_review/component/reviewers/reference_basis_reviewer.py
  19. 15 17
      core/construction_review/component/reviewers/timeliness_basis_reviewer.py
  20. 6 6
      core/construction_review/component/reviewers/utils/directory_extraction.py
  21. 10 266
      core/construction_review/workflows/ai_review_workflow.py
  22. 20 17
      core/construction_review/workflows/core_functions/ai_review_core_fun.py
  23. 13 12
      core/construction_review/workflows/document_workflow.py
  24. 66 5
      core/construction_review/workflows/report_workflow.py
  25. 48 6
      core/construction_write/component/outline_generator.py
  26. 12 0
      core/construction_write/workflows/agent.py
  27. 59 0
      demo.py
  28. 132 0
      track/observations/list_directory_a62ffe4b.txt
  29. 2 0
      track/observations/read_file_791b3a3a.txt
  30. 2 0
      track/observations/read_file_918670c7.txt
  31. 2 0
      track/observations/read_file_998abacd.txt
  32. 2 0
      track/observations/read_file_b6948d8b.txt
  33. 2 0
      track/observations/read_file_e15321d0.txt
  34. 2 0
      track/observations/read_file_ee5a4ec3.txt
  35. 217 0
      track/observations/search_content_1047433b.txt
  36. 107 0
      track/observations/search_files_1a59aac5.txt
  37. 25 0
      track/tracking_20260306_181157_64c82f4e.json
  38. 25 0
      track/tracking_20260306_181214_a9e464c4.json
  39. 25 0
      track/tracking_20260306_181218_e1d6c6c0.json
  40. 25 0
      track/tracking_20260306_181508_35c4b4da.json
  41. 25 0
      track/tracking_20260306_181511_a57ff3cb.json

+ 6 - 1
core/base/workflow_manager.py

@@ -744,7 +744,12 @@ class WorkflowManager:
             # 执行报告生成
             report_result = await report_workflow.execute()
 
-            logger.info(f"报告生成完成: {state['callback_task_id']}")
+            # 检查是否为降级报告
+            is_fallback = report_result.get('is_fallback', False)
+            if is_fallback:
+                logger.warning(f"报告生成使用了降级方案: {state['callback_task_id']}")
+            else:
+                logger.info(f"报告生成完成: {state['callback_task_id']}")
 
             # 保存完整结果(包含文档处理、AI审查、报告生成)
             await self._save_complete_results(state, report_result)

+ 1 - 1
core/construction_review/component/ai_review_engine.py

@@ -745,7 +745,7 @@ class AIReviewEngine(BaseReviewer):
             review_results_df = pd.DataFrame(review_results)
             chapter_labels = review_results_df['section_label'].str.split('->').str[0]
             review_results_df['title'] = chapter_labels
-
+            review_results_df.to_csv(str(Path("temp") / f'{trace_id_idx}_completeness_review_results.csv'), encoding='utf-8-sig', index=False)
             # 将审查结果存储到Redis,供 outline_check 使用
             logger.info(f"[完整性检查] 准备将大纲审查结果存储到Redis,bind_id: {trace_id_idx}")
             from .reviewers.check_completeness.utils.redis_csv_utils import df_store_to_redis

+ 61 - 0
core/construction_review/component/constants.py

@@ -0,0 +1,61 @@
+"""
+文档处理模块常量定义
+统一管理分类代码、状态码等枚举值,消除硬编码字符串
+"""
+
+from enum import Enum
+
+
+class CategoryCode(Enum):
+    """一级分类代码"""
+    BASIS = "basis"                      # 编制依据
+    OVERVIEW = "overview"                # 工程概况
+    PLAN = "plan"                        # 施工计划
+    TECHNOLOGY = "technology"            # 施工工艺技术
+    SAFETY = "safety"                    # 安全保证措施
+    QUALITY = "quality"                  # 质量保证措施
+    ENVIRONMENT = "environment"          # 环境保证措施
+    MANAGEMENT = "management"            # 施工管理及作业人员配备与分工
+    ACCEPTANCE = "acceptance"            # 验收要求
+    OTHER = "other"                      # 其他资料
+    NON_STANDARD = "non_standard"        # 非标准项
+
+
+class StatusCode(Enum):
+    """处理状态码"""
+    DOC_ANS_COMPLETED = "docu_ans_completed"
+    PROCESSING = "processing"
+    FAILED = "failed"
+    COMPLETED = "completed"
+
+
+class StageName(Enum):
+    """处理阶段名称"""
+    DOCUMENT_PARSING = "文档解析"
+    TOC_EXTRACTION = "目录提取"
+    CLASSIFICATION = "分类处理"
+    TEXT_EXTRACTION = "文本提取"
+    TEXT_SPLITTING = "文本切分"
+    SECONDARY_CLASSIFICATION = "二级分类"
+    TERTIARY_CLASSIFICATION = "三级分类"
+
+
+# 一级分类中文名称映射
+CATEGORY_CN_MAPPING = {
+    CategoryCode.BASIS.value: "编制依据",
+    CategoryCode.OVERVIEW.value: "工程概况",
+    CategoryCode.PLAN.value: "施工计划",
+    CategoryCode.TECHNOLOGY.value: "施工工艺技术",
+    CategoryCode.SAFETY.value: "安全保证措施",
+    CategoryCode.QUALITY.value: "质量保证措施",
+    CategoryCode.ENVIRONMENT.value: "环境保证措施",
+    CategoryCode.MANAGEMENT.value: "施工管理及作业人员配备与分工",
+    CategoryCode.ACCEPTANCE.value: "验收要求",
+    CategoryCode.OTHER.value: "其他资料",
+    CategoryCode.NON_STANDARD.value: "非标准项",
+}
+
+
+def get_category_cn(category_code: str) -> str:
+    """获取分类的中文名称"""
+    return CATEGORY_CN_MAPPING.get(category_code, "未知分类")

+ 1 - 1
core/construction_review/component/doc_worker/config/Construction_Plan_Content_Specification.csv

@@ -1,4 +1,4 @@
-一级目录	二级目录	三级内容
+一级目录	二级目录	三级内容
 编制依据	法律法规	法律法规包括国家、工程所在地省级政府发布的法律法规、规章制度等;
 编制依据	标准规范	标准规范包括行业标准、技术规程等;
 编制依据	文件制度	文件制度包括四川路桥、路桥集团、桥梁公司、建设单位下发的文件制度和管理程序文件等;

+ 257 - 320
core/construction_review/component/document_processor.py

@@ -2,22 +2,31 @@
 文档处理器
 负责文档解析、内容提取和结构化处理
 集成doc_worker模块的智能处理能力
+
+重构说明:
+1. 使用类级别共享ChunkClassifier实例,避免重复创建LLM客户端
+2. 统一PDF/DOCX处理流程,消除代码重复
+3. 移除splits冗余数据,统一使用chunks
+4. 完善异常处理,记录完整堆栈信息
 """
 
 import io
 import json
 import os
 import tempfile
+from dataclasses import dataclass
 from pathlib import Path
-from typing import Dict, Any, Optional, Callable
+from typing import Dict, Any, Optional, List
 from datetime import datetime
 import asyncio
 
 from foundation.observability.logger.loggering import review_logger as logger
 from foundation.observability.cachefiles import cache, CacheBaseDir
+from .constants import CategoryCode, StatusCode, StageName
+
 # 引入doc_worker核心组件
 try:
-    from .doc_worker.interfaces import DocumentSource, TOCExtractor, FullTextExtractor, TextSplitter
+    from .doc_worker.interfaces import DocumentSource, TOCExtractor, FullTextExtractor, TextSplitter, HierarchyClassifier
     from .doc_worker.pdf_worker.toc_extractor import PdfTOCExtractor
     from .doc_worker.pdf_worker.fulltext_extractor import PdfFullTextExtractor
     from .doc_worker.pdf_worker.text_splitter import PdfTextSplitter
@@ -29,7 +38,7 @@ try:
     from .doc_worker.classification.chunk_classifier import ChunkClassifier
     from .doc_worker.config.provider import default_config_provider
 except ImportError:
-    from core.construction_review.component.doc_worker.interfaces import DocumentSource, TOCExtractor, FullTextExtractor, TextSplitter
+    from core.construction_review.component.doc_worker.interfaces import DocumentSource, TOCExtractor, FullTextExtractor, TextSplitter, HierarchyClassifier
     from core.construction_review.component.doc_worker.pdf_worker.toc_extractor import PdfTOCExtractor
     from core.construction_review.component.doc_worker.pdf_worker.fulltext_extractor import PdfFullTextExtractor
     from core.construction_review.component.doc_worker.pdf_worker.text_splitter import PdfTextSplitter
@@ -41,53 +50,81 @@ except ImportError:
     from core.construction_review.component.doc_worker.classification.chunk_classifier import ChunkClassifier
     from core.construction_review.component.doc_worker.config.provider import default_config_provider
 
+
+@dataclass
+class DocumentComponents:
+    """文档处理组件集合,统一封装各类型文档的处理组件"""
+    toc_extractor: TOCExtractor
+    classifier: HierarchyClassifier
+    fulltext_extractor: FullTextExtractor
+    text_splitter: TextSplitter
+
 class DocumentProcessor:
-    """文档处理器"""
+    """
+    文档处理器
+
+    改进说明:
+    1. 使用类级别共享 _shared_chunk_classifier,避免重复创建LLM客户端
+    2. 使用 DocumentComponents 统一管理处理组件
+    3. 统一处理流程 _parse_content 消除代码重复
+    """
+
+    # 类级别共享的ChunkClassifier实例,避免重复创建LLM客户端
+    _shared_chunk_classifier: Optional[ChunkClassifier] = None
 
     def __init__(self):
         self.supported_types = ['pdf', 'docx']
-        # 初始化doc_worker组件
         self.config = default_config_provider
-        # PDF组件
-        self.pdf_toc_extractor = PdfTOCExtractor()
-        self.pdf_fulltext_extractor = PdfFullTextExtractor()
-        self.pdf_text_splitter = PdfTextSplitter()
-        self.pdf_classifier = PdfHierarchyClassifier()
-        # DOCX组件
-        self.docx_toc_extractor = DocxTOCExtractor()
-        self.docx_fulltext_extractor = DocxFullTextExtractor(
-            paragraphs_per_page=int(self.config.get("toc_extraction.paragraphs_per_page", 30))
-        )
-        self.docx_text_splitter = DocxTextSplitter()
-        self.docx_classifier = DocxHierarchyClassifier()
-        # 二三级分类器(通用)
-        self.chunk_classifier = ChunkClassifier()
-
-    async def process_document(self, file_content: bytes, file_type: str,
-                            #  progress_callback: Optional[Callable[[int, str], None]] = None
-                             ) -> Dict[str, Any]:
+
+        # 初始化各类型文档的处理组件
+        self._components: Dict[str, DocumentComponents] = {
+            'pdf': DocumentComponents(
+                toc_extractor=PdfTOCExtractor(),
+                classifier=PdfHierarchyClassifier(),
+                fulltext_extractor=PdfFullTextExtractor(),
+                text_splitter=PdfTextSplitter()
+            ),
+            'docx': DocumentComponents(
+                toc_extractor=DocxTOCExtractor(),
+                classifier=DocxHierarchyClassifier(),
+                fulltext_extractor=DocxFullTextExtractor(
+                    paragraphs_per_page=int(self.config.get("toc_extraction.paragraphs_per_page", 30))
+                ),
+                text_splitter=DocxTextSplitter()
+            )
+        }
+
+    @classmethod
+    def _get_chunk_classifier(cls) -> ChunkClassifier:
+        """获取共享的ChunkClassifier实例"""
+        if cls._shared_chunk_classifier is None:
+            cls._shared_chunk_classifier = ChunkClassifier()
+        return cls._shared_chunk_classifier
+
+    async def process_document(self, file_content: bytes, file_type: str) -> Dict[str, Any]:
         """
         处理文档
 
         Args:
-            file_content: 文件内容
-            file_type: 文件类型
-            progress_callback: 进度回调函数
+            file_content: 文件内容(字节流)
+            file_type: 文件类型(pdf/docx)
 
         Returns:
-            Dict: 解析结果
+            Dict: 结构化的解析结果
+
+        Raises:
+            ValueError: 不支持的文件类型
+            RuntimeError: 文档处理完全失败
         """
         try:
             logger.info(f"开始处理文档,类型: {file_type}")
-            # if progress_callback:
-                #  progress_callback(20, "开始文档处理")
-            # 简化处理:直接解析
-            if file_type.lower() == 'pdf':
-                result = await self.parse_pdf_content(file_content)
-            elif file_type.lower() == 'docx':
-                result = await self.parse_docx_content(file_content)
-            else:
-                raise ValueError(f"不支持的文件类型: {file_type}")
+
+            file_type_lower = file_type.lower()
+            if file_type_lower not in self.supported_types:
+                raise ValueError(f"不支持的文件类型: {file_type},支持的类型: {self.supported_types}")
+
+            # 统一调用解析方法
+            result = await self._parse_content(file_content, file_type_lower)
 
             # 结构化内容
             structured_result = self.structure_content(result)
@@ -95,378 +132,278 @@ class DocumentProcessor:
             return structured_result
 
         except Exception as e:
-            logger.error(f"文档处理失败: {str(e)}")
+            logger.error(f"文档处理失败: {str(e)}", exc_info=True)
             raise
 
-    async def parse_pdf_content(self, file_content: bytes) -> Dict[str, Any]:
-        """解析PDF内容,使用doc_worker的智能处理能力"""
+    async def _parse_content(self, file_content: bytes, file_type: str) -> Dict[str, Any]:
+        """
+        统一的文档解析方法(消除PDF/DOCX代码重复)
+
+        Args:
+            file_content: 文件内容
+            file_type: 文件类型(pdf/docx)
+
+        Returns:
+            Dict: 解析结果
+        """
+        components = self._components.get(file_type)
+        if not components:
+            raise ValueError(f"未找到 {file_type} 类型的处理组件")
+
         try:
-            logger.info("开始使用doc_worker处理PDF文档(内存模式)")
+            logger.info(f"开始使用doc_worker处理{file_type.upper()}文档(内存模式)")
 
-            # 创建DocumentSource(纯内存模式,不使用临时文件)
+            # 创建DocumentSource(纯内存模式)
             source = DocumentSource(
                 path=None,
                 content=file_content,
-                file_type='pdf'
+                file_type=file_type
             )
 
             # 步骤1: 提取目录
-            logger.info("步骤1: 提取文档目录")
-            toc_info = self.pdf_toc_extractor.extract_toc(source)
-            
+            logger.info(f"{StageName.TOC_EXTRACTION.value}: 提取文档目录")
+            toc_info = components.toc_extractor.extract_toc(source)
+
             if toc_info.get('toc_count', 0) == 0:
                 logger.warning("未检测到目录,使用基础处理模式")
-                return await self._fallback_pdf_processing(file_content)
+                return await self._fallback_processing(file_content, file_type)
 
             logger.info(f"成功提取 {toc_info['toc_count']} 个目录项")
 
             # 步骤2: 分类目录项
             target_level = int(self.config.get("text_splitting.target_level", 1))
-            logger.info(f"步骤2: 对{target_level}级目录进行分类")
-            
-            classification_result = await self.pdf_classifier.classify_async(
+            logger.info(f"{StageName.CLASSIFICATION.value}: 对{target_level}级目录进行分类")
+
+            classification_result = await components.classifier.classify_async(
                 toc_info['toc_items'],
                 target_level=target_level
             )
-            
+
             classified_items = classification_result.get('items', [])
             if not classified_items:
                 logger.warning("分类结果为空,使用原始目录项")
                 classified_items = [
-                    item for item in toc_info['toc_items'] 
+                    item for item in toc_info['toc_items']
                     if item.get('level') == target_level
                 ]
                 # 为每个目录项添加默认分类信息
                 for item in classified_items:
                     item['category'] = '未分类'
-                    item['category_code'] = 'other'
+                    item['category_code'] = CategoryCode.OTHER.value
             else:
                 logger.info(f"分类完成,共分类 {len(classified_items)} 个目录项")
 
-            # 步骤3: 提取文档全文
-            logger.info("步骤3: 提取文档全文")
-            # 将同步CPU/IO密集操作放入线程池,避免阻塞事件循环
+            # 步骤3: 提取文档全文(使用线程池避免阻塞事件循环)
+            logger.info(f"{StageName.TEXT_EXTRACTION.value}: 提取文档全文")
             pages_content = await asyncio.to_thread(
-                self.pdf_fulltext_extractor.extract_full_text, source
+                components.fulltext_extractor.extract_full_text, source
             )
-            
+
             if not pages_content:
                 logger.warning("无法提取文档全文,使用基础处理模式")
-                return await self._fallback_pdf_processing(file_content)
+                return await self._fallback_processing(file_content, file_type)
 
             total_chars = sum(len(page.get('text', '')) for page in pages_content)
             logger.info(f"提取完成,共 {len(pages_content)} 页,{total_chars} 个字符")
 
-            # 步骤4: 按分类标题智能切分文本
-            logger.info("步骤4: 按分类标题智能切分文本")
+            # 步骤4: 按分类标题智能切分文本(使用线程池避免阻塞)
+            logger.info(f"{StageName.TEXT_SPLITTING.value}: 按分类标题智能切分文本")
             max_chunk_size = int(self.config.get("text_splitting.max_chunk_size", 3000))
             min_chunk_size = int(self.config.get("text_splitting.min_chunk_size", 50))
-            
-            chunks = self.pdf_text_splitter.split_by_hierarchy(
+
+            chunks = await asyncio.to_thread(
+                components.text_splitter.split_by_hierarchy,
                 classified_items,
                 pages_content,
                 toc_info,
-                target_level=target_level,
-                max_chunk_size=max_chunk_size,
-                min_chunk_size=min_chunk_size
+                target_level,
+                max_chunk_size,
+                min_chunk_size
             )
 
             if not chunks:
                 logger.warning("未能生成任何文本块,使用基础处理模式")
-                return await self._fallback_pdf_processing(file_content)
+                return await self._fallback_processing(file_content, file_type)
 
             logger.info(f"切分完成,共生成 {len(chunks)} 个文本块")
 
             # 步骤5: 对chunks进行二级分类
-            logger.info("步骤5: 对内容块进行二级分类")
-            try:
-                chunks = await self.chunk_classifier.classify_chunks_secondary_async(chunks)
-                logger.info("二级分类完成")
-            except Exception as e:
-                logger.warning(f"二级分类失败: {str(e)},跳过二级分类")
+            chunks = await self._classify_chunks_secondary(chunks)
 
             # 步骤6: 对chunks进行三级分类
-            logger.info("步骤6: 对内容块进行三级分类")
-            try:
-                chunks = await self.chunk_classifier.classify_chunks_tertiary_async(chunks)
-                logger.info("三级分类完成")
-            except Exception as e:
-                logger.warning(f"三级分类失败: {str(e)},跳过三级分类")
+            chunks = await self._classify_chunks_tertiary(chunks)
 
-            # 适配返回格式
-            return {
-                'document_type': 'pdf',
-                'total_pages': len(pages_content),
-                'total_chunks': len(chunks),
-                'chunks': [
-                    {
-                        'page': chunk.get('element_tag', {}).get('page', 0),
-                        'content': chunk.get('review_chunk_content', ''),
-                        'metadata': {
-                            'chunk_id': chunk.get('chunk_id', ''),
-                            'section_label': chunk.get('section_label', ''),
-                            'project_plan_type': chunk.get('project_plan_type', ''),
-                            'chapter_classification': chunk.get('chapter_classification', ''),
-                            'secondary_category_cn': chunk.get('secondary_category_cn', ''),
-                            'secondary_category_code': chunk.get('secondary_category_code', ''),
-                            'tertiary_category_cn': chunk.get('tertiary_category_cn', ''),
-                            'tertiary_category_code': chunk.get('tertiary_category_code', ''),
-                            'element_tag': chunk.get('element_tag', {})
-                        }
-                    }
-                    for chunk in chunks
-                ],
-                'splits': [
-                    {
-                        'content': chunk.get('review_chunk_content', ''),
-                        'metadata': {
-                            'chunk_id': chunk.get('chunk_id', ''),
-                            'section_label': chunk.get('section_label', ''),
-                            'page': chunk.get('element_tag', {}).get('page', 0)
-                        }
-                    }
-                    for chunk in chunks
-                ],
-                'toc_info': toc_info,
-                'classification': {
-                    'items': classified_items,
-                    'target_level': target_level
-                } if classified_items else None
-            }
+            # 构建返回结果(移除splits冗余,统一使用chunks)
+            return self._build_parse_result(
+                file_type, chunks, pages_content, toc_info,
+                classified_items, target_level, total_chars
+            )
 
         except Exception as e:
-            logger.error(f"PDF解析失败: {str(e)}")
+            logger.error(f"{file_type.upper()}解析失败: {str(e)}", exc_info=True)
             # 如果智能处理失败,尝试基础处理
             try:
                 logger.info("尝试使用基础处理模式")
-                return await self._fallback_pdf_processing(file_content)
+                return await self._fallback_processing(file_content, file_type)
             except Exception as fallback_error:
-                logger.error(f"基础处理模式也失败: {str(fallback_error)}")
-                raise
-
-    async def parse_docx_content(self, file_content: bytes) -> Dict[str, Any]:
-        """解析DOCX内容,使用doc_worker的智能处理能力"""
+                logger.error(f"基础处理模式也失败: {str(fallback_error)}", exc_info=True)
+                raise RuntimeError(
+                    f"文档处理完全失败: {file_type.upper()}智能处理({str(e)}) + 基础处理({str(fallback_error)})"
+                ) from e
+
+    async def _classify_chunks_secondary(self, chunks: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """对chunks进行二级分类"""
+        logger.info(f"{StageName.SECONDARY_CLASSIFICATION.value}: 对内容块进行二级分类")
         try:
-            logger.info("开始使用doc_worker处理DOCX文档(内存模式)")
-
-            # 创建DocumentSource(纯内存模式,不使用临时文件)
-            source = DocumentSource(
-                path=None,
-                content=file_content,
-                file_type='docx'
-            )
-
-            # 步骤1: 提取目录
-            logger.info("步骤1: 提取文档目录")
-            toc_info = self.docx_toc_extractor.extract_toc(source)
-            
-            if toc_info.get('toc_count', 0) == 0:
-                logger.warning("未检测到目录,使用基础处理模式")
-                return await self._fallback_docx_processing(file_content)
-
-            logger.info(f"成功提取 {toc_info['toc_count']} 个目录项")
-
-            # 步骤2: 分类目录项
-            target_level = int(self.config.get("text_splitting.target_level", 1))
-            logger.info(f"步骤2: 对{target_level}级目录进行分类")
-            
-            classification_result = await self.docx_classifier.classify_async(
-                toc_info['toc_items'],
-                target_level=target_level
-            )
-            
-            classified_items = classification_result.get('items', [])
-            if not classified_items:
-                logger.warning("分类结果为空,使用原始目录项")
-                classified_items = [
-                    item for item in toc_info['toc_items'] 
-                    if item.get('level') == target_level
-                ]
-                # 为每个目录项添加默认分类信息
-                for item in classified_items:
-                    item['category'] = '未分类'
-                    item['category_code'] = 'other'
-            else:
-                logger.info(f"分类完成,共分类 {len(classified_items)} 个目录项")
-
-            # 步骤3: 提取文档全文
-            logger.info("步骤3: 提取文档全文")
-            # 将同步CPU/IO密集操作放入线程池,避免阻塞事件循环
-            pages_content = await asyncio.to_thread(
-                self.docx_fulltext_extractor.extract_full_text, source
-            )
-            
-            if not pages_content:
-                logger.warning("无法提取文档全文,使用基础处理模式")
-                return await self._fallback_docx_processing(file_content)
-
-            total_chars = sum(len(page.get('text', '')) for page in pages_content)
-            logger.info(f"提取完成,共 {len(pages_content)} 页,{total_chars} 个字符")
+            chunk_classifier = self._get_chunk_classifier()
+            chunks = await chunk_classifier.classify_chunks_secondary_async(chunks)
+            logger.info("二级分类完成")
+        except Exception as e:
+            logger.warning(f"二级分类失败: {str(e)},跳过二级分类", exc_info=True)
+        return chunks
 
-            # 步骤4: 按分类标题智能切分文本
-            logger.info("步骤4: 按分类标题智能切分文本")
-            max_chunk_size = int(self.config.get("text_splitting.max_chunk_size", 3000))
-            min_chunk_size = int(self.config.get("text_splitting.min_chunk_size", 50))
-            
-            chunks = self.docx_text_splitter.split_by_hierarchy(
-                classified_items,
-                pages_content,
-                toc_info,
-                target_level=target_level,
-                max_chunk_size=max_chunk_size,
-                min_chunk_size=min_chunk_size
-            )
+    async def _classify_chunks_tertiary(self, chunks: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """对chunks进行三级分类"""
+        logger.info(f"{StageName.TERTIARY_CLASSIFICATION.value}: 对内容块进行三级分类")
+        try:
+            chunk_classifier = self._get_chunk_classifier()
+            chunks = await chunk_classifier.classify_chunks_tertiary_async(chunks)
+            logger.info("三级分类完成")
+        except Exception as e:
+            logger.warning(f"三级分类失败: {str(e)},跳过三级分类", exc_info=True)
+        return chunks
+
+    def _build_parse_result(
+        self,
+        file_type: str,
+        chunks: List[Dict[str, Any]],
+        pages_content: List[Dict[str, Any]],
+        toc_info: Dict[str, Any],
+        classified_items: List[Dict[str, Any]],
+        target_level: int,
+        total_chars: int
+    ) -> Dict[str, Any]:
+        """
+        构建解析结果(移除splits冗余)
 
-            if not chunks:
-                logger.warning("未能生成任何文本块,使用基础处理模式")
-                return await self._fallback_docx_processing(file_content)
+        改进: 不再生成splits字段,统一使用chunks
+        """
+        result = {
+            'document_type': file_type,
+            'total_pages': len(pages_content),
+            'total_chunks': len(chunks),
+            'chunks': [
+                {
+                    'page': chunk.get('element_tag', {}).get('page', 0),
+                    'content': chunk.get('review_chunk_content', ''),
+                    'metadata': {
+                        'chunk_id': chunk.get('chunk_id', ''),
+                        'section_label': chunk.get('section_label', ''),
+                        'project_plan_type': chunk.get('project_plan_type', ''),
+                        'chapter_classification': chunk.get('chapter_classification', ''),
+                        'secondary_category_cn': chunk.get('secondary_category_cn', ''),
+                        'secondary_category_code': chunk.get('secondary_category_code', ''),
+                        'tertiary_category_cn': chunk.get('tertiary_category_cn', ''),
+                        'tertiary_category_code': chunk.get('tertiary_category_code', ''),
+                        'element_tag': chunk.get('element_tag', {})
+                    }
+                }
+                for chunk in chunks
+            ],
+            'toc_info': toc_info,
+            'classification': {
+                'items': classified_items,
+                'target_level': target_level
+            } if classified_items else None,
+            'metadata': {
+                'total_pages': len(pages_content),
+                'total_chars': total_chars
+            }
+        }
 
-            logger.info(f"切分完成,共生成 {len(chunks)} 个文本块")
+        # DOCX额外保留full_text字段
+        if file_type == 'docx':
+            result['full_text'] = ''.join([page.get('text', '') for page in pages_content])
 
-            # 步骤5: 对chunks进行二级分类
-            logger.info("步骤5: 对内容块进行二级分类")
-            try:
-                chunks = await self.chunk_classifier.classify_chunks_secondary_async(chunks)
-                logger.info("二级分类完成")
-            except Exception as e:
-                logger.warning(f"二级分类失败: {str(e)},跳过二级分类")
+        return result
 
-            # 步骤6: 对chunks进行三级分类
-            logger.info("步骤6: 对内容块进行三级分类")
-            try:
-                chunks = await self.chunk_classifier.classify_chunks_tertiary_async(chunks)
-                logger.info("三级分类完成")
-            except Exception as e:
-                logger.warning(f"三级分类失败: {str(e)},跳过三级分类")
+    async def _fallback_processing(self, file_content: bytes, file_type: str) -> Dict[str, Any]:
+        """
+        统一的基础处理模式(当智能处理失败时使用)
 
-            # 适配返回格式
-            return {
-                'document_type': 'docx',
-                'total_pages': len(pages_content),
-                'total_chunks': len(chunks),
-                'chunks': [
-                    {
-                        'page': chunk.get('element_tag', {}).get('page', 0),
-                        'content': chunk.get('review_chunk_content', ''),
-                        'metadata': {
-                            'chunk_id': chunk.get('chunk_id', ''),
-                            'section_label': chunk.get('section_label', ''),
-                            'project_plan_type': chunk.get('project_plan_type', ''),
-                            'chapter_classification': chunk.get('chapter_classification', ''),
-                            'secondary_category_cn': chunk.get('secondary_category_cn', ''),
-                            'secondary_category_code': chunk.get('secondary_category_code', ''),
-                            'tertiary_category_cn': chunk.get('tertiary_category_cn', ''),
-                            'tertiary_category_code': chunk.get('tertiary_category_code', ''),
-                            'element_tag': chunk.get('element_tag', {})
-                        }
-                    }
-                    for chunk in chunks
-                ],
-                'splits': [
-                    {
-                        'content': chunk.get('review_chunk_content', ''),
-                        'metadata': {
-                            'chunk_id': chunk.get('chunk_id', ''),
-                            'section_label': chunk.get('section_label', ''),
-                            'page': chunk.get('element_tag', {}).get('page', 0)
-                        }
-                    }
-                    for chunk in chunks
-                ],
-                'full_text': ''.join([page.get('text', '') for page in pages_content]),
-                'toc_info': toc_info,
-                'classification': {
-                    'items': classified_items,
-                    'target_level': target_level
-                } if classified_items else None,
-                'metadata': {
-                    'total_pages': len(pages_content),
-                    'total_chars': total_chars
-                }
-            }
+        Args:
+            file_content: 文件内容
+            file_type: 文件类型(pdf/docx)
 
-        except Exception as e:
-            logger.error(f"DOCX解析失败: {str(e)}")
-            # 如果智能处理失败,尝试基础处理
-            try:
-                logger.info("尝试使用基础处理模式")
-                return await self._fallback_docx_processing(file_content)
-            except Exception as fallback_error:
-                logger.error(f"基础处理模式也失败: {str(fallback_error)}")
-                raise
+        Returns:
+            Dict: 基础处理结果
+        """
+        if file_type == 'pdf':
+            return await self._fallback_pdf_processing(file_content)
+        else:
+            return await self._fallback_docx_processing(file_content)
 
     async def _fallback_pdf_processing(self, file_content: bytes) -> Dict[str, Any]:
         """PDF基础处理模式(当智能处理失败时使用)"""
-        temp_file_path = None
         try:
             from langchain_community.document_loaders import PyPDFLoader
             from langchain.text_splitter import RecursiveCharacterTextSplitter
-            
+
             logger.info("使用基础PDF处理模式")
-            
+
             # PyPDFLoader需要文件路径,创建临时文件
-            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
+            with tempfile.NamedTemporaryFile(delete=True, suffix='.pdf') as temp_file:
                 temp_file.write(file_content)
+                temp_file.flush()
                 temp_file_path = temp_file.name
-            
-            loader = PyPDFLoader(temp_file_path)
-            documents = loader.load()
-
-            # 文本分块
-            text_splitter = RecursiveCharacterTextSplitter(
-                chunk_size=1000,
-                chunk_overlap=200,
-                separators=["\n\n", "\n", " ", ""]
-            )
-            splits = text_splitter.split_documents(documents)
 
-            # 过滤空内容切块
-            valid_splits = []
-            for split in splits:
-                content = split.page_content.strip()
-                if content:
-                    split.page_content = content
-                    valid_splits.append(split)
+                loader = PyPDFLoader(temp_file_path)
+                documents = loader.load()
 
-            logger.info(f"基础处理完成,有效分块数量: {len(valid_splits)}")
+                # 文本分块
+                text_splitter = RecursiveCharacterTextSplitter(
+                    chunk_size=1000,
+                    chunk_overlap=200,
+                    separators=["\n\n", "\n", " ", ""]
+                )
+                splits = text_splitter.split_documents(documents)
+
+                # 过滤空内容切块
+                valid_splits = []
+                for split in splits:
+                    content = split.page_content.strip()
+                    if content:
+                        split.page_content = content
+                        valid_splits.append(split)
+
+                logger.info(f"基础处理完成,有效分块数量: {len(valid_splits)}")
+
+                # 不再生成splits冗余字段
+                return {
+                    'document_type': 'pdf',
+                    'total_pages': len(documents),
+                    'total_chunks': len(valid_splits),
+                    'chunks': [
+                        {
+                            'chunk_id': f'chunk_{i+1}',
+                            'page': split.metadata.get('page', 0),
+                            'content': split.page_content,
+                            'metadata': split.metadata
+                        }
+                        for i, split in enumerate(valid_splits)
+                    ]
+                }
 
-            return {
-                'document_type': 'pdf',
-                'total_pages': len(documents),
-                'total_chunks': len(valid_splits),
-                'chunks': [
-                    {
-                        'page': doc.metadata.get('page', 0),
-                        'content': doc.page_content,
-                        'metadata': doc.metadata
-                    }
-                    for doc in documents
-                ],
-                'splits': [
-                    {
-                        'content': split.page_content,
-                        'metadata': split.metadata
-                    }
-                    for split in valid_splits
-                ]
-            }
         except Exception as e:
-            logger.error(f"基础PDF处理失败: {str(e)}")
+            logger.error(f"基础PDF处理失败: {str(e)}", exc_info=True)
             raise
-        finally:
-            # 清理临时文件
-            if temp_file_path and os.path.exists(temp_file_path):
-                try:
-                    os.unlink(temp_file_path)
-                except Exception as e:
-                    logger.warning(f"清理临时文件失败: {str(e)}")
 
     async def _fallback_docx_processing(self, file_content: bytes) -> Dict[str, Any]:
         """DOCX基础处理模式(当智能处理失败时使用)"""
         try:
             from docx import Document
             from io import BytesIO
-            
+
             logger.info("使用基础DOCX处理模式(内存模式)")
             doc = Document(BytesIO(file_content))
             full_text = '\n'.join([paragraph.text for paragraph in doc.paragraphs])
@@ -498,7 +435,7 @@ class DocumentProcessor:
                 }
             }
         except Exception as e:
-            logger.error(f"基础DOCX处理失败: {str(e)}")
+            logger.error(f"基础DOCX处理失败: {str(e)}", exc_info=True)
             raise
 
     def structure_content(self, raw_content: Dict[str, Any]) -> Dict[str, Any]:
@@ -582,7 +519,7 @@ class DocumentProcessor:
             return result
 
         except Exception as e:
-            logger.error(f"内容结构化失败: {str(e)}")
+            logger.error(f"内容结构化失败: {str(e)}", exc_info=True)
             raise
 
     def _create_outline_from_toc(self, toc_info: Dict[str, Any], classification: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
@@ -643,7 +580,7 @@ class DocumentProcessor:
             }
 
         except Exception as e:
-            logger.error(f"大纲结构化处理失败: {str(e)}")
+            logger.error(f"大纲结构化处理失败: {str(e)}", exc_info=True)
             return {
                 'chapters': [],
                 'total_chapters': 0

+ 150 - 16
core/construction_review/component/report_generator.py

@@ -93,20 +93,33 @@ class ReportGenerator:
             if progress_callback:
                 progress_callback(50, "调用 LLM 生成报告摘要")
 
-            # 2. 调用 LLM 生成摘要
-            llm_summary = await self._call_llm_for_summary(
-                file_name=file_name,
-                high_risk_issues=high_risk_issues,
-                medium_risk_issues=medium_risk_issues,
-                total_issues=total_issues,
-                trace_id=file_id
-            )
-
-            if progress_callback:
-                progress_callback(80, "解析 LLM 响应")
-
-            # 3. 解析 LLM 响应
-            parsed_summary = self._parse_llm_response(llm_summary)
+            # 2. 调用 LLM 生成摘要(带降级处理)
+            parsed_summary = None
+            try:
+                llm_summary = await self._call_llm_for_summary(
+                    file_name=file_name,
+                    high_risk_issues=high_risk_issues,
+                    medium_risk_issues=medium_risk_issues,
+                    total_issues=total_issues,
+                    trace_id=file_id
+                )
+
+                if progress_callback:
+                    progress_callback(80, "解析 LLM 响应")
+
+                # 3. 解析 LLM 响应
+                parsed_summary = self._parse_llm_response(llm_summary)
+            except Exception as llm_error:
+                # LLM 调用失败,使用降级方案生成默认摘要
+                logger.warning(f"LLM 生成摘要失败,使用降级方案:{str(llm_error)}")
+                if progress_callback:
+                    progress_callback(70, "LLM 服务不可用,使用默认摘要")
+                parsed_summary = self._generate_fallback_summary(
+                    file_name=file_name,
+                    high_risk_issues=high_risk_issues,
+                    medium_risk_issues=medium_risk_issues,
+                    total_issues=total_issues
+                )
 
             if progress_callback:
                 progress_callback(100, "报告生成完成")
@@ -134,7 +147,37 @@ class ReportGenerator:
 
         except Exception as e:
             logger.error(f"报告生成失败:{str(e)}", exc_info=True)
-            raise
+            # 即使是意外错误,也尝试返回一个基本的报告,而不是让任务完全失败
+            try:
+                logger.warning(f"尝试生成应急报告...")
+                high_risk_issues, medium_risk_issues, low_risk_count = await self._extract_high_medium_risk_issues(review_results)
+                total_issues = len(high_risk_issues) + len(medium_risk_issues) + low_risk_count
+                parsed_summary = self._generate_fallback_summary(
+                    file_name=file_name,
+                    high_risk_issues=high_risk_issues,
+                    medium_risk_issues=medium_risk_issues,
+                    total_issues=total_issues,
+                    is_emergency=True
+                )
+                final_report = FinalReport(
+                    file_id=file_id,
+                    file_name=file_name,
+                    total_issues=total_issues,
+                    high_risk_count=len(high_risk_issues),
+                    medium_risk_count=len(medium_risk_issues),
+                    low_risk_count=low_risk_count,
+                    overall_assessment=parsed_summary.get("overall_assessment", ""),
+                    high_risk_summary=parsed_summary.get("high_risk_summary", []),
+                    medium_risk_summary=parsed_summary.get("medium_risk_summary", []),
+                    improvement_recommendations=parsed_summary.get("improvement_recommendations", []),
+                    risk_alerts=parsed_summary.get("risk_alerts", []),
+                    generated_at=datetime.now()
+                )
+                logger.info(f"应急报告生成完成,文件 ID: {file_id}")
+                return final_report
+            except Exception as fallback_error:
+                logger.error(f"应急报告生成也失败:{str(fallback_error)}")
+                raise
 
     async def _extract_high_medium_risk_issues(self, review_results: Dict[str, Any]) -> tuple:
         """
@@ -297,6 +340,7 @@ class ReportGenerator:
 
         except Exception as e:
             logger.error(f"调用 LLM 生成摘要失败:{str(e)}", exc_info=True)
+            # 抛出异常让上层处理降级
             raise
 
     def _format_issues_text(self, issues: List[Dict]) -> str:
@@ -357,6 +401,89 @@ class ReportGenerator:
 
         return "\n".join(result)
 
+    def _generate_fallback_summary(self, file_name: str, high_risk_issues: List[Dict],
+                                   medium_risk_issues: List[Dict], total_issues: int,
+                                   is_emergency: bool = False) -> Dict[str, Any]:
+        """
+        生成降级摘要(当 LLM 调用失败时使用)
+
+        Args:
+            file_name: 文件名称
+            high_risk_issues: 高风险问题列表
+            medium_risk_issues: 中风险问题列表
+            total_issues: 总问题数
+            is_emergency: 是否为应急模式(更简化的输出)
+
+        Returns:
+            Dict: 摘要字典
+        """
+        high_count = len(high_risk_issues)
+        medium_count = len(medium_risk_issues)
+
+        # 生成总体评价
+        if is_emergency:
+            overall = f"【应急报告】文档《{file_name}》的审查已完成,但在生成摘要时遇到技术问题。以下是基于原始审查结果的问题统计。"
+        else:
+            overall = f"文档《{file_name}》的审查已完成。共发现 {total_issues} 个问题,其中高风险 {high_count} 个,中风险 {medium_count} 个。"
+
+        # 生成高风险摘要
+        high_risk_summary = []
+        if high_risk_issues:
+            for i, issue in enumerate(high_risk_issues[:5], 1):
+                check_result = issue.get('check_result', {})
+                issue_point = check_result.get('issue_point', '未知问题')
+                location = check_result.get('location', '未知位置')
+                high_risk_summary.append(f"{i}. 【{location}】{issue_point}")
+            if len(high_risk_issues) > 5:
+                high_risk_summary.append(f"... 及其他 {len(high_risk_issues) - 5} 个高风险问题")
+        else:
+            high_risk_summary.append("未发现高风险问题")
+
+        # 生成中风险摘要
+        medium_risk_summary = []
+        if medium_risk_issues:
+            for i, issue in enumerate(medium_risk_issues[:5], 1):
+                check_result = issue.get('check_result', {})
+                issue_point = check_result.get('issue_point', '未知问题')
+                location = check_result.get('location', '未知位置')
+                medium_risk_summary.append(f"{i}. 【{location}】{issue_point}")
+            if len(medium_risk_issues) > 5:
+                medium_risk_summary.append(f"... 及其他 {len(medium_risk_issues) - 5} 个中风险问题")
+        else:
+            medium_risk_summary.append("未发现中风险问题")
+
+        # 生成改进建议
+        recommendations = []
+        if high_count > 0:
+            recommendations.append(f"优先处理 {high_count} 个高风险问题")
+        if medium_count > 0:
+            recommendations.append(f"建议处理 {medium_count} 个中风险问题")
+        if not recommendations:
+            recommendations.append("文档整体质量良好,继续保持")
+
+        # 添加技术提示
+        if is_emergency:
+            recommendations.append("【注意】报告摘要生成服务暂时不可用,以上为自动生成的简要统计")
+
+        # 生成风险预警
+        risk_alerts = []
+        if high_count > 5:
+            risk_alerts.append(f"⚠️ 高风险问题较多({high_count}个),建议立即整改")
+        elif high_count > 0:
+            risk_alerts.append(f"⚠️ 发现 {high_count} 个高风险问题,需要关注")
+        if medium_count > 10:
+            risk_alerts.append(f"⚠️ 中风险问题较多({medium_count}个),建议尽快处理")
+
+        logger.info(f"降级摘要生成完成 - 高风险:{high_count}, 中风险:{medium_count}")
+
+        return {
+            "overall_assessment": overall,
+            "high_risk_summary": high_risk_summary,
+            "medium_risk_summary": medium_risk_summary,
+            "improvement_recommendations": recommendations,
+            "risk_alerts": risk_alerts if risk_alerts else ["无特别风险预警"]
+        }
+
     def _parse_llm_response(self, llm_response: str) -> Dict[str, Any]:
         """
         解析 LLM 响应
@@ -416,4 +543,11 @@ class ReportGenerator:
             }
         except Exception as e:
             logger.error(f"解析 LLM 响应时发生未知错误:{str(e)}", exc_info=True)
-            raise
+            # 返回默认结构,不让解析错误导致整个任务失败
+            return {
+                "overall_assessment": "报告解析失败,请查看详细问题列表",
+                "high_risk_summary": [],
+                "medium_risk_summary": [],
+                "improvement_recommendations": ["请重试生成报告以获取完整摘要"],
+                "risk_alerts": ["报告生成过程中发生错误"]
+            }

+ 10 - 10
core/construction_review/component/reviewers/catalogues_check/catalogues_check.py

@@ -268,7 +268,7 @@ class CatalogCheckProcessor:
             label = catalog.get('chapter_classification', '')
 
             # 查找规范要求
-            if label not in specifications:
+            if label not in specifications and label != 'catalogue':
                 logger.warning(f"未找到标签 '{label}' 的规范要求")
                 results.append({
                     'index': catalog['index'],
@@ -554,10 +554,10 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
         if isinstance(missing_items_list, list) and len(missing_items_list) > 0:
             for missing_item in missing_items_list:
                 catalog_item = {
-                    "issue_point": f"{missing_item}缺失",
+                    "issue_point": f"目录中缺失'{missing_item}'",
                     "location": title if title else chapter_label,
-                    "suggestion": f"目录缺失:目录中缺失'{missing_item}'这个小节;当前章节仅涉及'{title if title else chapter_label}',目录中未体现'{missing_item}'相关内容;整改建议:建议在目录中补充'{missing_item}'相关内容,确保目录完整性。",
-                    "reason": f"该章节应具备要点:{specification_items_text}" if specification_items_text else "",
+                    "suggestion": f"【目录不完整】请在'{title if title else chapter_label}'章节的目录中补充'{missing_item}'小节。",
+                    "reason": f"根据规范要求,该章节应包含:{specification_items_text}" if specification_items_text else "",
                     "risk_level": "高风险",
                     "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',
                 }
@@ -567,10 +567,10 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
         if isinstance(common_elements_list, list) and len(common_elements_list) > 0:
             for common_element in common_elements_list:
                 common_item = {
-                    "issue_point": f"{common_element}缺失",
+                    "issue_point": f"缺少'{common_element}'内容",
                     "location": title if title else chapter_label,
-                    "suggestion": f"目录与大纲都缺失:目录和大纲中都缺失'{common_element}'这个小节;当前章节仅涉及'{title if title else chapter_label}',目录和大纲均未涵盖'{common_element}'相关内容;整改建议:建议在本章或前序章节中增设'{common_element}'相关内容,确保目录和大纲都包含该小节。",
-                    "reason": f"该章节应具备要点:{specification_items_text}" if specification_items_text else "",
+                    "suggestion": f"【内容缺失】'{title if title else chapter_label}'章节缺少'{common_element}'内容,请在正文中补充并在目录和大纲中体现。",
+                    "reason": f"根据规范要求,该章节应包含:{specification_items_text}" if specification_items_text else "",
                     "risk_level": "高风险",
                     "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',
                 }
@@ -580,10 +580,10 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
         if isinstance(miss_outline_list, list) and len(miss_outline_list) > 0:
             for miss_outline in miss_outline_list:
                 outline_item = {
-                    "issue_point": f"{miss_outline}缺失",
+                    "issue_point": f"大纲中缺失'{miss_outline}'",
                     "location": title if title else chapter_label,
-                    "suggestion": f"大纲缺失:大纲中缺失'{miss_outline}'这个小节;当前章节仅涉及'{title if title else chapter_label}',大纲中未涵盖'{miss_outline}'相关内容;整改建议:建议在大纲中补充'{miss_outline}'相关内容,确保大纲完整性。",
-                    "reason": f"该章节应具备要点:{specification_items_text}" if specification_items_text else "",
+                    "suggestion": f"【大纲不完整】请在'{title if title else chapter_label}'章节的大纲中补充'{miss_outline}'小节。",
+                    "reason": f"根据规范要求,该章节应包含:{specification_items_text}" if specification_items_text else "",
                     "risk_level": "高风险",
                     "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',
                 }

+ 182 - 0
core/construction_review/component/reviewers/check_completeness/__init__.py

@@ -0,0 +1,182 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+完整性审查模块
+
+提供三种实现方式:
+
+1. 【推荐】轻量级三级审查 - 基于分类结果,无LLM,快速
+   from core.construction_review.component.reviewers.check_completeness import (
+       LightweightCompletenessChecker,
+       check_completeness_lightweight,
+       result_to_dict_lightweight
+   )
+   特点:
+   - 目录审查到二级
+   - 完整性审查到三级(基于分类结果,无需LLM)
+   - 大纲审查到二级
+
+2. 三级审查实现(LLM版)- 使用LLM判断内容质量
+   from core.construction_review.component.reviewers.check_completeness import (
+       TertiaryCompletenessChecker,
+       check_completeness_tertiary,
+       tertiary_result_to_dict
+   )
+   特点:
+   - 三级粒度审查
+   - 使用LLM判断内容是否满足关注点要求
+   - 调用成本高,适合精细审查
+
+3. 二级审查实现(原有)- 基于二级分类结果
+   from core.construction_review.component.reviewers.check_completeness import (
+       CompletenessChecker,
+       check_completeness,
+       result_to_dict
+   )
+
+升级建议:
+- 【推荐】新项目使用轻量级审查(LightweightCompletenessChecker)
+  - 速度快,无LLM调用成本
+  - 完全依赖分类器结果
+  - 适合大批量文档快速筛查
+
+- 如需内容质量判断,使用三级审查(TertiaryCompletenessChecker)
+  - 使用LLM判断内容质量
+  - 适合关键文档深度审查
+
+- 旧项目可逐步迁移到轻量级审查
+"""
+
+# ============================================================================
+# 轻量级三级审查实现(推荐)
+# ============================================================================
+
+from .lightweight_completeness_checker import (
+    # 核心类
+    LightweightCompletenessChecker,
+    TertiarySpecLoader as LightweightSpecLoader,
+    TertiaryItem,
+    SecondaryItem,
+    LightweightCompletenessResult,
+
+    # 便捷函数
+    check_completeness_lightweight,
+    result_to_dict as result_to_dict_lightweight,
+)
+
+# ============================================================================
+# 三级审查实现(LLM版)
+# ============================================================================
+
+from .tertiary_completeness_checker import (
+    # 核心类
+    TertiaryCompletenessChecker,
+    TertiarySpecLoader,
+    SimpleLLMClient,
+
+    # 数据模型
+    TertiarySpecItem,
+    Issue,
+    CompletenessResult as TertiaryCompletenessResult,
+
+    # 便捷函数
+    check_completeness_tertiary,
+    result_to_dict as tertiary_result_to_dict,
+)
+
+# ============================================================================
+# 二级审查实现(原有,兼容)
+# ============================================================================
+
+from .completeness_checker import (
+    # 核心类
+    CompletenessChecker,
+    SpecLoader,
+    SimpleLLMClient as SimpleLLMClientV1,  # 别名避免冲突
+
+    # 数据模型
+    SpecItem,
+    Issue as IssueV1,  # 别名避免冲突
+    CompletenessResult as CompletenessResultV1,  # 别名避免冲突
+
+    # 便捷函数
+    check_completeness,
+    result_to_dict,
+)
+
+from .adapter import (
+    # 适配器函数
+    check_completeness_adapter,
+    check_completeness_simple,
+    patch_ai_review_engine,
+    unpatch_ai_review_engine,
+)
+
+# ============================================================================
+# 原有实现(兼容)
+# ============================================================================
+
+from .components.data_loader import CSVDataLoader
+from .components.prompt_builder import PromptBuilder
+from .components.llm_client import LLMClient
+from .components.result_processor import ResultProcessor
+from .components.review_pipeline import ReviewPipeline
+from .components.result_analyzer import ResultAnalyzer
+
+
+__all__ = [
+    # ==================== 轻量级三级审查(推荐)====================
+    # 核心类
+    'LightweightCompletenessChecker',
+    'LightweightSpecLoader',
+    'TertiaryItem',
+    'SecondaryItem',
+    'LightweightCompletenessResult',
+
+    # 便捷函数
+    'check_completeness_lightweight',
+    'result_to_dict_lightweight',
+
+    # ==================== 三级审查(LLM版)====================
+    # 核心类
+    'TertiaryCompletenessChecker',
+    'TertiarySpecLoader',
+
+    # 数据模型
+    'TertiarySpecItem',
+    'TertiaryCompletenessResult',
+
+    # 便捷函数
+    'check_completeness_tertiary',
+    'tertiary_result_to_dict',
+
+    # ==================== 二级审查(兼容)====================
+    # 核心类
+    'CompletenessChecker',
+    'SpecLoader',
+    'SimpleLLMClient',
+
+    # 数据模型
+    'SpecItem',
+    'Issue',
+    'CompletenessResultV1',
+
+    # 便捷函数
+    'check_completeness',
+    'result_to_dict',
+
+    # 适配器
+    'check_completeness_adapter',
+    'check_completeness_simple',
+    'patch_ai_review_engine',
+    'unpatch_ai_review_engine',
+
+    # 原有实现 - 组件
+    'CSVDataLoader',
+    'PromptBuilder',
+    'LLMClient',
+    'ResultProcessor',
+    'ReviewPipeline',
+    'ResultAnalyzer',
+]

+ 184 - 0
core/construction_review/component/reviewers/check_completeness/adapter.py

@@ -0,0 +1,184 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+完整性审查适配器
+
+提供与原有接口兼容的调用方式,内部使用简化版实现。
+
+使用方式:
+    # 原有调用方式
+    result = await ai_review_engine.check_completeness(trace_id_idx, review_content, state, stage_name)
+
+    # 新的替代调用方式
+    from core.construction_review.component.reviewers.completeness_checker.adapter import check_completeness_adapter
+
+    result = await check_completeness_adapter(trace_id_idx, review_content, state, stage_name)
+"""
+
+import time
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+
+from foundation.observability.logger.loggering import review_logger as logger
+
+from .completeness_checker import (
+    CompletenessChecker,
+    CompletenessResult,
+    result_to_dict
+)
+
+
+async def check_completeness_adapter(
+    trace_id_idx: str,
+    review_content: List[Dict[str, Any]],
+    state: Optional[Dict[str, Any]] = None,
+    stage_name: str = "",
+    llm_config: Optional[Dict[str, Any]] = None,
+    enable_llm_review: bool = True
+) -> Dict[str, Any]:
+    """
+    完整性审查适配器 - 兼容原有接口
+
+    Args:
+        trace_id_idx: 追踪ID索引
+        review_content: 审查内容,文档块列表(chunks格式)
+        state: 状态字典(可选,用于进度推送)
+        stage_name: 阶段名称
+        llm_config: LLM配置(可选)
+        enable_llm_review: 是否启用LLM审查
+
+    Returns:
+        Dict[str, Any]: 审查结果(与原有格式兼容)
+    """
+    start_time = time.time()
+
+    try:
+        # 获取规范文件路径
+        spec_csv_path = str(
+            Path(__file__).parent / "config" / "Construction_Plan_Content_Specification.csv"
+        )
+
+        # 创建审查器
+        checker = CompletenessChecker(
+            spec_csv_path=spec_csv_path,
+            llm_config=llm_config,
+            max_concurrent=20
+        )
+
+        # 从review_content提取outline(如果有)
+        outline = None
+        if state and "structured_content" in state:
+            outline = state["structured_content"].get("outline")
+
+        # 执行检查
+        result = await checker.check(
+            chunks=review_content,
+            outline=outline,
+            enable_llm_review=enable_llm_review
+        )
+
+        # 转换为兼容格式
+        result_dict = result_to_dict(result)
+
+        # 返回原有格式(元组)
+        return result_dict, trace_id_idx
+
+    except Exception as e:
+        execution_time = time.time() - start_time
+        error_msg = f"完整性审查失败: {str(e)}"
+        logger.error(error_msg, exc_info=True)
+
+        # 返回错误格式(与原有保持一致)
+        return {
+            'error': error_msg,
+            'exception': str(e),
+            'execution_time': execution_time
+        }
+
+
+async def check_completeness_simple(
+    chunks: List[Dict[str, Any]],
+    outline: Optional[Dict[str, Any]] = None,
+    llm_config: Optional[Dict[str, Any]] = None,
+    enable_llm_review: bool = True
+) -> CompletenessResult:
+    """
+    简化的完整性检查接口
+
+    Args:
+        chunks: 三级分类后的文档块列表
+        outline: 三级分类后的目录结构(可选)
+        llm_config: LLM配置(可选)
+        enable_llm_review: 是否启用LLM审查
+
+    Returns:
+        CompletenessResult: 完整性审查结果
+    """
+    spec_csv_path = str(
+        Path(__file__).parent / "config" / "Construction_Plan_Content_Specification.csv"
+    )
+
+    checker = CompletenessChecker(
+        spec_csv_path=spec_csv_path,
+        llm_config=llm_config,
+        max_concurrent=20
+    )
+
+    return await checker.check(chunks, outline, enable_llm_review)
+
+
+# ============================================================================
+# 用于替换 AIReviewEngine.check_completeness 的补丁
+# ============================================================================
+
+def patch_ai_review_engine():
+    """
+    补丁函数:替换 AIReviewEngine.check_completeness 方法
+
+    使用方式:
+        from core.construction_review.component.reviewers.completeness_checker.adapter import patch_ai_review_engine
+        patch_ai_review_engine()
+    """
+    try:
+        from core.construction_review.component import ai_review_engine
+
+        # 保存原始方法(如果需要回滚)
+        ai_review_engine._original_check_completeness = ai_review_engine.AIReviewEngine.check_completeness
+
+        # 替换为新方法
+        async def new_check_completeness(self, trace_id_idx, review_content, state, stage_name):
+            """使用简化版实现的完整性检查"""
+            # 从self获取LLM配置
+            llm_config = getattr(self, '_llm_config', None)
+
+            return await check_completeness_adapter(
+                trace_id_idx=trace_id_idx,
+                review_content=review_content,
+                state=state,
+                stage_name=stage_name,
+                llm_config=llm_config,
+                enable_llm_review=True
+            )
+
+        ai_review_engine.AIReviewEngine.check_completeness = new_check_completeness
+        logger.info("已替换 AIReviewEngine.check_completeness 为简化版实现")
+
+    except ImportError as e:
+        logger.warning(f"无法应用补丁: {e}")
+
+
+def unpatch_ai_review_engine():
+    """
+    回滚补丁:恢复原始的 AIReviewEngine.check_completeness 方法
+    """
+    try:
+        from core.construction_review.component import ai_review_engine
+
+        if hasattr(ai_review_engine, '_original_check_completeness'):
+            ai_review_engine.AIReviewEngine.check_completeness = ai_review_engine._original_check_completeness
+            delattr(ai_review_engine, '_original_check_completeness')
+            logger.info("已恢复原始 AIReviewEngine.check_completeness 方法")
+
+    except ImportError as e:
+        logger.warning(f"无法回滚补丁: {e}")

+ 838 - 0
core/construction_review/component/reviewers/check_completeness/completeness_checker.py

@@ -0,0 +1,838 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+简化版完整性审查模块
+
+基于三级分类结果直接实现完整性审查,替代原有的复杂调用链路。
+
+主要功能:
+1. 章节完整性检查(检查目录结构是否完整)
+2. 内容要点完整性检查(检查文档内容是否包含规范要求的要点)
+
+简化改进:
+- 统一入口,一个类处理所有完整性检查
+- 直接使用三级分类结果(chunks + outline)
+- 去除Redis中间存储依赖
+- 合并审查逻辑,减少代码量约75%
+"""
+
+import asyncio
+import json
+import re
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Set, Tuple
+
+import aiohttp
+
+from foundation.observability.logger.loggering import review_logger as logger
+
+
+# ============================================================================
+# 数据模型定义
+# ============================================================================
+
@dataclass
class SpecItem:
    """One requirement row from the construction-plan content specification."""
    tag: str                    # chapter tag / section code (e.g. "basis")
    primary_dir: str            # level-1 directory (chapter) title
    secondary_dir: str          # level-2 directory (section) title
    requirement: str            # required content description
    point_count: int            # number of required content points
+
+
@dataclass
class Issue:
    """A single finding produced by the completeness review."""
    issue_point: str            # what is missing or wrong
    location: str               # where in the document the issue applies
    suggestion: str             # recommended remediation
    reason: str                 # rationale, citing the specification
    risk_level: str             # "高风险" / "中风险" / "低风险"
    reference_source: str       # regulation the finding is based on
+
+
@dataclass
class CompletenessResult:
    """Aggregate outcome of one completeness review run."""
    success: bool
    issues: List[Issue]
    chapter_issues: List[Issue] = field(default_factory=list)   # missing-chapter findings
    content_issues: List[Issue] = field(default_factory=list)   # missing-content-point findings
    summary: Dict[str, Any] = field(default_factory=dict)       # counts by category and risk level
    execution_time: float = 0.0                                 # wall-clock seconds for the run
    error_message: Optional[str] = None                         # set when success is False
+
+
+# ============================================================================
+# 规范加载器
+# ============================================================================
+
class SpecLoader:
    """Loads the construction-plan content specification from a TSV file.

    The file is tab-separated with one header row; each data row describes
    one requirement (tag, level-1 dir, level-2 dir, requirement text, point
    count). Rows are grouped by chapter tag and parsed lazily on first use.
    """

    # Chapter tag -> Chinese display name.
    TAG_NAME_MAP = {
        "basis": "编制依据",
        "overview": "工程概况",
        "plan": "施工计划",
        "technology": "施工工艺技术",
        "safety": "安全保证措施",
        "quality": "质量保证措施",
        "environment": "环境保证措施",
        "management": "施工管理及作业人员配备与分工",
        "acceptance": "验收要求",
        "other": "其他资料",
        "catalogue": "目录"
    }

    def __init__(self, csv_path: str):
        """
        Initialize the loader; the file is not read until first access.

        Args:
            csv_path: path to the specification file (tab-separated CSV)
        """
        self.csv_path = csv_path
        self._all_specs: Dict[str, List[SpecItem]] = {}
        self._loaded = False

    def _load(self) -> None:
        """Read and parse the spec file once; subsequent calls are no-ops.

        Raises:
            ValueError: when the file cannot be decoded with any known encoding.
        """
        if self._loaded:
            return

        # The file may come from different tools; probe common encodings.
        content = None
        for encoding in ('utf-8-sig', 'utf-16', 'gbk', 'gb18030'):
            try:
                with open(self.csv_path, 'r', encoding=encoding) as f:
                    content = f.read()
                break
            except UnicodeDecodeError:
                continue

        if content is None:
            raise ValueError(f"无法读取规范文件: {self.csv_path}")

        # Parse as tab-separated values.
        lines = content.strip().split('\n')
        if not lines:
            return

        # Skip the header row; a valid data row needs at least 5 columns.
        for line in lines[1:]:
            parts = line.split('\t')
            if len(parts) < 5:
                continue

            tag = parts[0].strip()
            try:
                point_count = int(parts[4].strip())
            except ValueError:
                point_count = 0  # tolerate a missing or non-numeric count

            self._all_specs.setdefault(tag, []).append(SpecItem(
                tag=tag,
                primary_dir=parts[1].strip(),
                secondary_dir=parts[2].strip(),
                requirement=parts[3].strip(),
                point_count=point_count,
            ))

        self._loaded = True
        logger.info(f"规范加载完成,共 {len(self._all_specs)} 个章节分类")

    def get_requirements(self, chapter_code: str) -> List[SpecItem]:
        """
        Return the specification items for one chapter.

        Args:
            chapter_code: chapter tag (e.g. "basis", "overview")

        Returns:
            List[SpecItem]: spec items, empty when the tag is unknown
        """
        self._load()
        return self._all_specs.get(chapter_code, [])

    def get_all_specs(self) -> Dict[str, List[SpecItem]]:
        """Return all specification items grouped by chapter tag."""
        self._load()
        return self._all_specs

    def get_chapter_name(self, tag: str) -> str:
        """Return the Chinese chapter name for a tag (the tag itself if unmapped)."""
        return self.TAG_NAME_MAP.get(tag, tag)
+
+
+# ============================================================================
+# LLM客户端
+# ============================================================================
+
class SimpleLLMClient:
    """Minimal async client for an OpenAI-compatible chat-completions API."""

    def __init__(self, config: Dict[str, Any]):
        """
        Initialize the client from a settings dict.

        Args:
            config: recognised keys are server_url, model_id, api_key,
                timeout (seconds), max_retries, temperature and max_tokens;
                missing keys fall back to the defaults below.
        """
        self.server_url = config.get('server_url', '')
        self.model_id = config.get('model_id', '')
        self.api_key = config.get('api_key', '')
        self.timeout = config.get('timeout', 30)
        self.max_retries = config.get('max_retries', 2)
        self.temperature = config.get('temperature', 0.3)
        self.max_tokens = config.get('max_tokens', 1024)

    async def call(self, system_prompt: str, user_prompt: str) -> str:
        """
        Call the chat-completions endpoint with linear-backoff retries.

        Args:
            system_prompt: system message (omitted from the payload when empty)
            user_prompt: user message

        Returns:
            str: assistant message content ("" when the response has no choices)

        Raises:
            Exception: when all ``max_retries + 1`` attempts fail or time out.
        """
        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
        }

        messages = []
        if system_prompt:
            messages.append({'role': 'system', 'content': system_prompt})
        messages.append({'role': 'user', 'content': user_prompt})

        payload = {
            'model': self.model_id,
            'messages': messages,
            'temperature': self.temperature,
            'max_tokens': self.max_tokens
        }

        url = f"{self.server_url.rstrip('/')}/chat/completions"

        # One initial attempt plus max_retries retries; backoff grows linearly.
        for attempt in range(self.max_retries + 1):
            try:
                timeout = aiohttp.ClientTimeout(total=self.timeout)
                # NOTE: a fresh session per call — simple, at the cost of
                # connection reuse.
                async with aiohttp.ClientSession(timeout=timeout) as session:
                    async with session.post(url, headers=headers, json=payload) as response:
                        if response.status == 200:
                            result = await response.json()
                            return result.get('choices', [{}])[0].get('message', {}).get('content', '')
                        else:
                            error_text = await response.text()
                            if attempt < self.max_retries:
                                await asyncio.sleep(1 * (attempt + 1))
                                continue
                            raise Exception(f"API调用失败: {response.status}, {error_text}")
            except asyncio.TimeoutError:
                if attempt < self.max_retries:
                    await asyncio.sleep(1 * (attempt + 1))
                    continue
                raise Exception(f"API调用超时: {self.timeout}秒")
            except Exception as e:
                # Includes the non-200 Exception raised above on the last attempt.
                if attempt < self.max_retries:
                    await asyncio.sleep(1 * (attempt + 1))
                    continue
                raise e

        raise Exception("LLM调用失败,已重试所有次数")
+
+
+# ============================================================================
+# 完整性审查器
+# ============================================================================
+
class CompletenessChecker:
    """
    Unified completeness reviewer.

    Works directly on the tertiary-classification output:
    1. Chapter completeness check (structural, no LLM required).
    2. Content-point completeness check (LLM-assisted).
    """

    # System prompt for the content-point review.
    SYSTEM_PROMPT = """你是一名工程与施工领域的专业文档审查专家,负责审查施工方案文档的内容完整性。
- 仔细分析待审查文本内容,识别文本中实际包含的审查要点;
- 对于每个二级目录,检查文本中包含了哪些编号的要点,将这些编号记录在列表中;
- 如果某个二级目录的要点一个都没有包含,则返回空列表[];
- 判断要严格但合理,如果文本内容能够满足要点的核心要求,即使表述方式不同,也应判定为已包含;
- 每个二级目录的要点编号必须严格在给定范围内,严禁编造超出范围的编号;
- 只输出JSON格式,不要添加任何解释性文字;
- /no_think"""

    # Rendered with str.format(), so the literal braces of the JSON example
    # MUST be escaped as '{{' / '}}'. Unescaped braces make .format() raise,
    # and because _check_single_chapter swallows per-chapter exceptions that
    # bug silently disabled the whole content review.
    USER_PROMPT_TEMPLATE = """任务:审查施工方案文档内容,识别文本中实际包含的审查要点。

待审查文本内容:
{content}

审查要点要求:
{requirements}

输出格式:必须严格按照以下JSON格式输出审查结果:
{{
  "二级目录名称1": [要点编号列表,如: [1, 2]],
  "二级目录名称2": [要点编号列表,如: [1]],
  "二级目录名称3": []
}}

说明:
- JSON对象的字段名必须是二级目录名称;
- 每个字段的值是一个整数数组,表示文本中包含的要点编号;
- 如果某个二级目录的要点一个都没有包含,该字段的值应为空数组[];
- 只输出JSON对象,不要添加任何解释性文字。"""

    # Regulation cited in every finding.
    REFERENCE_SOURCE = "《桥梁公司危险性较大工程管理实施细则(2025版)》"

    def __init__(
        self,
        spec_csv_path: str,
        llm_config: Optional[Dict[str, Any]] = None,
        max_concurrent: int = 20
    ):
        """
        Initialize the reviewer.

        Args:
            spec_csv_path: path to the specification CSV (tab-separated)
            llm_config: LLM settings; when None the LLM review is skipped
            max_concurrent: cap on concurrent per-chapter LLM calls
        """
        self.spec_loader = SpecLoader(spec_csv_path)
        self.llm_client = SimpleLLMClient(llm_config) if llm_config else None
        self.max_concurrent = max_concurrent
        self.semaphore = asyncio.Semaphore(max_concurrent)

    async def check(
        self,
        chunks: List[Dict[str, Any]],
        outline: Optional[Dict[str, Any]] = None,
        enable_llm_review: bool = True
    ) -> CompletenessResult:
        """
        Run the full completeness review.

        Args:
            chunks: tertiary-classified document chunks
            outline: tertiary-classified outline structure (optional)
            enable_llm_review: whether to run the LLM content-point review

        Returns:
            CompletenessResult: success=False with error_message on failure
        """
        start_time = time.time()
        chapter_issues = []
        content_issues = []

        try:
            # 1. Structural chapter check (no LLM).
            chapter_issues = self._check_chapters(chunks, outline)
            logger.info(f"章节完整性检查完成,发现 {len(chapter_issues)} 个问题")

            # 2. Content-point check (LLM) — only when enabled and configured.
            if enable_llm_review and self.llm_client:
                content_issues = await self._check_content_points(chunks)
                logger.info(f"内容要点完整性检查完成,发现 {len(content_issues)} 个问题")

            # 3. Merge and summarize.
            all_issues = chapter_issues + content_issues
            summary = {
                "total_issues": len(all_issues),
                "chapter_issues": len(chapter_issues),
                "content_issues": len(content_issues),
                "high_risk": sum(1 for i in all_issues if i.risk_level == "高风险"),
                "medium_risk": sum(1 for i in all_issues if i.risk_level == "中风险"),
                "low_risk": sum(1 for i in all_issues if i.risk_level == "低风险")
            }

            execution_time = time.time() - start_time
            logger.info(f"完整性审查完成,共发现 {len(all_issues)} 个问题,耗时 {execution_time:.2f}秒")

            return CompletenessResult(
                success=True,
                issues=all_issues,
                chapter_issues=chapter_issues,
                content_issues=content_issues,
                summary=summary,
                execution_time=execution_time
            )

        except Exception as e:
            execution_time = time.time() - start_time
            logger.error(f"完整性审查失败: {str(e)}", exc_info=True)
            return CompletenessResult(
                success=False,
                issues=[],
                summary={},
                execution_time=execution_time,
                error_message=str(e)
            )

    def _check_chapters(
        self,
        chunks: List[Dict[str, Any]],
        outline: Optional[Dict[str, Any]] = None
    ) -> List[Issue]:
        """
        Structural check: are all required chapters and sections present?

        Args:
            chunks: document chunks
            outline: outline structure (optional, merged with chunk data)

        Returns:
            List[Issue]: missing-chapter / missing-section findings
        """
        issues = []
        all_specs = self.spec_loader.get_all_specs()

        # Collect the chapters/sections actually present in the document.
        actual_chapters = set()
        actual_sections: Dict[str, Set[str]] = {}  # chapter_code -> section titles

        for chunk in chunks:
            chapter_code = chunk.get('chapter_classification', '')
            section_label = chunk.get('section_label', '')

            if chapter_code:
                actual_chapters.add(chapter_code)
                if chapter_code not in actual_sections:
                    actual_sections[chapter_code] = set()

                # section_label is "level1 -> level2 -> ..."; take level 2.
                if '->' in section_label:
                    parts = section_label.split('->')
                    if len(parts) >= 2:
                        actual_sections[chapter_code].add(parts[1].strip())

        # Merge chapter info from the outline when available.
        if outline and 'chapters' in outline:
            for chapter in outline['chapters']:
                chapter_code = chapter.get('chapter_classification', '')
                if chapter_code:
                    actual_chapters.add(chapter_code)
                    if chapter_code not in actual_sections:
                        actual_sections[chapter_code] = set()
                    for subsection in chapter.get('subsections', []):
                        title = subsection.get('title', '')
                        if title:
                            actual_sections[chapter_code].add(title)

        # Compare against the specification.
        for chapter_code, spec_items in all_specs.items():
            if not spec_items:
                continue

            chapter_name = self.spec_loader.get_chapter_name(chapter_code)

            # Missing level-1 chapter: report once, skip section checks.
            if chapter_code not in actual_chapters:
                issues.append(Issue(
                    issue_point=f"缺少章节:{chapter_name}",
                    location="全文",
                    suggestion=f"建议添加【{chapter_name}】章节",
                    reason=f"根据规范要求,施工方案应包含【{chapter_name}】章节",
                    risk_level="高风险",
                    reference_source=self.REFERENCE_SOURCE
                ))
                continue

            # Missing level-2 sections within an existing chapter.
            actual_section_set = actual_sections.get(chapter_code, set())
            required_sections = set(item.secondary_dir for item in spec_items)

            for missing_section in required_sections - actual_section_set:
                # Attach the spec requirement text for that section, if any.
                section_requirement = ""
                for item in spec_items:
                    if item.secondary_dir == missing_section:
                        section_requirement = item.requirement
                        break

                issues.append(Issue(
                    issue_point=f"【章节缺失】{chapter_name}缺少'{missing_section}'小节",
                    location=chapter_name,
                    suggestion=f"请在{chapter_name}中补充'{missing_section}'内容" + (f",应包含:{section_requirement}" if section_requirement else ""),
                    reason=f"根据《桥梁公司危险性较大工程管理实施细则(2025版)》,{chapter_name}应包含'{missing_section}'" + (f",具体要求:{section_requirement}" if section_requirement else ""),
                    risk_level="高风险",
                    reference_source=self.REFERENCE_SOURCE
                ))

        return issues

    async def _check_content_points(self, chunks: List[Dict[str, Any]]) -> List[Issue]:
        """
        LLM check: does each chapter's text cover the required content points?

        Args:
            chunks: document chunks

        Returns:
            List[Issue]: missing-content-point findings (failed chapters are
            logged and skipped rather than aborting the whole review)
        """
        issues = []

        chapter_contents = self._group_and_merge_by_chapter(chunks)
        if not chapter_contents:
            return issues

        # Review all chapters concurrently (bounded by self.semaphore).
        tasks = [
            self._check_single_chapter(chapter_code, content)
            for chapter_code, content in chapter_contents.items()
        ]
        results = await asyncio.gather(*tasks, return_exceptions=True)

        for result in results:
            if isinstance(result, list):
                issues.extend(result)
            elif isinstance(result, Exception):
                logger.error(f"章节审查失败: {str(result)}")

        return issues

    async def _check_single_chapter(
        self,
        chapter_code: str,
        content: str
    ) -> List[Issue]:
        """
        Review one chapter's merged text against its spec items.

        Args:
            chapter_code: chapter tag
            content: merged chapter text

        Returns:
            List[Issue]: findings for this chapter ([] on error)
        """
        async with self.semaphore:
            try:
                spec_items = self.spec_loader.get_requirements(chapter_code)
                if not spec_items:
                    return []

                chapter_name = self.spec_loader.get_chapter_name(chapter_code)

                # Build the prompt; cap content length to bound token usage.
                requirements_text = self._build_requirements_text(spec_items)
                user_prompt = self.USER_PROMPT_TEMPLATE.format(
                    content=content[:8000],
                    requirements=requirements_text
                )

                llm_response = await self.llm_client.call(self.SYSTEM_PROMPT, user_prompt)

                found_points = self._parse_llm_response(llm_response)

                return self._generate_missing_issues(chapter_name, spec_items, found_points)

            except Exception as e:
                # Best-effort per chapter: log and continue with the others.
                logger.error(f"章节 {chapter_code} 审查失败: {str(e)}")
                return []

    def _group_and_merge_by_chapter(self, chunks: List[Dict[str, Any]]) -> Dict[str, str]:
        """
        Group chunk texts by chapter tag and join them with blank lines.

        Args:
            chunks: document chunks

        Returns:
            Dict[str, str]: chapter_code -> merged text (chunks without a
            tag or without content are dropped)
        """
        chapter_contents: Dict[str, List[str]] = {}

        for chunk in chunks:
            chapter_code = chunk.get('chapter_classification', '')
            content = chunk.get('content', '')
            if chapter_code and content:
                chapter_contents.setdefault(chapter_code, []).append(content)

        return {
            code: "\n\n".join(contents)
            for code, contents in chapter_contents.items()
        }

    def _build_requirements_text(self, spec_items: List[SpecItem]) -> str:
        """Render spec items as the requirements section of the user prompt."""
        lines = []
        for item in spec_items:
            # Tell the model the valid point-number range up front.
            range_hint = f"(共有{item.point_count}个要点,编号1~{item.point_count})" if item.point_count > 0 else ""
            lines.append(f"{item.secondary_dir}{range_hint}:\n{item.requirement}\n")
        return "\n".join(lines)

    def _parse_llm_response(self, response: str) -> Dict[str, List[int]]:
        """
        Parse the LLM answer into {section name: [point numbers found]}.

        Args:
            response: raw LLM text

        Returns:
            Dict[str, List[int]]: {} when the response is empty or unparseable;
            scalar values are wrapped in a one-element list, junk becomes []
        """
        if not response:
            return {}

        json_str = self._extract_json(response)
        if not json_str:
            return {}

        try:
            result = json.loads(json_str)
        except json.JSONDecodeError:
            return {}

        # Normalize: every value becomes a list of ints.
        normalized = {}
        for key, value in result.items():
            if isinstance(value, list):
                normalized[key] = [int(v) for v in value if isinstance(v, (int, str)) and str(v).isdigit()]
            elif isinstance(value, (int, str)) and str(value).isdigit():
                normalized[key] = [int(value)]
            else:
                normalized[key] = []

        return normalized

    def _extract_json(self, text: str) -> str:
        """Extract the first valid JSON object from free-form LLM text ("" if none)."""
        text = text.strip()

        # 1) JSON inside a fenced code block.
        code_block_pattern = r'```(?:json)?\s*(\{.*?\})\s*```'
        matches = re.findall(code_block_pattern, text, re.DOTALL)
        for match in matches:
            try:
                json.loads(match)
                return match
            except json.JSONDecodeError:
                continue

        # 2) The whole text is already JSON.
        try:
            json.loads(text)
            return text
        except json.JSONDecodeError:
            pass

        # 3) Scan from the first '{', balancing braces outside string literals.
        start_idx = text.find('{')
        if start_idx == -1:
            return ""

        brace_count = 0
        in_string = False
        escape_next = False

        for i in range(start_idx, len(text)):
            char = text[i]

            if escape_next:
                escape_next = False
                continue

            if char == '\\':
                escape_next = True
                continue

            if char == '"':
                in_string = not in_string
                continue

            if not in_string:
                if char == '{':
                    brace_count += 1
                elif char == '}':
                    brace_count -= 1
                    if brace_count == 0:
                        json_str = text[start_idx:i+1]
                        try:
                            json.loads(json_str)
                            return json_str
                        except json.JSONDecodeError:
                            pass

        return ""

    def _generate_missing_issues(
        self,
        chapter_name: str,
        spec_items: List[SpecItem],
        found_points: Dict[str, List[int]]
    ) -> List[Issue]:
        """
        Diff found points against the spec and emit one Issue per gap.

        Args:
            chapter_name: display name of the chapter
            spec_items: spec items for the chapter
            found_points: point numbers the LLM judged present

        Returns:
            List[Issue]: one finding per section with missing points
        """
        issues = []

        for item in spec_items:
            found = found_points.get(item.secondary_dir, [])

            if item.point_count > 0:
                missing = [i for i in range(1, item.point_count + 1) if i not in found]
            else:
                missing = []

            if not missing:
                continue

            # Risk level scales with the number of missing points.
            missing_count = len(missing)
            if missing_count >= 3:
                risk_level = "高风险"
            elif missing_count == 2:
                risk_level = "中风险"
            else:
                risk_level = "低风险"

            # Look up the text of each missing point inside the requirement.
            # NOTE(review): assumes the requirement is "prefix:p1;p2;..." with
            # ASCII ':' and ';' separators — verify against the spec CSV.
            requirement_parts = item.requirement.split(':')[-1].split(';')
            missing_content = []
            for idx in missing:
                if 0 < idx <= len(requirement_parts):
                    missing_content.append(f"{idx}.{requirement_parts[idx-1]}")
            missing_content_text = ';'.join(missing_content)

            issue_point = f"【内容不完整】{chapter_name}的'{item.secondary_dir}'部分缺少{missing_count}个要点"
            suggestion = f"请补充'{item.secondary_dir}'的第{','.join(map(str, missing))}点内容:{missing_content_text}"
            reason = f"根据《桥梁公司危险性较大工程管理实施细则(2025版)》,{chapter_name}的'{item.secondary_dir}'应包含:{item.requirement}。当前缺失:{missing_content_text}"

            issues.append(Issue(
                issue_point=issue_point,
                location=chapter_name,
                suggestion=suggestion,
                reason=reason,
                risk_level=risk_level,
                reference_source=self.REFERENCE_SOURCE
            ))

        return issues
+
+
+# ============================================================================
+# 便捷函数
+# ============================================================================
+
async def check_completeness(
    chunks: List[Dict[str, Any]],
    outline: Optional[Dict[str, Any]] = None,
    spec_csv_path: Optional[str] = None,
    llm_config: Optional[Dict[str, Any]] = None,
    enable_llm_review: bool = True
) -> CompletenessResult:
    """
    Convenience wrapper: build a CompletenessChecker and run it once.

    Args:
        chunks: tertiary-classified document chunks
        outline: tertiary-classified outline structure (optional)
        spec_csv_path: spec CSV path (defaults to the bundled config file)
        llm_config: LLM settings (optional; None skips the LLM review)
        enable_llm_review: whether to run the LLM content-point review

    Returns:
        CompletenessResult: the review outcome
    """
    if spec_csv_path is None:
        # Fall back to the spec shipped next to this module.
        default_path = Path(__file__).parent / "config" / "Construction_Plan_Content_Specification.csv"
        spec_csv_path = str(default_path)

    checker = CompletenessChecker(
        spec_csv_path=spec_csv_path,
        llm_config=llm_config,
        max_concurrent=20,
    )
    return await checker.check(chunks, outline, enable_llm_review)
+
+
def result_to_dict(result: CompletenessResult) -> Dict[str, Any]:
    """
    Convert a CompletenessResult into the legacy dict payload.

    Args:
        result: the completeness review outcome

    Returns:
        Dict[str, Any]: dict shaped like the original reviewer interface
    """
    issue_payload = [
        {
            "issue_point": iss.issue_point,
            "location": iss.location,
            "suggestion": iss.suggestion,
            "reason": iss.reason,
            "risk_level": iss.risk_level,
            "reference_source": iss.reference_source,
        }
        for iss in result.issues
    ]

    # Label the review with the location of the last issue, if any.
    last_location = result.issues[-1].location if result.issues else ""

    return {
        "success": result.success,
        "details": {
            "name": "completeness_check",
            "response": issue_payload,
            "chapter_issues_count": len(result.chapter_issues),
            "content_issues_count": len(result.content_issues),
            "review_location_label": last_location,
            "chapter_code": "completeness",
            "original_content": "",
        },
        "summary": result.summary,
        "execution_time": result.execution_time,
        "error_message": result.error_message,
    }

+ 1 - 1
core/construction_review/component/reviewers/check_completeness/components/data_loader.py

@@ -51,7 +51,7 @@ class CSVDataLoader(IDataLoader):
                     "内容要求": requirement,
                     "内容要点数量": point_count
                 })
-        
+        specification['catalogues'] = []
         return specification
     
     def load_documents(self, json_path: str) -> List[Dict[str, Any]]:

+ 14 - 6
core/construction_review/component/reviewers/check_completeness/components/result_analyzer.py

@@ -270,11 +270,19 @@ class ResultAnalyzer(IResultAnalyzer):
             #location = "; ".join(map(str, sources)) if sources else ""
 
             requirement_list = requirement.split(':')[-1].split(';')
-            requirement_text = ';'.join([requirement_list[i-1] for i in missing_points])
-            issue_point = (
-                f"{row.get('section_label', '')}下缺失{suorces_eum[row.get('标签', '')]}中的【{level2}】内容: {';'.join([str(index+1)+'.'+str(i.split('.')[-1]) for index, i in enumerate(requirement_list[:-1])])}" if requirement else "补充缺失要点内容"
-            )
-            suggestion = f"建议补充:{requirement_text}" if requirement else "补充缺失要点内容"
+            missing_count = len(missing_points)
+            section_label = row.get('section_label', '')
+            level2_name = suorces_eum.get(row.get('标签', ''), '')
+            
+            # 获取缺失要点的具体内容
+            missing_content_list = []
+            for idx in missing_points:
+                if 0 < idx <= len(requirement_list):
+                    missing_content_list.append(f"{idx}.{requirement_list[idx-1]}")
+            missing_content_text = ';'.join(missing_content_list)
+            
+            issue_point = f"【内容不完整】{section_label}的'{level2_name}'部分缺少{missing_count}个要点"
+            suggestion = f"请补充'{level2_name}'的第{','.join(map(str, missing_points))}点内容:{missing_content_text}"
             risk_level = self._map_risk_level(len(missing_points))
 
             # 构建问题项并添加到列表
@@ -282,7 +290,7 @@ class ResultAnalyzer(IResultAnalyzer):
                 "issue_point": issue_point,
                 "location": row.get("section_label", ""),
                 "suggestion": suggestion,
-                "reason": reason,
+                "reason": f"根据《桥梁公司危险性较大工程管理实施细则(2025版)》,{section_label}的'{level2_name}'应包含:{requirement}。当前缺失:{missing_content_text}",
                 "risk_level": risk_level,
                 "review_references": review_references,
                 "reference_source": reference_source

+ 1 - 0
core/construction_review/component/reviewers/check_completeness/components/review_pipeline.py

@@ -113,6 +113,7 @@ class ReviewPipeline(IReviewPipeline):
                 # 获取对应的规范要求
                 requirements = specification.get(chapter_classification, [])
                 if not requirements:
+                    print("111111")
                     return {
                         **doc,
                         'review_result': {

BIN
core/construction_review/component/reviewers/check_completeness/config/Construction_Plan_Content_Specification.csv


+ 551 - 0
core/construction_review/component/reviewers/check_completeness/lightweight_completeness_checker.py

@@ -0,0 +1,551 @@
+"""
+轻量级完整性审查模块
+
+特点:
+- 目录审查:二级粒度(检查二级章节是否齐全)
+- 完整性审查:三级粒度(基于分类结果,无LLM)
+- 大纲审查:二级粒度(检查二级章节一致性)
+
+完全依赖分类器输出的三级分类结果,无需LLM参与。
+"""
+
+import pandas as pd
+import asyncio
+from typing import Dict, List, Optional, Set, Tuple, Any
+from dataclasses import dataclass, field
+from collections import defaultdict
+from pathlib import Path
+
+
@dataclass
class TertiaryItem:
    """A single tertiary (level-3) entry from the standard classification table."""
    first_code: str   # level-1 category code (e.g. "basis")
    second_code: str  # level-2 category code
    third_code: str   # level-3 category code
    first_cn: str     # level-1 Chinese display name
    second_cn: str    # level-2 Chinese display name
    third_cn: str     # level-3 Chinese display name
    third_focus: str  # focus/keyword description for this tertiary item
+
+
@dataclass
class SecondaryItem:
    """A single secondary (level-2) entry from the standard classification table."""
    first_code: str   # level-1 category code
    second_code: str  # level-2 category code
    first_cn: str     # level-1 Chinese display name
    second_cn: str    # level-2 Chinese display name
+
+
@dataclass
class LightweightCompletenessResult:
    """Aggregated result of the lightweight (LLM-free) completeness review."""
    overall_status: str  # complete / partial / incomplete
    overall_score: int  # 0-100

    # Catalogue check result (secondary granularity)
    catalogue_check: Dict[str, Any]

    # Completeness check result (tertiary granularity)
    tertiary_completeness: Dict[str, Any]

    # Outline consistency check result (secondary granularity); None when no outline supplied
    outline_check: Optional[Dict[str, Any]]

    # Human-readable improvement suggestions
    recommendations: List[str] = field(default_factory=list)
+
+
class TertiarySpecLoader:
    """Loads the three-level standard classification table from a CSV file.

    Builds two indexes over the spec:
      * ``tertiary_items``  – (first, second, third) code triple -> TertiaryItem
      * ``secondary_items`` – (first, second) code pair -> SecondaryItem
    """

    # Level-1 category code -> Chinese display name, used as a fallback
    # when the CSV row carries no level-1 name.
    FIRST_NAMES = {
        "basis": "编制依据",
        "project_overview": "工程概况",
        "construction": "施工部署",
        "schedule": "施工进度计划",
        "preparation": "施工准备与资源配置",
        "method": "主要施工方法",
        "safety": "安全管理",
        "quality": "质量管理",
        "environment": "环境管理",
        "appendix": "附录"
    }

    def __init__(self, csv_path: str):
        """
        Args:
            csv_path: path to the three-level standard CSV file.
        """
        self.csv_path = csv_path
        self.tertiary_items: Dict[Tuple[str, str, str], "TertiaryItem"] = {}
        self.secondary_items: Dict[Tuple[str, str], "SecondaryItem"] = {}
        self._load()

    @staticmethod
    def _cell(row, column: str) -> str:
        """Return a cell as a stripped string, mapping NaN/missing to ''.

        Without this, pandas NaN cells would become the truthy string 'nan'
        and incomplete rows would be indexed under bogus codes.
        """
        value = row.get(column, '')
        if value is None or pd.isna(value):
            return ''
        return str(value).strip()

    def _load(self) -> None:
        """Parse the CSV and populate both indexes.

        Raises:
            ValueError: if the file cannot be decoded or parsed.
        """
        try:
            # The spec file ships in varying encodings; probe the usual suspects.
            df = None
            for encoding in ('utf-8-sig', 'utf-16', 'gbk', 'utf-8'):
                try:
                    df = pd.read_csv(self.csv_path, encoding=encoding, sep=None, engine='python')
                    break
                except (UnicodeDecodeError, UnicodeError):
                    continue

            if df is None:
                raise ValueError(f"无法读取CSV文件: {self.csv_path}")

            # Normalise headers: lower-case snake_case.
            df.columns = [c.strip().lower().replace(' ', '_') for c in df.columns]

            for _, row in df.iterrows():
                first_code = self._cell(row, 'first_contents_code')
                second_code = self._cell(row, 'second_contents_code')
                third_code = self._cell(row, 'third_contents_code')

                # Skip rows that do not define a complete three-level path.
                if not (first_code and second_code and third_code):
                    continue

                first_cn = self._cell(row, 'first_contents') or self.FIRST_NAMES.get(first_code, first_code)
                second_cn = self._cell(row, 'second_contents')
                third_cn = self._cell(row, 'third_contents')
                third_focus = self._cell(row, 'third_focus')

                # Index the tertiary item by its full code triple.
                self.tertiary_items[(first_code, second_code, third_code)] = TertiaryItem(
                    first_code=first_code,
                    second_code=second_code,
                    third_code=third_code,
                    first_cn=first_cn,
                    second_cn=second_cn,
                    third_cn=third_cn,
                    third_focus=third_focus
                )

                # Register the secondary item once per (first, second) pair.
                sec_key = (first_code, second_code)
                if sec_key not in self.secondary_items:
                    self.secondary_items[sec_key] = SecondaryItem(
                        first_code=first_code,
                        second_code=second_code,
                        first_cn=first_cn,
                        second_cn=second_cn
                    )

        except ValueError:
            # Preserve the specific "unreadable file" message instead of re-wrapping it.
            raise
        except Exception as e:
            raise ValueError(f"加载标准文件失败: {e}") from e

    def get_tertiary_items(self) -> Dict[Tuple[str, str, str], "TertiaryItem"]:
        """Return all tertiary classification items keyed by code triple."""
        return self.tertiary_items

    def get_secondary_items(self) -> Dict[Tuple[str, str], "SecondaryItem"]:
        """Return all secondary classification items keyed by code pair."""
        return self.secondary_items

    def get_tertiary_by_secondary(self, first_code: str, second_code: str) -> List["TertiaryItem"]:
        """Return all tertiary items under the given (first, second) category."""
        result = []
        for key, item in self.tertiary_items.items():
            if key[0] == first_code and key[1] == second_code:
                result.append(item)
        return result

    def get_secondary_names(self) -> Dict[Tuple[str, str], str]:
        """Return a (first, second) code pair -> Chinese secondary name mapping."""
        return {k: v.second_cn for k, v in self.secondary_items.items()}
+
+
class LightweightCompletenessChecker:
    """Lightweight completeness checker (no LLM involved).

    Works purely off the classifier's labels attached to each chunk:
      * catalogue check        – secondary (level-2) coverage vs the spec
      * tertiary completeness  – tertiary (level-3) coverage vs the spec (core)
      * outline check          – outline-vs-content consistency at level 2
    """

    def __init__(self, standard_csv_path: str):
        """
        Args:
            standard_csv_path: path to StandardCategoryTable.csv (three-level spec).
        """
        self.spec_loader = TertiarySpecLoader(standard_csv_path)
        self.tertiary_specs = self.spec_loader.get_tertiary_items()
        self.secondary_specs = self.spec_loader.get_secondary_items()
        self.secondary_names = self.spec_loader.get_secondary_names()

    async def check(
        self,
        chunks: List[Dict],
        outline: Optional[List[Dict]] = None
    ) -> "LightweightCompletenessResult":
        """Run the lightweight completeness review.

        Args:
            chunks: document chunks; each should carry the classifier's
                tertiary_category_code (or first/second/third_code aliases).
            outline: optional table-of-contents entries.

        Returns:
            LightweightCompletenessResult
        """
        # 1. Category labels actually present in the document.
        actual_secondary = self._extract_secondary_from_chunks(chunks)
        actual_tertiary = self._extract_tertiary_from_chunks(chunks)

        # 2. Catalogue check (secondary granularity).
        catalogue_result = self._check_catalogue(actual_secondary)

        # 3. Completeness check (tertiary granularity) - the core signal.
        tertiary_result = self._check_tertiary_completeness(actual_tertiary)

        # 4. Outline consistency check, only when an outline was supplied.
        outline_result = None
        if outline:
            outline_result = self._check_outline(actual_secondary, outline)

        # 5. Overall status / score derive from the tertiary result alone.
        overall_status = self._calc_overall_status(tertiary_result)
        overall_score = self._calc_overall_score(tertiary_result)

        # 6. Human-readable improvement suggestions.
        recommendations = self._generate_recommendations(
            tertiary_result, catalogue_result, outline_result
        )

        return LightweightCompletenessResult(
            overall_status=overall_status,
            overall_score=overall_score,
            catalogue_check=catalogue_result,
            tertiary_completeness=tertiary_result,
            outline_check=outline_result,
            recommendations=recommendations
        )

    def _extract_secondary_from_chunks(self, chunks: List[Dict]) -> Set[Tuple[str, str]]:
        """Collect the (first, second) code pairs present in the chunks."""
        actual = set()
        for chunk in chunks:
            cat1 = chunk.get("chapter_classification") or chunk.get("first_code")
            cat2 = chunk.get("secondary_category_code") or chunk.get("second_code")
            if cat1 and cat2:
                actual.add((cat1, cat2))
        return actual

    def _extract_tertiary_from_chunks(self, chunks: List[Dict]) -> Set[Tuple[str, str, str]]:
        """Collect the (first, second, third) code triples present in the chunks."""
        actual = set()
        for chunk in chunks:
            cat1 = chunk.get("chapter_classification") or chunk.get("first_code")
            cat2 = chunk.get("secondary_category_code") or chunk.get("second_code")
            cat3 = chunk.get("tertiary_category_code") or chunk.get("third_code")
            if cat1 and cat2 and cat3:
                actual.add((cat1, cat2, cat3))
        return actual

    def _check_catalogue(self, actual_secondary: Set[Tuple[str, str]]) -> Dict[str, Any]:
        """Catalogue check at secondary granularity.

        Compares the secondary categories present in the document against the
        full set required by the spec, reporting missing and extra entries.
        """
        required = set(self.secondary_specs.keys())
        actual = actual_secondary

        missing = required - actual
        extra = actual - required

        # Details for required-but-absent secondary categories.
        missing_details = []
        for cat1, cat2 in missing:
            item = self.secondary_specs.get((cat1, cat2))
            if item:
                missing_details.append({
                    "first_code": cat1,
                    "first_name": item.first_cn,
                    "secondary_code": cat2,
                    "secondary_name": item.second_cn
                })

        # Details for present-but-unexpected secondary categories.
        extra_details = []
        for cat1, cat2 in extra:
            extra_details.append({
                "first_code": cat1,
                "first_name": TertiarySpecLoader.FIRST_NAMES.get(cat1, cat1),
                "secondary_code": cat2,
                "secondary_name": "未知"
            })

        completeness_rate = len(actual & required) / len(required) * 100 if required else 0

        return {
            "level": "secondary",
            "is_complete": len(missing) == 0,
            "total_required": len(required),
            "actual_present": len(actual & required),
            "missing_count": len(missing),
            "extra_count": len(extra),
            "completeness_rate": f"{completeness_rate:.1f}%",
            "missing": missing_details,
            "extra": extra_details
        }

    def _check_tertiary_completeness(
        self,
        actual_tertiary: Set[Tuple[str, str, str]]
    ) -> Dict[str, Any]:
        """Tertiary completeness check (core method, no LLM).

        1. Diffs the required tertiary set against the actual one.
        2. Lists missing tertiary items with their focus descriptions.
        3. Aggregates per-secondary coverage statistics.
        """
        required = set(self.tertiary_specs.keys())
        actual = actual_tertiary

        present = required & actual
        missing = required - actual
        extra = actual - required

        # Missing tertiary items, sorted for stable reporting.
        missing_details = []
        for cat1, cat2, cat3 in sorted(missing):
            item = self.tertiary_specs.get((cat1, cat2, cat3))
            if item:
                missing_details.append({
                    "first_code": cat1,
                    "first_name": item.first_cn,
                    "secondary_code": cat2,
                    "secondary_name": item.second_cn,
                    "tertiary_code": cat3,
                    "tertiary_name": item.third_cn,
                    "focus": item.third_focus
                })

        # Per-secondary coverage counters.
        secondary_stats = defaultdict(lambda: {
            "total": 0, "present": 0, "missing": 0
        })

        for cat1, cat2, cat3 in required:
            key = (cat1, cat2)
            secondary_stats[key]["total"] += 1
            if (cat1, cat2, cat3) in present:
                secondary_stats[key]["present"] += 1
            else:
                secondary_stats[key]["missing"] += 1

        # Turn the counters into a sorted list with completion rates.
        secondary_stats_list = []
        for (cat1, cat2), stats in sorted(secondary_stats.items()):
            item = self.secondary_specs.get((cat1, cat2))
            completeness_rate = stats["present"] / stats["total"] * 100 if stats["total"] > 0 else 0
            secondary_stats_list.append({
                "first_code": cat1,
                "first_name": item.first_cn if item else TertiarySpecLoader.FIRST_NAMES.get(cat1, cat1),
                "secondary_code": cat2,
                "secondary_name": item.second_cn if item else "未知",
                "total_tertiary": stats["total"],
                "present": stats["present"],
                "missing": stats["missing"],
                "completeness_rate": f"{completeness_rate:.1f}%"
            })

        overall_rate = len(present) / len(required) * 100 if required else 0

        return {
            "level": "tertiary",
            "total": len(required),
            "present": len(present),
            "missing": len(missing),
            "extra": len(extra),
            "completeness_rate": f"{overall_rate:.1f}%",
            "missing_details": missing_details,
            "secondary_stats": secondary_stats_list
        }

    def _check_outline(
        self,
        actual_secondary: Set[Tuple[str, str]],
        outline: List[Dict]
    ) -> Dict[str, Any]:
        """Outline consistency check at secondary granularity.

        Flags sections present in the outline but without content ("empty")
        and content whose secondary category never appears in the outline.
        """
        outline_secondary = set()
        outline_secondary_details = {}

        for item in outline:
            cat1 = item.get("chapter_classification")
            cat2 = item.get("secondary_category_code")
            if cat1 and cat2:
                outline_secondary.add((cat1, cat2))
                outline_secondary_details[(cat1, cat2)] = {
                    "title": item.get("title", ""),
                    "level": item.get("level", 0)
                }

        # Listed in the outline but no classified content exists.
        empty_sections = []
        for cat1, cat2 in outline_secondary - actual_secondary:
            detail = outline_secondary_details.get((cat1, cat2), {})
            item = self.secondary_specs.get((cat1, cat2))
            empty_sections.append({
                "first_code": cat1,
                "first_name": item.first_cn if item else TertiarySpecLoader.FIRST_NAMES.get(cat1, cat1),
                "secondary_code": cat2,
                "secondary_name": item.second_cn if item else "未知",
                "title": detail.get("title", "")
            })

        # Content exists but the outline never mentions the section.
        unclassified_content = []
        for cat1, cat2 in actual_secondary - outline_secondary:
            item = self.secondary_specs.get((cat1, cat2))
            unclassified_content.append({
                "first_code": cat1,
                "first_name": item.first_cn if item else TertiarySpecLoader.FIRST_NAMES.get(cat1, cat1),
                "secondary_code": cat2,
                "secondary_name": item.second_cn if item else "未知"
            })

        matched = outline_secondary & actual_secondary
        match_rate = len(matched) / len(outline_secondary) * 100 if outline_secondary else 0

        return {
            "level": "secondary",
            "is_consistent": len(empty_sections) == 0 and len(unclassified_content) == 0,
            "outline_secondary_count": len(outline_secondary),
            "content_secondary_count": len(actual_secondary),
            "matched_count": len(matched),
            "match_rate": f"{match_rate:.1f}%",
            "empty_sections": empty_sections,
            "unclassified_content": unclassified_content
        }

    @staticmethod
    def _parse_rate(result: Dict) -> float:
        """Parse a '87.5%'-style completeness_rate into a float (0.0 on bad input)."""
        rate_str = result.get("completeness_rate", "0%").rstrip("%")
        try:
            return float(rate_str)
        except ValueError:
            # Was a bare `except:`; narrowed to the actual failure mode so
            # SystemExit/KeyboardInterrupt are no longer swallowed.
            return 0.0

    def _calc_overall_status(self, tertiary_result: Dict) -> str:
        """Map the tertiary completeness rate onto complete/partial/incomplete."""
        rate = self._parse_rate(tertiary_result)
        if rate >= 95:
            return "complete"
        elif rate >= 70:
            return "partial"
        else:
            return "incomplete"

    def _calc_overall_score(self, tertiary_result: Dict) -> int:
        """Truncate the tertiary completeness rate to an integer 0-100 score."""
        return int(self._parse_rate(tertiary_result))

    def _generate_recommendations(
        self,
        tertiary_result: Dict,
        catalogue_result: Dict,
        outline_result: Optional[Dict]
    ) -> List[str]:
        """Generate human-readable improvement suggestions from the check results."""
        recommendations = []

        # Suggestions from missing tertiary items, worst secondaries first.
        missing = tertiary_result.get("missing_details", [])
        if missing:
            sec_missing = defaultdict(list)
            for item in missing:
                key = (item["first_code"], item["secondary_code"])
                sec_missing[key].append(item)

            sorted_sec = sorted(sec_missing.items(), key=lambda x: len(x[1]), reverse=True)

            for (cat1, cat2), items in sorted_sec[:3]:
                if items:
                    sec_name = items[0]["first_name"] + " > " + items[0]["secondary_name"]
                    missing_names = [i["tertiary_name"] for i in items[:3]]
                    if len(items) > 3:
                        missing_names.append(f"等{len(items)}项")
                    recommendations.append(
                        f"【{sec_name}】缺少以下内容:{', '.join(missing_names)}"
                    )

        # Suggestions from missing catalogue sections.
        missing_sec = catalogue_result.get("missing", [])
        if missing_sec:
            names = [f"{m['first_name']} > {m['secondary_name']}" for m in missing_sec[:3]]
            if len(missing_sec) > 3:
                names.append(f"等{len(missing_sec)}个章节")
            recommendations.append(f"目录缺少以下章节:{', '.join(names)}")

        # Suggestions from outline/content mismatches.
        if outline_result:
            empty = outline_result.get("empty_sections", [])
            if empty:
                names = [e["secondary_name"] or e["title"] for e in empty[:3]]
                if len(empty) > 3:
                    names.append(f"等{len(empty)}个章节")
                recommendations.append(f"以下章节有目录但无内容:{', '.join(names)}")

        if not recommendations:
            recommendations.append("文档完整性良好,建议保持")

        return recommendations
+
+
+# 便捷函数
async def check_completeness_lightweight(
    chunks: List[Dict],
    outline: Optional[List[Dict]] = None,
    standard_csv_path: Optional[str] = None
) -> LightweightCompletenessResult:
    """Entry point for the lightweight (LLM-free) completeness review.

    Args:
        chunks: document chunks, each carrying a tertiary_category_code.
        outline: optional table-of-contents structure.
        standard_csv_path: path to the three-level spec CSV; defaults to
            doc_worker/config/StandardCategoryTable.csv inside the package.

    Returns:
        LightweightCompletenessResult
    """
    csv_path = standard_csv_path
    if csv_path is None:
        # Fall back to the standard table bundled under doc_worker/config.
        base_dir = Path(__file__).parent.parent.parent.parent.parent
        csv_path = str(base_dir / "doc_worker" / "config" / "StandardCategoryTable.csv")

    return await LightweightCompletenessChecker(csv_path).check(chunks=chunks, outline=outline)
+
+
def result_to_dict(result: LightweightCompletenessResult) -> Dict[str, Any]:
    """Serialize a LightweightCompletenessResult into a plain dict."""
    keys = (
        "overall_status",
        "overall_score",
        "catalogue_check",
        "tertiary_completeness",
        "outline_check",
        "recommendations",
    )
    return {key: getattr(result, key) for key in keys}
+
+
# Backward-compatible alias for callers that imported the old result name.
# (The former "TertiarySpecLoader = TertiarySpecLoader" self-assignment was a
# no-op and has been removed; the class is already exported under that name.)
CompletenessResult = LightweightCompletenessResult

+ 698 - 0
core/construction_review/component/reviewers/check_completeness/tertiary_completeness_checker.py

@@ -0,0 +1,698 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+三级完整性审查模块
+
+基于三级分类结果的完整性审查,替代原有的二级审查:
+1. 废弃旧的规范CSV,直接使用三级分类CSV作为审查标准
+2. 审查粒度从二级提升到三级
+3. 使用 third_focus 作为LLM审查的评判标准
+"""
+
+import asyncio
+import csv
+import json
+import re
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Set, Tuple
+
+import aiohttp
+
+from foundation.observability.logger.loggering import review_logger as logger
+
+
+# ============================================================================
+# 数据模型定义
+# ============================================================================
+
@dataclass
class TertiarySpecItem:
    """A single tertiary-level review-standard item loaded from the CSV."""
    first_code: str             # level-1 category code (e.g. "basis")
    first_name: str             # level-1 category name (e.g. 编制依据)
    second_code: str            # level-2 category code (e.g. "LawsAndRegulations")
    second_name: str            # level-2 category name (e.g. 法律法规)
    third_code: str             # level-3 category code (e.g. "NationalLawsAndRegulations")
    third_name: str             # level-3 category name
    focus: str                  # focus description used as the LLM review criterion
+
+@dataclass
+class Issue:
+    """问题模型"""
+    issue_point: str            # 问题点
+    location: str               # 位置(章节路径)
+    suggestion: str             # 建议
+    reason: str                 # 原因
+    risk_level: str             # 风险等级
+    reference_source: str       # 参考来源
+    missing_focus: List[str] = field(default_factory=list)  # 未满足的关注点
+
+
@dataclass
class CompletenessResult:
    """Aggregated outcome of the tertiary completeness review."""
    success: bool
    issues: List[Issue]
    secondary_issues: List[Issue] = field(default_factory=list)  # level-2 structural issues
    tertiary_issues: List[Issue] = field(default_factory=list)   # level-3 content issues
    summary: Dict[str, Any] = field(default_factory=dict)
    execution_time: float = 0.0
    error_message: Optional[str] = None
+
+
+# ============================================================================
+# 三级规范加载器
+# ============================================================================
+
class TertiarySpecLoader:
    """Loads tertiary review standards from the three-level classification CSV.

    Replaces the legacy two-level specification file; each CSV row defines a
    (first, second, third) category path plus a ``third_focus`` criterion.
    Parsing is lazy: the file is read on first access.
    """

    # Level-1 category code -> Chinese chapter name.
    TAG_NAME_MAP = {
        "basis": "编制依据",
        "overview": "工程概况",
        "plan": "施工计划",
        "technology": "施工工艺技术",
        "safety": "安全保证措施",
        "quality": "质量保证措施",
        "environment": "环境保证措施",
        "management": "施工管理及作业人员配备与分工",
        "acceptance": "验收要求",
        "other": "其他资料",
    }

    def __init__(self, csv_path: str):
        """
        Args:
            csv_path: path to the three-level classification CSV file.
        """
        self.csv_path = csv_path
        # Fixed annotation: values are single spec items, not lists.
        self._all_specs: Dict[str, "TertiarySpecItem"] = {}  # third_code -> SpecItem
        self._secondary_specs: Dict[Tuple[str, str], List["TertiarySpecItem"]] = {}  # (first, second) -> [SpecItem]
        self._loaded = False

    def _load(self) -> None:
        """Parse the CSV once and build both lookup indexes."""
        if self._loaded:
            return

        with open(self.csv_path, 'r', encoding='utf-8-sig') as f:
            reader = csv.DictReader(f)
            for row in reader:
                first_code = (row.get('first_contents_code') or '').strip()
                first_name = (row.get('first_contents') or '').strip()
                second_code = (row.get('second_contents_code') or '').strip()
                second_name = (row.get('second_contents') or '').strip()
                third_code = (row.get('third_contents_code') or '').strip()
                third_name = (row.get('third_contents') or '').strip()
                focus = (row.get('third_focus') or '').strip()

                # Rows without a first/second code cannot be placed in the tree.
                if not first_code or not second_code:
                    continue

                spec_item = TertiarySpecItem(
                    first_code=first_code,
                    first_name=first_name,
                    second_code=second_code,
                    second_name=second_name,
                    third_code=third_code,
                    third_name=third_name,
                    focus=focus
                )

                # Index by third-level code (rows without one are level-2 only).
                if third_code:
                    self._all_specs[third_code] = spec_item

                # Group items under their (first, second) parent.
                self._secondary_specs.setdefault((first_code, second_code), []).append(spec_item)

        self._loaded = True
        logger.info(f"三级规范加载完成,共 {len(self._all_specs)} 个三级分类项")

    def get_spec_by_third_code(self, third_code: str) -> Optional["TertiarySpecItem"]:
        """Return the spec item for a tertiary code, or None if unknown.

        Args:
            third_code: tertiary classification code.
        """
        self._load()
        return self._all_specs.get(third_code)

    def get_specs_by_secondary(self, first_code: str, second_code: str) -> List["TertiarySpecItem"]:
        """Return all tertiary spec items under a (first, second) category pair.

        Args:
            first_code: level-1 classification code.
            second_code: level-2 classification code.
        """
        self._load()
        return self._secondary_specs.get((first_code, second_code), [])

    def get_all_secondary_codes(self) -> Set[Tuple[str, str]]:
        """Return every (first_code, second_code) pair defined by the spec."""
        self._load()
        return set(self._secondary_specs.keys())

    def get_chapter_name(self, first_code: str) -> str:
        """Map a level-1 code to its Chinese chapter name (the code itself if unknown)."""
        return self.TAG_NAME_MAP.get(first_code, first_code)
+
+
+# ============================================================================
+# LLM客户端(复用原有实现)
+# ============================================================================
+
class SimpleLLMClient:
    """Minimal async client for an OpenAI-compatible chat-completions endpoint."""

    def __init__(self, config: Dict[str, Any]):
        """
        Args:
            config: dict with server_url, model_id, api_key plus optional
                timeout (seconds), max_retries, temperature, max_tokens.
        """
        self.server_url = config.get('server_url', '')
        self.model_id = config.get('model_id', '')
        self.api_key = config.get('api_key', '')
        self.timeout = config.get('timeout', 30)
        self.max_retries = config.get('max_retries', 2)
        self.temperature = config.get('temperature', 0.3)
        self.max_tokens = config.get('max_tokens', 1024)

    async def call(self, system_prompt: str, user_prompt: str) -> str:
        """Call the chat-completions API and return the first choice's content.

        Retries up to ``max_retries`` times with linear backoff on HTTP errors,
        timeouts and transport exceptions.

        Raises:
            Exception: when all attempts fail.
        """
        headers = {
            'Content-Type': 'application/json',
            'Authorization': f'Bearer {self.api_key}'
        }

        messages = []
        if system_prompt:
            messages.append({'role': 'system', 'content': system_prompt})
        messages.append({'role': 'user', 'content': user_prompt})

        payload = {
            'model': self.model_id,
            'messages': messages,
            'temperature': self.temperature,
            'max_tokens': self.max_tokens
        }

        url = f"{self.server_url.rstrip('/')}/chat/completions"

        for attempt in range(self.max_retries + 1):
            try:
                timeout = aiohttp.ClientTimeout(total=self.timeout)
                async with aiohttp.ClientSession(timeout=timeout) as session:
                    async with session.post(url, headers=headers, json=payload) as response:
                        if response.status == 200:
                            result = await response.json()
                            return result.get('choices', [{}])[0].get('message', {}).get('content', '')
                        else:
                            error_text = await response.text()
                            if attempt < self.max_retries:
                                # Linear backoff before retrying a failed HTTP status.
                                await asyncio.sleep(1 * (attempt + 1))
                                continue
                            raise Exception(f"API调用失败: {response.status}, {error_text}")
            except asyncio.TimeoutError:
                if attempt < self.max_retries:
                    await asyncio.sleep(1 * (attempt + 1))
                    continue
                raise Exception(f"API调用超时: {self.timeout}秒")
            except Exception:
                if attempt < self.max_retries:
                    await asyncio.sleep(1 * (attempt + 1))
                    continue
                # Bare `raise` preserves the original traceback (was `raise e`).
                raise

        raise Exception("LLM调用失败,已重试所有次数")
+
+
+# ============================================================================
+# 三级完整性审查器
+# ============================================================================
+
+class TertiaryCompletenessChecker:
+    """
+    三级完整性审查器
+
+    基于三级分类结果的完整性审查:
+    1. 二级结构检查:检查一级、二级目录完整性
+    2. 三级内容检查:检查每个三级分类的关注点满足情况
+    """
+
    # Prompt templates (Chinese; runtime strings sent verbatim to the LLM).
    SYSTEM_PROMPT = """你是一名工程与施工领域的专业文档审查专家,负责审查施工方案文档的内容完整性。
- 针对每个三级分类的关注点,判断文档内容是否满足要求
- 关注点是一组关键词或要求描述,只要内容能体现这些关注点的核心含义即可认为满足
- 判断要严格但合理,如果文本内容能够满足关注点的核心要求,即使表述方式不同,也应判定为已满足
- 只输出JSON格式,不要添加任何解释性文字
- /no_think"""

    # Per-item user prompt; filled via str.format with content / tertiary_name /
    # tertiary_code / secondary_name / first_name / focus. Literal JSON braces
    # are escaped as {{ }}.
    USER_PROMPT_TEMPLATE = """任务:审查施工方案文档内容,判断是否满足三级分类的关注点要求。

【待审查文本内容】
{content}

【审查标准】
三级分类:{tertiary_name}(代码:{tertiary_code})
所属二级:{secondary_name}
所属一级:{first_name}

关注点要求:
{focus}

请判断上述内容是否满足该关注点要求。

输出格式:
{{
  "satisfied": true/false,
  "missing_focus_points": ["未满足的关键词1", "未满足的关键词2"],
  "reason": "简要说明理由"
}}

注意:
- satisfied 为 true 表示内容满足关注点要求
- satisfied 为 false 表示内容未满足关注点要求,需在 missing_focus_points 中列出未满足的具体关键词
- 只输出JSON对象,不要添加任何解释性文字"""

    # Citation string attached to every generated Issue.
    REFERENCE_SOURCE = "《桥梁公司危险性较大工程管理实施细则(2025版)》"
+
+    def __init__(
+        self,
+        tertiary_csv_path: str,
+        llm_config: Optional[Dict[str, Any]] = None,
+        max_concurrent: int = 20
+    ):
+        """
+        初始化三级完整性审查器
+
+        Args:
+            tertiary_csv_path: 三级分类CSV文件路径
+            llm_config: LLM配置(可选)
+            max_concurrent: 最大并发数
+        """
+        self.spec_loader = TertiarySpecLoader(tertiary_csv_path)
+        self.llm_client = SimpleLLMClient(llm_config) if llm_config else None
+        self.max_concurrent = max_concurrent
+        self.semaphore = asyncio.Semaphore(max_concurrent)
+
+    async def check(
+        self,
+        chunks: List[Dict[str, Any]],
+        outline: Optional[Dict[str, Any]] = None,
+        enable_llm_review: bool = True
+    ) -> CompletenessResult:
+        """
+        执行三级完整性检查
+
+        Args:
+            chunks: 三级分类后的文档块列表(需包含 tertiary_category_code)
+            outline: 目录结构(可选)
+            enable_llm_review: 是否启用LLM审查
+
+        Returns:
+            CompletenessResult: 完整性审查结果
+        """
+        start_time = time.time()
+
+        try:
+            # 1. 二级结构完整性检查(无需LLM)
+            secondary_issues = self._check_secondary_structure(chunks, outline)
+            logger.info(f"二级结构完整性检查完成,发现 {len(secondary_issues)} 个问题")
+
+            # 2. 三级内容完整性检查(需要LLM)
+            tertiary_issues = []
+            if enable_llm_review and self.llm_client:
+                tertiary_issues = await self._check_tertiary_content(chunks)
+                logger.info(f"三级内容完整性检查完成,发现 {len(tertiary_issues)} 个问题")
+
+            # 3. 合并问题列表
+            all_issues = secondary_issues + tertiary_issues
+
+            # 4. 构建汇总信息
+            summary = {
+                "total_issues": len(all_issues),
+                "secondary_issues": len(secondary_issues),
+                "tertiary_issues": len(tertiary_issues),
+                "high_risk": sum(1 for i in all_issues if i.risk_level == "高风险"),
+                "medium_risk": sum(1 for i in all_issues if i.risk_level == "中风险"),
+                "low_risk": sum(1 for i in all_issues if i.risk_level == "低风险"),
+                "review_mode": "三级审查"
+            }
+
+            execution_time = time.time() - start_time
+            logger.info(f"三级完整性审查完成,共发现 {len(all_issues)} 个问题,耗时 {execution_time:.2f}秒")
+
+            return CompletenessResult(
+                success=True,
+                issues=all_issues,
+                secondary_issues=secondary_issues,
+                tertiary_issues=tertiary_issues,
+                summary=summary,
+                execution_time=execution_time
+            )
+
+        except Exception as e:
+            execution_time = time.time() - start_time
+            logger.error(f"三级完整性审查失败: {str(e)}", exc_info=True)
+            return CompletenessResult(
+                success=False,
+                issues=[],
+                summary={},
+                execution_time=execution_time,
+                error_message=str(e)
+            )
+
+    def _check_secondary_structure(
+        self,
+        chunks: List[Dict[str, Any]],
+        outline: Optional[Dict[str, Any]] = None
+    ) -> List[Issue]:
+        """
+        二级结构完整性检查 - 检查一级、二级目录完整性
+
+        Args:
+            chunks: 文档块列表
+            outline: 目录结构
+
+        Returns:
+            List[Issue]: 结构缺失问题列表
+        """
+        issues = []
+
+        # 获取实际存在的二级分类
+        actual_secondary: Set[Tuple[str, str]] = set()
+
+        for chunk in chunks:
+            first_code = chunk.get('chapter_classification', '')
+            second_code = chunk.get('secondary_category_code', '')
+            if first_code and second_code and second_code != 'none':
+                actual_secondary.add((first_code, second_code))
+
+        # 从outline提取(如果有)
+        if outline and 'chapters' in outline:
+            # TODO: outline结构需要包含三级分类信息
+            pass
+
+        # 对比规范要求
+        required_secondary = self.spec_loader.get_all_secondary_codes()
+
+        # 检查缺失的二级分类
+        for first_code, second_code in required_secondary:
+            if (first_code, second_code) not in actual_secondary:
+                # 获取二级分类下属的三级项,用于生成问题描述
+                tertiary_items = self.spec_loader.get_specs_by_secondary(first_code, second_code)
+                tertiary_names = [item.third_name for item in tertiary_items[:3]]  # 取前3个
+
+                first_name = self.spec_loader.get_chapter_name(first_code)
+                second_name = tertiary_items[0].second_name if tertiary_items else second_code
+
+                issues.append(Issue(
+                    issue_point=f"缺少二级分类:{first_name} -> {second_name}",
+                    location=f"{first_name}",
+                    suggestion=f"建议添加【{second_name}】章节,应包含:{', '.join(tertiary_names)}等内容",
+                    reason=f"根据规范要求,【{first_name}】章节应包含【{second_name}】及其下属内容",
+                    risk_level="高风险",
+                    reference_source=self.REFERENCE_SOURCE
+                ))
+
+        return issues
+
+    async def _check_tertiary_content(self, chunks: List[Dict[str, Any]]) -> List[Issue]:
+        """
+        三级内容完整性检查 - 检查每个三级分类的关注点满足情况
+
+        Args:
+            chunks: 文档块列表
+
+        Returns:
+            List[Issue]: 内容缺失问题列表
+        """
+        issues = []
+
+        # 按三级分类分组
+        tertiary_groups: Dict[str, Dict[str, Any]] = {}
+
+        for chunk in chunks:
+            third_code = chunk.get('tertiary_category_code', '')
+            content = chunk.get('content', '')
+
+            if third_code and third_code != 'none' and content:
+                if third_code not in tertiary_groups:
+                    tertiary_groups[third_code] = {
+                        'contents': [],
+                        'first_code': chunk.get('chapter_classification', ''),
+                        'second_code': chunk.get('secondary_category_code', ''),
+                    }
+                tertiary_groups[third_code]['contents'].append(content)
+
+        if not tertiary_groups:
+            return issues
+
+        # 并发审查各三级分类
+        tasks = []
+        for third_code, data in tertiary_groups.items():
+            merged_content = "\n\n".join(data['contents'])
+            tasks.append(self._check_single_tertiary(third_code, merged_content))
+
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        for result in results:
+            if isinstance(result, list):
+                issues.extend(result)
+            elif isinstance(result, Exception):
+                logger.error(f"三级分类审查失败: {str(result)}")
+
+        return issues
+
+    async def _check_single_tertiary(self, third_code: str, content: str) -> List[Issue]:
+        """
+        审查单个三级分类的内容
+
+        Args:
+            third_code: 三级分类代码
+            content: 合并后的内容
+
+        Returns:
+            List[Issue]: 该三级分类的缺失问题
+        """
+        async with self.semaphore:
+            try:
+                # 获取规范项
+                spec_item = self.spec_loader.get_spec_by_third_code(third_code)
+                if not spec_item:
+                    return []
+
+                # 如果没有关注点,跳过审查
+                if not spec_item.focus:
+                    return []
+
+                # 构建提示词
+                user_prompt = self.USER_PROMPT_TEMPLATE.format(
+                    content=content[:8000],
+                    tertiary_name=spec_item.third_name,
+                    tertiary_code=third_code,
+                    secondary_name=spec_item.second_name,
+                    first_name=spec_item.first_name,
+                    focus=spec_item.focus
+                )
+
+                # 调用LLM
+                llm_response = await self.llm_client.call(self.SYSTEM_PROMPT, user_prompt)
+
+                # 解析结果
+                result = self._parse_llm_response(llm_response)
+
+                # 生成问题
+                if not result.get('satisfied', True):
+                    missing_focus = result.get('missing_focus_points', [])
+                    reason = result.get('reason', '未满足关注点要求')
+
+                    risk_level = "高风险" if len(missing_focus) >= 3 else "中风险" if len(missing_focus) >= 1 else "低风险"
+
+                    missing_count = len(missing_focus)
+                    return [Issue(
+                        issue_point=f"【{spec_item.second_name}】的'{spec_item.third_name}'内容不完整(缺少{missing_count}项)",
+                        location=f"{spec_item.first_name} -> {spec_item.second_name}",
+                        suggestion=f"请补充'{spec_item.third_name}'的以下内容:{', '.join(missing_focus[:3])}{'等' if missing_count > 3 else ''}",
+                        reason=f"根据规范要求,{spec_item.third_name}应满足:{spec_item.third_focus}",
+                        risk_level=risk_level,
+                        reference_source=self.REFERENCE_SOURCE,
+                        missing_focus=missing_focus
+                    )]
+
+                return []
+
+            except Exception as e:
+                logger.error(f"审查三级分类 {third_code} 失败: {str(e)}")
+                return []
+
+    def _parse_llm_response(self, response: str) -> Dict[str, Any]:
+        """解析LLM返回结果"""
+        if not response:
+            return {'satisfied': True}
+
+        json_str = self._extract_json(response)
+        if not json_str:
+            return {'satisfied': True}
+
+        try:
+            result = json.loads(json_str)
+            return {
+                'satisfied': result.get('satisfied', True),
+                'missing_focus_points': result.get('missing_focus_points', []),
+                'reason': result.get('reason', '')
+            }
+        except json.JSONDecodeError:
+            return {'satisfied': True}
+
+    def _extract_json(self, text: str) -> str:
+        """从文本中提取JSON字符串"""
+        text = text.strip()
+
+        # 尝试代码块
+        code_block_pattern = r'```(?:json)?\s*(\{.*?\})\s*```'
+        matches = re.findall(code_block_pattern, text, re.DOTALL)
+        for match in matches:
+            try:
+                json.loads(match)
+                return match
+            except json.JSONDecodeError:
+                continue
+
+        # 尝试直接解析
+        try:
+            json.loads(text)
+            return text
+        except json.JSONDecodeError:
+            pass
+
+        # 从第一个 { 提取
+        start_idx = text.find('{')
+        if start_idx == -1:
+            return ""
+
+        brace_count = 0
+        in_string = False
+        escape_next = False
+
+        for i in range(start_idx, len(text)):
+            char = text[i]
+
+            if escape_next:
+                escape_next = False
+                continue
+
+            if char == '\\':
+                escape_next = True
+                continue
+
+            if char == '"':
+                in_string = not in_string
+                continue
+
+            if not in_string:
+                if char == '{':
+                    brace_count += 1
+                elif char == '}':
+                    brace_count -= 1
+                    if brace_count == 0:
+                        json_str = text[start_idx:i+1]
+                        try:
+                            json.loads(json_str)
+                            return json_str
+                        except json.JSONDecodeError:
+                            pass
+
+        return ""
+
+
+# ============================================================================
+# 便捷函数
+# ============================================================================
+
async def check_completeness_tertiary(
    chunks: List[Dict[str, Any]],
    outline: Optional[Dict[str, Any]] = None,
    tertiary_csv_path: Optional[str] = None,
    llm_config: Optional[Dict[str, Any]] = None,
    enable_llm_review: bool = True
) -> CompletenessResult:
    """
    Convenience wrapper: run the tertiary completeness check.

    Args:
        chunks: chunks after tertiary classification (must carry
            ``tertiary_category_code``)
        outline: optional table-of-contents structure
        tertiary_csv_path: path to the tertiary classification CSV; defaults
            to the shared doc_worker config file
        llm_config: optional LLM configuration
        enable_llm_review: whether to run the LLM-based tertiary review

    Returns:
        CompletenessResult: the completeness review result
    """
    if tertiary_csv_path is None:
        # Bug fix: this module lives at .../component/reviewers/check_completeness/,
        # so the component directory (which contains doc_worker/) is THREE
        # levels up, not four — four parents resolved to a non-existent
        # construction_review/doc_worker/ path.
        # NOTE(review): "StandardCategoryTable.csv" is assumed to exist under
        # doc_worker/config — confirm the filename.
        tertiary_csv_path = str(
            Path(__file__).parent.parent.parent / "doc_worker" / "config" / "StandardCategoryTable.csv"
        )

    checker = TertiaryCompletenessChecker(
        tertiary_csv_path=tertiary_csv_path,
        llm_config=llm_config,
        max_concurrent=20
    )

    return await checker.check(chunks, outline, enable_llm_review)
+
+
def result_to_dict(result: CompletenessResult) -> Dict[str, Any]:
    """
    Convert a completeness review result into the legacy dict format.

    Args:
        result: the completeness review result

    Returns:
        Dict[str, Any]: dict-shaped result compatible with the old interface
    """
    serialized_issues = []
    for issue in result.issues:
        serialized_issues.append({
            "issue_point": issue.issue_point,
            "location": issue.location,
            "suggestion": issue.suggestion,
            "reason": issue.reason,
            "risk_level": issue.risk_level,
            "reference_source": issue.reference_source,
            "missing_focus": issue.missing_focus
        })

    # Label taken from the last reported issue, empty when there are none.
    last_location = result.issues[-1].location if result.issues else ""

    return {
        "success": result.success,
        "details": {
            "name": "tertiary_completeness_check",
            "response": serialized_issues,
            "secondary_issues_count": len(result.secondary_issues),
            "tertiary_issues_count": len(result.tertiary_issues),
            "review_location_label": last_location,
            "chapter_code": "tertiary_completeness",
            "original_content": ""
        },
        "summary": result.summary,
        "execution_time": result.execution_time,
        "error_message": result.error_message
    }

+ 290 - 0
core/construction_review/component/reviewers/check_completeness/test_completeness_checker.py

@@ -0,0 +1,290 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+简化版完整性审查模块 - 测试示例
+
+演示如何使用新的完整性审查模块。
+"""
+
import asyncio
import json
from pathlib import Path

# Make the project root importable when running this file directly.
# Bug fix: this test lives in reviewers/check_completeness/, six levels below
# the project root, and the checker module moved into the check_completeness
# package — the old path (five parents + reviewers.completeness_checker)
# pointed one directory too high and at a module location that no longer
# exists.
import sys
sys.path.insert(0, str(Path(__file__).resolve().parents[5]))

from core.construction_review.component.reviewers.check_completeness.completeness_checker import (
    CompletenessChecker,
    check_completeness,
    result_to_dict
)
+
+
# ============================================================================
# Test data
# ============================================================================

# Mock chunk data simulating the output of tertiary classification
MOCK_CHUNKS = [
    {
        "chunk_id": "chunk_1",
        "page": "1",
        "content": """
        第一章 编制依据

        一、法律法规
        1. 《中华人民共和国建筑法》
        2. 《建设工程安全生产管理条例》

        二、标准规范
        1. 《建筑施工安全检查标准》(JGJ59-2011)
        """,
        "section_label": "第一章 编制依据->一、法律法规",
        "chapter_classification": "basis",
        "chapter": "第一章 编制依据"
    },
    {
        "chunk_id": "chunk_2",
        "page": "2",
        "content": """
        第二章 工程概况

        一、工程简介
        本工程为某某大桥建设工程,位于某某省某某市...

        二、工程规模
        主桥全长约500米,桥面宽度24.5米...
        """,
        "section_label": "第二章 工程概况->一、工程简介",
        "chapter_classification": "overview",
        "chapter": "第二章 工程概况"
    },
    {
        "chunk_id": "chunk_3",
        "page": "3",
        "content": """
        第三章 施工工艺技术

        一、技术参数
        1. 混凝土强度等级:C50
        2. 钢筋级别:HRB400

        二、工艺流程
        1. 测量放线
        2. 基坑开挖
        """,
        "section_label": "第三章 施工工艺技术->一、技术参数",
        "chapter_classification": "technology",
        "chapter": "第三章 施工工艺技术"
    }
]

# Mock outline (table-of-contents) data
MOCK_OUTLINE = {
    "chapters": [
        {
            "title": "第一章 编制依据",
            "chapter_classification": "basis",
            "subsections": [
                {"title": "一、法律法规"},
                {"title": "二、标准规范"},
                {"title": "三、文件制度"}  # deliberately absent from MOCK_CHUNKS
            ]
        },
        {
            "title": "第二章 工程概况",
            "chapter_classification": "overview",
            "subsections": [
                {"title": "一、工程简介"},
                {"title": "二、工程规模"},
                {"title": "三、施工条件"}  # deliberately absent from MOCK_CHUNKS
            ]
        },
        {
            "title": "第三章 施工工艺技术",
            "chapter_classification": "technology",
            "subsections": [
                {"title": "一、技术参数"},
                {"title": "二、工艺流程"}
            ]
        }
    ]
}

# LLM configuration (points at a locally hosted model)
LLM_CONFIG = {
    "server_url": "http://192.168.91.253:8003/v1/",
    "model_id": "qwen3-30b",
    "api_key": "sk-123456",
    "timeout": 30,
    "max_retries": 2,
    "temperature": 0.3,
    "max_tokens": 1024
}
+
+
+# ============================================================================
+# 测试函数
+# ============================================================================
+
async def test_chapter_check_only():
    """Test 1: chapter-structure completeness check only (no LLM needed)."""
    banner = "=" * 60
    print(banner)
    print("测试1:章节完整性检查(不使用LLM)")
    print(banner)

    csv_path = str(
        Path(__file__).parent / "config" / "Construction_Plan_Content_Specification.csv"
    )

    # LLM disabled: only the structural (chapter-level) check will run.
    checker = CompletenessChecker(
        spec_csv_path=csv_path,
        llm_config=None,
        max_concurrent=20
    )

    result = await checker.check(
        chunks=MOCK_CHUNKS,
        outline=MOCK_OUTLINE,
        enable_llm_review=False
    )

    print(f"\n审查成功: {result.success}")
    print(f"章节问题数量: {len(result.chapter_issues)}")
    print(f"内容问题数量: {len(result.content_issues)}")
    print(f"总问题数量: {len(result.issues)}")
    print(f"执行时间: {result.execution_time:.2f}秒")

    if result.chapter_issues:
        print("\n章节缺失问题:")
        for issue in result.chapter_issues:
            print(f"  - [{issue.risk_level}] {issue.issue_point}")
            print(f"    位置: {issue.location}")
            print(f"    建议: {issue.suggestion}")

    return result
+
+
async def test_full_check():
    """Test 2: full check (chapter structure + LLM content review)."""
    banner = "=" * 60
    print("\n" + banner)
    print("测试2:完整检查(章节 + LLM内容审查)")
    print(banner)

    csv_path = str(
        Path(__file__).parent / "config" / "Construction_Plan_Content_Specification.csv"
    )

    # Full pipeline: structural check plus LLM-backed content review.
    checker = CompletenessChecker(
        spec_csv_path=csv_path,
        llm_config=LLM_CONFIG,
        max_concurrent=20
    )

    result = await checker.check(
        chunks=MOCK_CHUNKS,
        outline=MOCK_OUTLINE,
        enable_llm_review=True
    )

    print(f"\n审查成功: {result.success}")
    print(f"章节问题数量: {len(result.chapter_issues)}")
    print(f"内容问题数量: {len(result.content_issues)}")
    print(f"总问题数量: {len(result.issues)}")
    print(f"执行时间: {result.execution_time:.2f}秒")

    print(f"\n汇总信息:")
    print(f"  - 高风险: {result.summary.get('high_risk', 0)}")
    print(f"  - 中风险: {result.summary.get('medium_risk', 0)}")
    print(f"  - 低风险: {result.summary.get('low_risk', 0)}")

    if result.issues:
        print("\n所有问题:")
        for i, issue in enumerate(result.issues, 1):
            print(f"  {i}. [{issue.risk_level}] {issue.issue_point}")

    return result
+
+
async def test_convenience_function():
    """Test 3: the check_completeness convenience wrapper."""
    banner = "=" * 60
    print("\n" + banner)
    print("测试3:使用便捷函数")
    print(banner)

    # LLM review disabled to keep the test fast.
    result = await check_completeness(
        chunks=MOCK_CHUNKS,
        outline=MOCK_OUTLINE,
        llm_config=LLM_CONFIG,
        enable_llm_review=False
    )

    # Convert to the legacy dict shape.
    result_dict = result_to_dict(result)

    print(f"\n字典格式结果:")
    print(f"  success: {result_dict['success']}")
    print(f"  issues_count: {len(result_dict['details']['response'])}")
    print(f"  execution_time: {result_dict['execution_time']:.2f}秒")

    return result_dict
+
+
async def test_spec_loader():
    """Test 4: exercise the specification loader (SpecLoader)."""
    print("\n" + "=" * 60)
    print("测试4:规范加载器")
    print("=" * 60)

    # Bug fix: SpecLoader lives inside the check_completeness package; the
    # old import path (reviewers.completeness_checker) predates the move and
    # no longer resolves.
    from core.construction_review.component.reviewers.check_completeness.completeness_checker import SpecLoader

    spec_csv_path = str(
        Path(__file__).parent / "config" / "Construction_Plan_Content_Specification.csv"
    )

    loader = SpecLoader(spec_csv_path)
    all_specs = loader.get_all_specs()

    print(f"\n加载的章节分类数量: {len(all_specs)}")

    for tag, items in all_specs.items():
        chapter_name = loader.get_chapter_name(tag)
        print(f"\n  [{tag}] {chapter_name}:")
        for item in items:
            print(f"    - {item.secondary_dir} ({item.point_count}个要点)")
+
+
+# ============================================================================
+# 主函数
+# ============================================================================
+
async def main():
    """Entry point: run the test scenarios in sequence."""
    banner = "=" * 60
    print("\n" + banner)
    print("简化版完整性审查模块 - 测试")
    print(banner)

    # Test 1: chapter-structure check only.
    await test_chapter_check_only()

    # Test 3: convenience wrapper.
    await test_convenience_function()

    # Test 4: specification loader.
    await test_spec_loader()

    # Test 2: full check (needs a live LLM, may be slow).
    # Uncomment the next line to enable it:
    # await test_full_check()

    print("\n" + banner)
    print("测试完成")
    print(banner)


if __name__ == "__main__":
    asyncio.run(main())

+ 12 - 14
core/construction_review/component/reviewers/reference_basis_reviewer.py

@@ -366,17 +366,17 @@ class BasisReviewService:
         start_time = time.time()
         total_batches = (len(items) + 2) // 3  # 计算总批次数
         
-        # 发送开始审查的SSE推送
+        # 发送开始审查的SSE推送(使用独立命名空间,避免与主流程进度冲突)
         if progress_manager and callback_task_id:
             try:
                 await progress_manager.update_stage_progress(
                     callback_task_id=callback_task_id,
-                    stage_name="AI审查",
-                    current=0,
+                    stage_name="编制依据审查-子任务",  # 独立命名空间
                     status="processing",
                     message=f"开始编制依据审查,共{len(items)}项编制依据",
                     overall_task_status="processing",
                     event_type="processing"
+                    # 不设置 current,避免覆盖主流程进度
                 )
             except Exception as e:
                 logger.error(f"SSE推送开始消息失败: {e}")
@@ -400,20 +400,19 @@ class BasisReviewService:
                     if isinstance(item, dict) and item.get('is_standard', False):
                         batch_standard_count += 1
 
-                # 立即推送当前批次完成的SSE消息
+                # 立即推送当前批次完成的SSE消息(使用独立命名空间)
                 logger.info(f"批次{batch_index + 1}完成,准备推送SSE")
                 if progress_manager and callback_task_id:
                     try:
-                        progress_percent = int((batch_index + 1) / total_batches * 100)
                         await progress_manager.update_stage_progress(
                             callback_task_id=callback_task_id,
-                            stage_name=f"编制依据审查-批次{batch_index + 1}",
-                            current=progress_percent,
+                            stage_name=f"编制依据审查-子任务-批次{batch_index + 1}",  # 独立命名空间
                             status="processing",
                             message=f"完成第{batch_index + 1}/{total_batches}批次编制依据审查,{len(batch)}项,其中{batch_standard_count}项为标准",
                             overall_task_status="processing",
                             event_type="processing",
                             issues=result  # 推送该批次的审查结果
+                            # 不设置 current,避免覆盖主流程进度
                         )
                         logger.info(f"批次{batch_index + 1} SSE推送成功")
                     except Exception as e:
@@ -426,19 +425,18 @@ class BasisReviewService:
                 error_result = [{"name": name, "is_standard": False, "status": "", "meg": f"批次处理失败: {str(e)}"}
                                 for name in batch]
 
-                # 即使失败也要推送结果
+                # 即使失败也要推送结果(使用独立命名空间)
                 if progress_manager and callback_task_id:
                     try:
-                        progress_percent = int((batch_index + 1) / total_batches * 100)
                         await progress_manager.update_stage_progress(
                             callback_task_id=callback_task_id,
-                            stage_name=f"编制依据审查-批次{batch_index + 1}",
-                            current=progress_percent,
+                            stage_name=f"编制依据审查-子任务-批次{batch_index + 1}",  # 独立命名空间
                             status="processing",
                             message=f"第{batch_index + 1}/{total_batches}批次处理失败",
                             overall_task_status="processing",
                             event_type="processing",
                             issues=error_result
+                            # 不设置 current,避免覆盖主流程进度
                         )
                     except Exception as push_e:
                         logger.error(f"SSE推送失败批次{batch_index + 1}结果失败: {push_e}")
@@ -492,18 +490,18 @@ class BasisReviewService:
         logger.info(f"并发执行完成,成功批次: {successful_batches}/{total_batches}")
 
 
-        # 发送完成审查的SSE推送
+        # 发送完成审查的SSE推送(使用独立命名空间,不设置current避免覆盖主流程进度)
         elapsed_time = time.time() - start_time
         if progress_manager and callback_task_id:
             try:
                 await progress_manager.update_stage_progress(
                     callback_task_id=callback_task_id,
-                    stage_name="编制依据审查",
-                    current=15,
+                    stage_name="编制依据审查-子任务",  # 独立命名空间
                     status="processing",
                     message=f"编制依据审查完成,共{total_items}项,发现问题{issue_items}项,耗时{elapsed_time:.2f}秒",
                     overall_task_status="processing",
                     event_type="processing"
+                    # 不设置 current,避免覆盖主流程进度
                 )
             except Exception as e:
                 logger.error(f"SSE推送完成消息失败: {e}")

+ 15 - 17
core/construction_review/component/reviewers/timeliness_basis_reviewer.py

@@ -307,17 +307,17 @@ class BasisReviewService:
         start_time = time.time()
         total_batches = (len(items) + 2) // 3  # 计算总批次数
         
-        # 发送开始审查的SSE推送
+        # 发送开始审查的SSE推送(使用独立命名空间,避免与主流程进度冲突)
         if progress_manager and callback_task_id:
             try:
                 await progress_manager.update_stage_progress(
                     callback_task_id=callback_task_id,
-                    stage_name="AI审查",
-                    current=0,
+                    stage_name="时效性审查-子任务",  # 独立命名空间
                     status="processing",
-                    message=f"开始编制依据审查,共{len(items)}项编制依据",
+                    message=f"开始时效性审查,共{len(items)}项编制依据",
                     overall_task_status="processing",
                     event_type="processing"
+                    # 不设置 current,避免覆盖主流程进度
                 )
             except Exception as e:
                 logger.error(f"SSE推送开始消息失败: {e}")
@@ -341,20 +341,19 @@ class BasisReviewService:
                     if isinstance(item, dict) and item.get('is_standard', False):
                         batch_standard_count += 1
 
-                # 立即推送当前批次完成的SSE消息
+                # 立即推送当前批次完成的SSE消息(使用独立命名空间)
                 logger.info(f"批次{batch_index + 1}完成,准备推送SSE")
                 if progress_manager and callback_task_id:
                     try:
-                        progress_percent = int((batch_index + 1) / total_batches * 100)
                         await progress_manager.update_stage_progress(
                             callback_task_id=callback_task_id,
-                            stage_name=f"编制依据审查-批次{batch_index + 1}",
-                            current=progress_percent,
+                            stage_name=f"时效性审查-子任务-批次{batch_index + 1}",  # 独立命名空间
                             status="processing",
-                            message=f"完成第{batch_index + 1}/{total_batches}批次编制依据审查,{len(batch)}项,其中{batch_standard_count}项为标准",
+                            message=f"完成第{batch_index + 1}/{total_batches}批次时效性审查,{len(batch)}项,其中{batch_standard_count}项为标准",
                             overall_task_status="processing",
                             event_type="processing",
                             issues=result  # 推送该批次的审查结果
+                            # 不设置 current,避免覆盖主流程进度
                         )
                         logger.info(f"批次{batch_index + 1} SSE推送成功")
                     except Exception as e:
@@ -367,19 +366,18 @@ class BasisReviewService:
                 error_result = [{"name": name, "is_standard": False, "status": "", "meg": f"批次处理失败2: {str(e)}"}
                                 for name in batch]
 
-                # 即使失败也要推送结果
+                # 即使失败也要推送结果(使用独立命名空间)
                 if progress_manager and callback_task_id:
                     try:
-                        progress_percent = int((batch_index + 1) / total_batches * 100)
                         await progress_manager.update_stage_progress(
                             callback_task_id=callback_task_id,
-                            stage_name=f"编制依据审查-批次{batch_index + 1}",
-                            current=progress_percent,
+                            stage_name=f"时效性审查-子任务-批次{batch_index + 1}",  # 独立命名空间
                             status="processing",
                             message=f"第{batch_index + 1}/{total_batches}批次处理失败",
                             overall_task_status="processing",
                             event_type="processing",
                             issues=error_result
+                            # 不设置 current,避免覆盖主流程进度
                         )
                     except Exception as push_e:
                         logger.error(f"SSE推送失败批次{batch_index + 1}结果失败: {push_e}")
@@ -433,18 +431,18 @@ class BasisReviewService:
         logger.info(f"并发执行完成,成功批次: {successful_batches}/{total_batches}")
 
 
-        # 发送完成审查的SSE推送
+        # 发送完成审查的SSE推送(使用独立命名空间,不设置current避免覆盖主流程进度)
         elapsed_time = time.time() - start_time
         if progress_manager and callback_task_id:
             try:
                 await progress_manager.update_stage_progress(
                     callback_task_id=callback_task_id,
-                    stage_name="编制依据审查",
-                    current=15,
+                    stage_name="时效性审查-子任务",  # 独立命名空间
                     status="processing",
-                    message=f"编制依据审查完成,共{total_items}项,发现问题{issue_items}项,耗时{elapsed_time:.2f}秒",
+                    message=f"时效性审查完成,共{total_items}项,发现问题{issue_items}项,耗时{elapsed_time:.2f}秒",
                     overall_task_status="processing",
                     event_type="processing"
+                    # 不设置 current,避免覆盖主流程进度
                 )
             except Exception as e:
                 logger.error(f"SSE推送完成消息失败: {e}")

+ 6 - 6
core/construction_review/component/reviewers/utils/directory_extraction.py

@@ -165,17 +165,17 @@ async def extract_basis_with_langchain_qwen(progress_manager,callback_task_id:st
 
         logger.info(f"[编制依据提取] 开始使用 LLM 提取,文本长度: {len(text)}")
 
-        # 推送开始消息
+        # 推送开始消息(使用独立命名空间,避免与主流程进度冲突)
         if progress_manager and callback_task_id:
             try:
                 await progress_manager.update_stage_progress(
                     callback_task_id=callback_task_id,
-                    stage_name="AI审查",
-                    current=0,
+                    stage_name="编制依据提取-子任务",  # 独立命名空间
                     status="processing",
                     message=f"开始编制依据提取",
                     overall_task_status="processing",
                     event_type="processing"
+                    # 不设置 current,避免覆盖主流程进度
                 )
             except Exception as e:
                 logger.error(f"SSE推送开始消息失败: {e}")
@@ -197,18 +197,18 @@ async def extract_basis_with_langchain_qwen(progress_manager,callback_task_id:st
                 if char == "}":
                     brace_count += 1
                     
-                    # 每5个}推送一次进度
+                    # 每5个}推送一次进度(使用独立命名空间)
                     if brace_count % 5 == 0:
                         if progress_manager and callback_task_id:
                             try:
                                 await progress_manager.update_stage_progress(
                                     callback_task_id=callback_task_id,
-                                    stage_name="AI审查",
-                                    current=0,
+                                    stage_name="编制依据提取-子任务",  # 独立命名空间
                                     status="processing",
                                     message=f"编制依据提取中... (已处理 {brace_count} 个结构)",
                                     overall_task_status="processing",
                                     event_type="processing"
+                                    # 不设置 current,避免覆盖主流程进度
                                 )
                             except Exception as e:
                                 logger.error(f"SSE推送进度失败: {e}")

+ 10 - 266
core/construction_review/workflows/ai_review_workflow.py

@@ -17,7 +17,6 @@
 ├── execute()                       # 执行AI审查工作流
 ├── _start_node()                   # 开始节点
 ├── _initialize_progress_node()     # 初始化进度节点
-├── _ai_review_node()               # AI审查核心节点(基于review_config)
 ├── _ai_review_node_check_item()    # AI审查项检查节点(基于review_item_config)
 ├── _save_results_node()            # 保存结果节点(入库/本地文件)
 ├── _complete_node()                # 完成节点
@@ -130,14 +129,13 @@ class AIReviewWorkflow:
             StateGraph: 配置完成的LangGraph工作流图实例
 
         Note:
-            创建包含开始、初始化进度、AI审查、保存结果、完成、错误处理和终止节点的完整工作流
+            创建包含开始、初始化进度、AI审查项检查、保存结果、完成、错误处理和终止节点的完整工作流
             设置节点间的转换关系和条件边,支持错误处理流程和任务终止流程
-            工作流路径: start → initialize_progress → ai_review → save_results → complete → END
+            工作流路径: start → initialize_progress → ai_review_check_item → save_results → complete → END
         """
         workflow = StateGraph(AIReviewState)
         workflow.add_node("start", self._start_node)
         workflow.add_node("initialize_progress", self._initialize_progress_node)
-        #workflow.add_node("ai_review", self._ai_review_node)
         workflow.add_node("ai_review_check_item",self._ai_review_node_check_item)
         workflow.add_node("save_results", self._save_results_node)  # 添加保存结果节点
         workflow.add_node("complete", self._complete_node)
@@ -152,22 +150,10 @@ class AIReviewWorkflow:
             self._should_check_item_or_dimensions,
             {
                 "activate_ai_review_check_item": "ai_review_check_item",  # 使用 review_item_config
-                # "activate_ai_review": "ai_review"                 # 使用 review_config
             }
         )
 
-        # # 添加条件边(错误处理 + 终止检查)
-        # workflow.add_conditional_edges(
-        #     "ai_review",
-        #     self._should_terminate_or_error,
-        #     {
-        #         "terminate": "terminate",  # 终止路径
-        #         "success": "save_results",  # 成功后先保存结果
-        #         "error": "error_handler"  # 错误处理
-        #     }
-        # )
-
-                # 添加条件边(错误处理 + 终止检查)
+        # 添加条件边(错误处理 + 终止检查)
         workflow.add_conditional_edges(
             "ai_review_check_item",
             self._should_terminate_or_error,
@@ -512,238 +498,6 @@ class AIReviewWorkflow:
                 "messages": [AIMessage(content=f"AI审查项检查失败: {str(e)}")]
             }
 
-    # async def _ai_review_node(self, state: AIReviewState) -> AIReviewState:
-    #     """
-    #     AI审查节点
-
-    #     Args:
-    #         state: AI审查状态
-
-    #     Returns:
-    #         AIReviewState: 更新后的审查状态
-    #     """
-    #     try:
-    #         logger.info(f"AI审查节点开始执行,任务ID: {self.task_info.callback_task_id}")
-
-    #         # ⚠️ 检查终止信号(执行前)
-    #         if await self.workflow_manager.check_terminate_signal(state["callback_task_id"]):
-    #             logger.warning(f"AI审查节点检测到终止信号,任务ID: {state['callback_task_id']}")
-    #             return {
-    #                 "status": "terminated",
-    #                 "current_stage": "ai_review",
-    #                 "messages": [AIMessage(content="检测到终止信号")]
-    #             }
-
-    #         test_designation_chunk_flag = self.task_info.get_test_designation_chunk_flag()
-    #         logger.debug(f"测试定位标志: {test_designation_chunk_flag}")
-
-    #         # 1. 准备审查单元数据
-    #         review_chunks, total_units = await self.core_fun._prepare_review_units(state, test_designation_chunk_flag)
-
-    #         # 检查指定测试章节是否未找到
-    #         if test_designation_chunk_flag is not None and not review_chunks:
-    #             error_msg = f"AI审查测试失败:未找到指定审查标志「{test_designation_chunk_flag}」。请修改指定审查标识字段串,建议去除前后符号等(如书名号《》、括号()等),使用更简洁的关键词重新尝试。"
-    #             logger.error(f"🔴 {error_msg}")
-    #             return {
-    #                 "current_stage": "ai_review",
-    #                 "error_message": error_msg,
-    #                 "status": "failed",
-    #                 "messages": [AIMessage(content=error_msg)]
-    #             }
-
-    #         logger.info(f"准备审查单元完成,总单元数: {total_units}, 实际审查: {len(review_chunks)}")
-
-    #         if not review_chunks:
-    #             logger.warning(f"没有可审查的单元,任务ID: {state['callback_task_id']}")
-    #             return {
-    #                 "current_stage": "ai_review",
-    #                 "error_message": "没有可审查的单元",
-    #                 "status": "failed",
-    #                 "messages": [AIMessage(content=f"没有可审查的单元,任务ID: {state['callback_task_id']}")]
-    #             }
-
-    #         logger.info(f"开始核心审查,任务ID: {state['callback_task_id']}")
-    #         await self.core_fun._send_start_review_progress(state, total_units,'core_review')
-    #         # 2. 执行基础并发审查(内部会检测终止信号)
-    #         logger.info(f"开始执行并发审查,任务ID: {state['callback_task_id']}")
-    #         successful_results = await self.core_fun._execute_concurrent_reviews(
-    #             review_chunks, total_units, state, check_terminate=True
-    #         )
-    #         logger.info(f"并发审查完成,成功结果: {len(successful_results)}, 任务ID: {state['callback_task_id']}")
-
-    #         # ⚠️ 再次检查终止信号(并发审查后)
-    #         if await self.workflow_manager.check_terminate_signal(state["callback_task_id"]):
-    #             logger.warning(f"AI审查节点并发审查后检测到终止信号,任务ID: {state['callback_task_id']}")
-    #             return {
-    #                 "status": "terminated",
-    #                 "current_stage": "ai_review",
-    #                 "messages": [AIMessage(content="检测到终止信号")]
-    #             }
-
-    #         # 开始条文完整性审查
-            
-
-    #         # 开始大纲审查
-    #         await self.core_fun._send_start_review_progress(state, total_units,'outline')
-    #         completeness_check = "completeness_check" in  self.task_info.get_review_config_list()
-    #         if not completeness_check:
-    #             logger.info(f"跳过执行大纲审查")
-    #         else:
-    #         # 3. 执行大纲审查
-    #             logger.info(f"开始执行大纲审查")
-
-
-
-
-    #             check_completeness_result = await self.ai_review_engine.check_completeness(
-    #                 trace_id_idx = state["callback_task_id"],
-    #                 review_content = state["structured_content"]["chunks"],
-    #                 state = state,
-    #                 stage_name = state.get("stage_name", "完整性审查")
-    #             )
-    #             outline_review_result = {} 
-
-
-
-    #         # # 4. 执行编制依据审查
-    #         # #await self.core_fun._send_start_review_progress(state, total_units,'prpe_basis')
-    #         reference_check = "reference_check" in self.task_info.get_review_config_list()
-    #         timeliness_check = "timeliness_check" in self.task_info.get_review_config_list()
-    #         reference_check_result = None
-    #         timeliness_check_result = None
-
-    #         # 统一提取一次编制依据内容(任一审查开启时才提取)
-    #         basis_content = ""
-    #         basis_items = None
-    #         if reference_check or timeliness_check:
-    #             prep_basis_content = self._extract_prep_basis_content(state["structured_content"])
-    #             if prep_basis_content:
-    #                 try:
-    #                     basis_items = await extract_basis_with_langchain_qwen(
-    #                         progress_manager=state.get("progress_manager"),
-    #                         callback_task_id=state.get("callback_task_id"),
-    #                         text=prep_basis_content,
-    #                     )
-    #                     basis_content = "\n".join(
-    #                         [
-    #                             item.raw
-    #                             for item in getattr(basis_items, "items", [])
-    #                             if getattr(item, "raw", None)
-    #                         ]
-    #                     ).strip()
-    #                     if not basis_content:
-    #                         basis_content = prep_basis_content
-    #                     logger.info(
-    #                         f"编制依据AI提取完成,条数: {len(getattr(basis_items, 'items', []))}"
-    #                     )
-    #                 except Exception as e:
-    #                     logger.error(f"编制依据AI提取失败,回退原文: {e}", exc_info=True)
-    #                     basis_content = prep_basis_content
-    #             else:
-    #                 logger.warning(f"未找到编制依据内容,跳过编制依据审查准备")
-
-    #         logger.info(f"执行编制依据审查")
-    #         if not reference_check:
-    #             logger.info(f"跳过执行编制依据审查")
-    #         else:
-    #             if basis_content:
-    #                 logger.info(f"开始执行编制依据审查")
-
-    #                 prep_basis_review_data = {
-    #                     'content': basis_content,
-    #                     'basis_items': basis_items,
-    #                     'max_concurrent': self.max_concurrent
-    #                 }
-
-    #                 reference_check_result = await self.ai_review_engine.reference_basis_reviewer(
-    #                     review_data=prep_basis_review_data,
-    #                     trace_id=state["callback_task_id"],
-    #                     state=state,
-    #                     stage_name="编制依据审查"
-    #                 )
-    #             else:
-    #                 logger.warning(f"未找到编制依据内容,跳过编制依据审查")
-
-    #         logger.info(f"执行编制依据审查(时效性)")
-    #         if not timeliness_check:
-    #             logger.info(f"跳过执行编制依据审查(时效性)")
-    #         else:
-    #             if basis_content:
-    #                 logger.info(f"开始执行编制依据审查(时效性)")
-
-    #                 timeliness_check_data = {
-    #                     'content': basis_content,
-    #                     'basis_items': basis_items,
-    #                     'max_concurrent': self.max_concurrent
-    #                 }
-
-    #                 timeliness_check_result = await self.ai_review_engine.timeliness_basis_reviewer(
-    #                     review_data=timeliness_check_data,
-    #                     trace_id=state["callback_task_id"],
-    #                     state=state,
-    #                     stage_name="编制依据审查"
-    #                 )
-    #             else:
-    #                 logger.warning(f"未找到编制依据内容,跳过编制依据审查(时效性)")
-
-
-    #         # 6. 汇总结果
-    #         summary = self.inter_tool._aggregate_results(successful_results)
-
-    #         # 将所有单元的issues合并成一个列表
-    #         all_issues = []
-    #         if completeness_check:
-    #             all_issues.append(outline_review_result)
-    #             all_issues.append(check_completeness_result)
-    #         if reference_check and reference_check_result:
-    #             all_issues.append(reference_check_result)
-    #         if timeliness_check and timeliness_check_result:
-    #             all_issues.append(timeliness_check_result)
-    #         for unit_issues in successful_results:
-    #             if unit_issues and isinstance(unit_issues, list):
-    #                 all_issues.extend(unit_issues)
-
-    #         # 构建符合格式的review_results(兼容 execute() 方法的期望格式)
-    #         review_results = {
-    #             # 兼容旧版格式的字段
-    #             "total_units": total_units,
-    #             "successful_units": len(successful_results),
-    #             "failed_units": max(0, total_units - len(successful_results)),
-    #             "review_results": all_issues,  # 审查结果列表
-    #             "summary": summary,
-
-    #             # 额外的元信息
-    #             "callback_task_id": state["callback_task_id"],
-    #             "file_name": state.get("file_name", ""),
-    #             "user_id": state["user_id"],
-    #             "current": 100,
-    #             "stage_name": "完整审查结果",
-    #             "status": "full_review_result",
-    #             "message": f"审查完成,共发现{summary.get('total_issues', 0)}个问题",
-    #             "updated_at": int(time.time())
-    #         }
-
-    #         logger.info(f"AI审查节点执行成功,任务ID: {state['callback_task_id']}, 总单元数: {total_units}, 成功: {len(successful_results)}")
-
-    #         # 返回新的状态,避免原地修改导致的LangGraph冲突
-    #         return {
-    #             "current_stage": "ai_review_completed",
-    #             "review_results": review_results,
-    #             "status": "completed",
-    #             "messages": [AIMessage(content="AI审查完成")]
-    #         }
-
-    #     except Exception as e:
-    #         logger.error(f"AI审查节点执行失败,任务ID: {state['callback_task_id']}, 错误: {str(e)}", exc_info=True)
-
-    #         # 返回错误状态
-    #         return {
-    #             "current_stage": "ai_review_failed",
-    #             "error_message": str(e),
-    #             "status": "failed",
-    #             "messages": [AIMessage(content=f"AI审查失败: {str(e)}")]
-    #         }
-
     async def _save_results_node(self, state: AIReviewState) -> AIReviewState:
         """
         保存结果节点 - 将审查结果存储到本地文件或数据库
@@ -895,34 +649,24 @@ class AIReviewWorkflow:
 
     def _should_check_item_or_dimensions(self, state: AIReviewState) -> str:
         """
-        检查应该使用 review_item_config 还是 review_config
+        检查应该使用 review_item_config
 
         Args:
             state: AI审查工作流状态
 
         Returns:
-            str: "success" 使用 review_item_config(章节_审查维度格式)
-                 "error" 使用 review_config(审查维度格式)
+            str: "activate_ai_review_check_item" 使用 review_item_config(章节_审查维度格式)
 
         Note:
-            基于互斥验证逻辑:
-            1. review_config 和 review_item_config 互斥,只有一个有值
-            2. 如果 review_item_config 不为 None,走 ai_review_check_item 节点
-            3. 如果 review_config 不为 None,走 ai_review 节点
+            基于验证逻辑:
+            如果 review_item_config 不为空,走 ai_review_check_item 节点
         """
         # 获取 review_item_config 列表(使用 get_review_item_config_list 方法)
         review_item_config = self.task_info.get_review_item_config_list()
 
-        # 判断是否应该使用 review_item_config
-        if len(review_item_config) > 0:
-            # 使用 review_item_config(章节_审查维度格式)
-            logger.info(f"使用 review_item_config 进行审查: {review_item_config}")
-            return "activate_ai_review_check_item"
-        else:
-            # 使用 review_config(审查维度格式)
-            review_config = self.task_info.get_review_config_list()
-            logger.info(f"使用 review_config 进行审查: {review_config}")
-            return "activate_ai_review"
+        # 使用 review_item_config(章节_审查维度格式)
+        logger.info(f"使用 review_item_config 进行审查: {review_item_config}")
+        return "activate_ai_review_check_item"
 
     def _should_terminate_or_error(self, state: AIReviewState) -> str:
         """

+ 20 - 17
core/construction_review/workflows/core_functions/ai_review_core_fun.py

@@ -149,36 +149,39 @@ class AIReviewCoreFun:
             chunk_results = await self._execute_chunk_methods(
                 chapter_code, chunk, global_chunk_index, func_names, state
             )
-            if not chunk_results.get('is_sse_push', False):
-                logger.info(f"跳过当前未成功审查块 {chunk_index} 处理完成")
-                continue  # 跳过未成功执行的块
 
             # 格式化当前块的结果为issues
             chunk_page = chunk.get('page', '')
             review_location_label = f"第{chunk_page}页:{chunk_label}"
-            issues = self.inter_tool._format_review_results_to_issues(
-                callback_task_id=state["callback_task_id"],
-                unit_index=global_chunk_index,
-                review_location_label=review_location_label,
-                chapter_code=chapter_code,
-                unit_content=chunk,
-                basic_result=chunk_results.get('basic_compliance', {}),
-                technical_result=chunk_results.get('technical_compliance', {}),
-                merged_results=None  # 不使用 merged_results
-            )
 
-            # 推送当前块的进度
+            # 根据审查结果决定issues内容
+            if chunk_results.get('is_sse_push', False):
+                # 审查成功,正常格式化issues
+                issues = self.inter_tool._format_review_results_to_issues(
+                    callback_task_id=state["callback_task_id"],
+                    unit_index=global_chunk_index,
+                    review_location_label=review_location_label,
+                    chapter_code=chapter_code,
+                    unit_content=chunk,
+                    basic_result=chunk_results.get('basic_compliance', {}),
+                    technical_result=chunk_results.get('technical_compliance', {}),
+                    merged_results=None  # 不使用 merged_results
+                )
+            else:
+                # 审查未成功执行,仍然推送进度,但issues为空
+                logger.warning(f"审查块 {chunk_index} 未成功执行(可能因审查方法与块类型不匹配),但仍更新进度")
+                issues = []
+
+            # 推送当前块的进度(无论审查是否成功都要推送)
             current = int(((global_chunk_index + 1) / total_chunks) * 100)
             await self._send_unit_review_progress(
                 state, global_chunk_index, total_chunks, chunk_label, issues, current
             )
 
-            # 累积issues
+            # 累积issues(只有成功时才有内容)
             if issues:
                 logger.info(f"🔍 块{chunk_index}: 添加{len(issues)}个issues到all_issues (之前: {len(all_issues)})")
                 all_issues.extend(issues)
-            else:
-                logger.warning(f"⚠️ 块{chunk_index}: issues为空,未添加到all_issues")
 
         chapter_processed_chunks = len(chapter_content)
         logger.info(f"🔍 章节{chapter_code}完成: 本章节处理{chapter_processed_chunks}个块, all_issues最终数量={len(all_issues)}")

+ 13 - 12
core/construction_review/workflows/document_workflow.py

@@ -39,17 +39,18 @@ class DocumentWorkflow:
             logger.info(f"开始文档处理工作流,文件ID: {self.file_id}")
 
             # 检查是否已初始化进度,避免重复初始化
-            existing_progress = await self.progress_manager.get_progress(self.callback_task_id)
-            if not existing_progress:
-                logger.warning(f"文档处理工作流未找到进度数据: {self.callback_task_id}")
-
-            await self.progress_manager.update_stage_progress(
-                callback_task_id=self.callback_task_id,
-                stage_name="文档解析",
-                current=100,
-                status="docu_ans_completed",
-                message="开始文档解析"
-            )
+            if self.progress_manager:
+                existing_progress = await self.progress_manager.get_progress(self.callback_task_id)
+                if not existing_progress:
+                    logger.warning(f"文档处理工作流未找到进度数据: {self.callback_task_id}")
+
+                await self.progress_manager.update_stage_progress(
+                    callback_task_id=self.callback_task_id,
+                    stage_name="文档解析",
+                    current=100,
+                    status="docu_ans_completed",
+                    message="开始文档解析"
+                )
 
             structured_content = await self.document_processor.process_document(
                 file_content=file_content,
@@ -69,7 +70,7 @@ class DocumentWorkflow:
             return result
 
         except Exception as e:
-            logger.error(f"文档处理工作流失败: {str(e)}")
+            logger.error(f"文档处理工作流失败: {str(e)}", exc_info=True)
 
             # 更新错误状态
             if self.progress_manager:

+ 66 - 5
core/construction_review/workflows/report_workflow.py

@@ -95,17 +95,78 @@ class ReportWorkflow:
         except Exception as e:
             logger.error(f"报告生成工作流失败: {str(e)}", exc_info=True)
 
-            # 更新错误状态
+            # 更新错误状态(标记为完成,但使用降级报告)
             if self.progress_manager:
                 await self.progress_manager.update_stage_progress(
                     callback_task_id=self.callback_task_id,
                     stage_name="报告生成",
-                    current=90,
-                    status="failed",
-                    message=f"报告生成失败: {str(e)}"
+                    current=95,
+                    status="completed",
+                    message=f"报告生成使用了降级方案(LLM服务暂时不可用)"
                 )
 
-            raise
+            # 返回降级报告,不让任务完全失败
+            fallback_report = self._generate_fallback_report()
+            logger.warning(f"返回降级报告,文件ID: {self.file_id}")
+            return fallback_report
+
+    def _generate_fallback_report(self) -> dict:
+        """
+        生成降级报告(当 LLM 报告生成失败时使用)
+        
+        Returns:
+            dict: 降级报告字典
+        """
+        
+        # 从 AI 审查结果中提取问题统计
+        review_results = self.ai_review_results.get('review_results', [])
+        high_count = 0
+        medium_count = 0
+        low_count = 0
+        
+        for issue_wrapper in review_results:
+            for issue_id, issue_detail in issue_wrapper.items():
+                review_lists = issue_detail.get('review_lists', [])
+                for review_item in review_lists:
+                    if not review_item.get('exist_issue', False):
+                        continue
+                    check_result = review_item.get('check_result', {})
+                    if isinstance(check_result, str):
+                        continue
+                    risk_level = check_result.get('risk_level', '').lower()
+                    if '高' in risk_level or 'high' in risk_level:
+                        high_count += 1
+                    elif '中' in risk_level or 'medium' in risk_level:
+                        medium_count += 1
+                    else:
+                        low_count += 1
+        
+        total_issues = high_count + medium_count + low_count
+        
+        return {
+            'file_id': self.file_id,
+            'file_name': self.file_name,
+            'total_issues': total_issues,
+            'risk_stats': {
+                'high': high_count,
+                'medium': medium_count,
+                'low': low_count
+            },
+            'overall_assessment': f"【降级报告】文档《{self.file_name}》的审查已完成。共发现 {total_issues} 个问题,其中高风险 {high_count} 个,中风险 {medium_count} 个,低风险 {low_count} 个。(注:由于 LLM 服务暂时不可用,此报告为自动生成的简要统计)",
+            'high_risk_summary': ["请查看详细审查结果了解高风险问题"] if high_count > 0 else ["未发现高风险问题"],
+            'medium_risk_summary': ["请查看详细审查结果了解中风险问题"] if medium_count > 0 else ["未发现中风险问题"],
+            'improvement_recommendations': [
+                f"优先处理 {high_count} 个高风险问题" if high_count > 0 else "文档整体质量良好",
+                f"建议处理 {medium_count} 个中风险问题" if medium_count > 0 else None,
+                "【系统提示】报告摘要生成服务暂时不可用,以上为自动生成的简要统计"
+            ],
+            'risk_alerts': [
+                f"⚠️ 发现 {high_count} 个高风险问题,需要关注" if high_count > 0 else None,
+                f"⚠️ 发现 {medium_count} 个中风险问题" if medium_count > 0 else None
+            ],
+            'generated_at': datetime.now().isoformat(),
+            'is_fallback': True  # 标记为降级报告
+        }
 
     def _convert_report_to_dict(self, final_report) -> dict:
         """

+ 48 - 6
core/construction_write/component/outline_generator.py

@@ -513,11 +513,17 @@ class OutlineGenerator:
         Returns:
             bool: 有终止信号返回 True
         """
-        # 从 progress_manager 或外部获取终止信号
-        # 实际实现时需要从 Redis 读取
-        callback_task_id = state["callback_task_id"]
-        # 模拟检查(实际实现时调用 Redis)
-        return False
+        callback_task_id = state.get("callback_task_id", "")
+        if not callback_task_id:
+            return False
+
+        try:
+            # 从 Redis 检查终止信号
+            from core.base.workflow_manager import workflow_manager
+            return await workflow_manager.check_outline_terminate_signal(callback_task_id)
+        except Exception as e:
+            logger.error(f"检查终止信号失败: {str(e)}")
+            return False
 
     def _create_terminated_state(self, state: OutlineGenerationState, stage: str) -> Dict[str, Any]:
         """
@@ -760,6 +766,11 @@ class OutlineGenerator:
 
         logger.info(f"[章节生成] trace_id: {trace_id}, 开始生成 {current_index} {title}")
 
+        # 检查终止信号 - 在章节生成开始前
+        if await self._check_terminate_by_trace_id(trace_id):
+            logger.warning(f"[章节生成] trace_id: {trace_id}, 检测到终止信号,停止生成 {current_index} {title}")
+            raise asyncio.CancelledError(f"任务被终止: {current_index} {title}")
+
         # 1. 生成当前章节的 content
         chapter_content = await self._generate_chapter_content(
             trace_id=trace_id,
@@ -769,6 +780,11 @@ class OutlineGenerator:
             level=level
         )
 
+        # 检查终止信号 - 在 LLM 调用后
+        if await self._check_terminate_by_trace_id(trace_id):
+            logger.warning(f"[章节生成] trace_id: {trace_id}, LLM调用后检测到终止信号,停止生成 {current_index} {title}")
+            raise asyncio.CancelledError(f"任务被终止: {current_index} {title}")
+
         # 2. 构建基础节点结构
         result_node = {
             "index": current_index,
@@ -797,6 +813,12 @@ class OutlineGenerator:
 
             child_results = await asyncio.gather(*child_tasks, return_exceptions=True)
 
+            # 检查是否有取消异常,向上传播
+            for result in child_results:
+                if isinstance(result, asyncio.CancelledError):
+                    logger.warning(f"[章节生成] trace_id: {trace_id}, 子章节生成被取消,向上传播")
+                    raise result
+
             # 过滤异常并添加子节点
             result_node["children"] = [
                 r for r in child_results if not isinstance(r, Exception)
@@ -871,7 +893,6 @@ class OutlineGenerator:
                     stage_name="大纲生成",
                     status="processing",
                     message=f"正在生成章节 [{chapter_index}] {chapter_title}...",
-                    event_type="outline_chapter_progress",
                     current=current_progress
                 )
                 logger.info(f"[章节进度] task_id={trace_id}, 章节 [{chapter_index}] {chapter_title} 生成完成, 进度={current_progress}%")
@@ -908,6 +929,27 @@ class OutlineGenerator:
             return "_".join(result_parts)
         return trace_id
 
+    async def _check_terminate_by_trace_id(self, trace_id: str) -> bool:
+        """
+        根据 trace_id 检查是否有终止信号
+
+        Args:
+            trace_id: 追踪ID
+
+        Returns:
+            bool: 有终止信号返回 True
+        """
+        callback_task_id = self._extract_callback_task_id(trace_id)
+        if not callback_task_id:
+            return False
+
+        try:
+            from core.base.workflow_manager import workflow_manager
+            return await workflow_manager.check_outline_terminate_signal(callback_task_id)
+        except Exception as e:
+            logger.error(f"检查终止信号失败: {str(e)}")
+            return False
+
     async def _generate_chapter_content(
         self,
         trace_id: str,

+ 12 - 0
core/construction_write/workflows/agent.py

@@ -0,0 +1,12 @@
+import time
+
+
+def agent_test():
+    def agent_run(ACTIONS=None):
+        print("Agent is running...")
+    
+    while True:
+        agent_run(ACTIONS=None)
+        time.sleep(5)
+        break
+    print("This is a test function for the agent workflow.")

+ 59 - 0
demo.py

@@ -0,0 +1,59 @@
+import requests
+
+# 目标请求地址
+url = 'http://183.220.37.46:23424/file_parse'
+
+# 请求头
+headers = {
+    'accept': 'application/json'
+}
+
+# 构建表单数据和文件
+files = {
+    # 各种表单参数
+    'return_middle_json': (None, 'false'),
+    'return_model_output': (None, 'false'),
+    'return_md': (None, 'true'),
+    'return_images': (None, 'false'),
+    'end_page_id': (None, '99999'),
+    'parse_method': (None, 'auto'),
+    'start_page_id': (None, '0'),
+    'lang_list': (None, 'ch'),
+    'output_dir': (None, './output'),
+    'server_url': (None, 'string'),
+    'return_content_list': (None, 'false'),
+    'backend': (None, 'hybrid-auto-engine'),
+    'table_enable': (None, 'true'),
+    'response_format_zip': (None, 'false'),
+    'formula_enable': (None, 'true'),
+    # 文件上传字段(核心)
+    'files': open(r'D:/wx_work/sichuan_luqiao/lu_sgsc_testfile/施工方案/301_四川川交路桥有限责任公司秦唐高速公路唐山段ZT1合同项目部.pdf', 'rb')
+}
+
+try:
+    # 发送 POST 请求
+    response = requests.post(
+        url=url,
+        headers=headers,
+        files=files,
+        # 设置超时时间,避免请求一直挂起
+        timeout=600
+    )
+    
+    # 打印响应结果
+    print(f"响应状态码: {response.status_code}")
+    print(f"响应内容: {response.json()}")
+
+except FileNotFoundError as e:
+    print(f"错误:文件未找到 - {e}")
+except requests.exceptions.Timeout as e:
+    print(f"错误:请求超时 - {e}")
+except requests.exceptions.RequestException as e:
+    print(f"错误:请求失败 - {e}")
+except ValueError as e:
+    print(f"错误:响应不是有效的JSON - {e}")
+    print(f"原始响应内容: {response.text}")
+finally:
+    # 确保文件句柄关闭
+    if 'files' in locals() and hasattr(files['files'], 'close'):
+        files['files'].close()

+ 132 - 0
track/observations/list_directory_a62ffe4b.txt

@@ -0,0 +1,132 @@
+{
+  "success": true,
+  "items": [
+    {
+      "name": "build_graph_app.png",
+      "type": "file",
+      "size": 15899,
+      "modified": 1766145970.1171029
+    },
+    {
+      "name": "config",
+      "type": "directory",
+      "size": 0,
+      "modified": 1772158458.5923946
+    },
+    {
+      "name": "core",
+      "type": "directory",
+      "size": 0,
+      "modified": 1770189404.0749247
+    },
+    {
+      "name": "data_pipeline",
+      "type": "directory",
+      "size": 0,
+      "modified": 1772158458.5720415
+    },
+    {
+      "name": "demo.py",
+      "type": "file",
+      "size": 1910,
+      "modified": 1772790245.5252242
+    },
+    {
+      "name": "docker",
+      "type": "directory",
+      "size": 0,
+      "modified": 1763961375.6183474
+    },
+    {
+      "name": "Dockerfile",
+      "type": "file",
+      "size": 932,
+      "modified": 1766145969.1688297
+    },
+    {
+      "name": "foundation",
+      "type": "directory",
+      "size": 0,
+      "modified": 1772161997.2386956
+    },
+    {
+      "name": "logs",
+      "type": "directory",
+      "size": 0,
+      "modified": 1772189940.7138743
+    },
+    {
+      "name": "README.md",
+      "type": "file",
+      "size": 3309,
+      "modified": 1766561851.7637222
+    },
+    {
+      "name": "README_deploy.md",
+      "type": "file",
+      "size": 3000,
+      "modified": 1772158458.5911317
+    },
+    {
+      "name": "README_test.md",
+      "type": "file",
+      "size": 95,
+      "modified": 1766561851.7637222
+    },
+    {
+      "name": "requirements.txt",
+      "type": "file",
+      "size": 9910,
+      "modified": 1766145969.344908
+    },
+    {
+      "name": "run.sh",
+      "type": "file",
+      "size": 1822,
+      "modified": 1763961375.6193478
+    },
+    {
+      "name": "server",
+      "type": "directory",
+      "size": 0,
+      "modified": 1772161997.2614875
+    },
+    {
+      "name": "temp",
+      "type": "directory",
+      "size": 0,
+      "modified": 1772777344.579789
+    },
+    {
+      "name": "todo.md",
+      "type": "file",
+      "size": 18949,
+      "modified": 1767431459.7554548
+    },
+    {
+      "name": "track",
+      "type": "directory",
+      "size": 0,
+      "modified": 1772791938.2912004
+    },
+    {
+      "name": "utils_test",
+      "type": "directory",
+      "size": 0,
+      "modified": 1767958435.8818636
+    },
+    {
+      "name": "views",
+      "type": "directory",
+      "size": 0,
+      "modified": 1772161997.2614875
+    },
+    {
+      "name": "__init__.py",
+      "type": "file",
+      "size": 0,
+      "modified": 1766145970.2239406
+    }
+  ],
+  "count": 21
+}

La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 2 - 0
track/observations/read_file_791b3a3a.txt


La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 2 - 0
track/observations/read_file_918670c7.txt


La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 2 - 0
track/observations/read_file_998abacd.txt


La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 2 - 0
track/observations/read_file_b6948d8b.txt


La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 2 - 0
track/observations/read_file_e15321d0.txt


La diferencia del archivo ha sido suprimida porque es demasiado grande
+ 2 - 0
track/observations/read_file_ee5a4ec3.txt


+ 217 - 0
track/observations/search_content_1047433b.txt

@@ -0,0 +1,217 @@
+{
+  "success": true,
+  "matches": [
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\ai_review_engine.py",
+      "line_number": 17,
+      "line_content": "├── basic_compliance_check()      # 基础合规性检查 (语法/语义/完整性)",
+      "context": "\n🏗️ 核心审查流程:\n├── basic_compliance_check()      # 基础合规性检查 (语法/语义/完整性)\n├── technical_compliance_check()  # 技术性合规检查 (标准/设计/参数)\n├── rag_enhanced_check()          # RAG增强审查 (向量/混合检索)\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\ai_review_engine.py",
+      "line_number": 25,
+      "line_content": "├── check_completeness()          # 完整性检查",
+      "context": "├── check_grammar()               # 词句语法检查\n├── check_semantic_logic()        # 语义逻辑检查\n├── check_completeness()          # 完整性检查\n├── check_mandatory_standards()   # 强制性标准检查\n├── check_design_values()         # 设计值检查\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\ai_review_engine.py",
+      "line_number": 658,
+      "line_content": "        完整性检查",
+      "context": "                               state: str, stage_name: str) -> Dict[str, Any]:\n        \"\"\"\n        完整性检查\n\n        Args:\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\ai_review_engine.py",
+      "line_number": 667,
+      "line_content": "            Dict[str, Any]: 完整性检查结果",
+      "context": "\n        Returns:\n            Dict[str, Any]: 完整性检查结果\n        \"\"\"\n\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\ai_review_engine.py",
+      "line_number": 750,
+      "line_content": "            logger.info(f\"[完整性检查] 准备将大纲审查结果存储到Redis,bind_id: {trace_id_idx}\")",
+      "context": "            review_results_df.to_csv(str(Path(\"temp\") / f'{trace_id_idx}_completeness_review_results.csv'), encoding='utf-8-sig', index=False)\n            # 将审查结果存储到Redis,供 outline_check 使用\n            logger.info(f\"[完整性检查] 准备将大纲审查结果存储到Redis,bind_id: {trace_id_idx}\")\n            from .reviewers.check_completeness.utils.redis_csv_utils import df_store_to_redis\n            df_store_to_redis(self.redis_client, data=review_results_df, bind_id=trace_id_idx)\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\ai_review_engine.py",
+      "line_number": 753,
+      "line_content": "            logger.info(f\"[完整性检查] 数据已成功存储到Redis,bind_id: {trace_id_idx}\")",
+      "context": "            from .reviewers.check_completeness.utils.redis_csv_utils import df_store_to_redis\n            df_store_to_redis(self.redis_client, data=review_results_df, bind_id=trace_id_idx)\n            logger.info(f\"[完整性检查] 数据已成功存储到Redis,bind_id: {trace_id_idx}\")\n\n            df_filtered = review_results_df.drop_duplicates(subset='title', keep='first').reset_index(drop=True)\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\report_generator.py",
+      "line_number": 325,
+      "line_content": "            'completeness_check': '完整性审查',",
+      "context": "        check_item_names = {\n            'timeliness_check': '时效性审查',\n            'completeness_check': '完整性审查',\n            'semantic_logic_check': '语义逻辑审查',\n            'reference_check': '参考文献审查',\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\workflows\\ai_review_workflow.py",
+      "line_number": 367,
+      "line_content": "            # 筛选完整性存在完整性审查的分类,将其整章进行合并",
+      "context": "            # ]\n\n            # 筛选完整性存在完整性审查的分类,将其整章进行合并\n            filtered_chunks = self.core_fun._merge_chunks_for_completeness_check(\n                filtered_chunks, review_item_dict_sorted\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\docx_worker\\text_splitter.py",
+      "line_number": 7,
+      "line_content": "3. 对超过最大字符数的块按段落-句子进行再次切分,保持语义完整性",
+      "context": "1. 跳过目录页,只在正文中定位章节标题\n2. 按最低目录层级进行切分,形成章节块\n3. 对超过最大字符数的块按段落-句子进行再次切分,保持语义完整性\n\"\"\"\n\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\pdf_worker\\text_splitter.py",
+      "line_number": 7,
+      "line_content": "3. 对超过最大字符数的块按段落-句子进行再次切分,保持语义完整性",
+      "context": "1. 跳过目录页,只在正文中定位章节标题\n2. 按最低目录层级进行切分,形成章节块\n3. 对超过最大字符数的块按段落-句子进行再次切分,保持语义完整性\n4. 支持层级路径构建和子标题查找\n\"\"\"\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\catalogues_check\\catalogues_check.py",
+      "line_number": 559,
+      "line_content": "                    \"suggestion\": f\"目录缺失:目录中缺失'{missing_item}'这个小节;当前章节仅涉及'{title if title else chapter_label}',目录中未体现'{missing_item}'相关内容;整改建议:建议在目录中补充'{missing_item}'相关内容,确保目录完整性。\",",
+      "context": "                    \"issue_point\": f\"{missing_item}缺失\",\n                    \"location\": title if title else chapter_label,\n                    \"suggestion\": f\"目录缺失:目录中缺失'{missing_item}'这个小节;当前章节仅涉及'{title if title else chapter_label}',目录中未体现'{missing_item}'相关内容;整改建议:建议在目录中补充'{missing_item}'相关内容,确保目录完整性。\",\n                    \"reason\": f\"该章节应具备要点:{specification_items_text}\" if specification_items_text else \"\",\n                    \"risk_level\": \"高风险\",\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\catalogues_check\\catalogues_check.py",
+      "line_number": 585,
+      "line_content": "                    \"suggestion\": f\"大纲缺失:大纲中缺失'{miss_outline}'这个小节;当前章节仅涉及'{title if title else chapter_label}',大纲中未涵盖'{miss_outline}'相关内容;整改建议:建议在大纲中补充'{miss_outline}'相关内容,确保大纲完整性。\",",
+      "context": "                    \"issue_point\": f\"{miss_outline}缺失\",\n                    \"location\": title if title else chapter_label,\n                    \"suggestion\": f\"大纲缺失:大纲中缺失'{miss_outline}'这个小节;当前章节仅涉及'{title if title else chapter_label}',大纲中未涵盖'{miss_outline}'相关内容;整改建议:建议在大纲中补充'{miss_outline}'相关内容,确保大纲完整性。\",\n                    \"reason\": f\"该章节应具备要点:{specification_items_text}\" if specification_items_text else \"\",\n                    \"risk_level\": \"高风险\",\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\punctuation_checker.py",
+      "line_number": 53,
+      "line_content": "- 仅检查包裹的**完整性**:书名号是否包裹了规范名称的全部内容;括号是否包裹了编号的全部内容",
+      "context": "\n【判断原则】\n- 仅检查包裹的**完整性**:书名号是否包裹了规范名称的全部内容;括号是否包裹了编号的全部内容\n- 中文括号()和英文括号()混用视为正常,不区分\n- 若内容在符号外遗漏,或符号包裹了多余内容,则判定为false\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\punctuation_checker.py",
+      "line_number": 119,
+      "line_content": "    检查规范文本中的书名号和括号使用是否正确,先进行成对预检,再用LLM判断包裹完整性",
+      "context": "async def check_punctuation(items: List[str]) -> str:\n    \"\"\"\n    检查规范文本中的书名号和括号使用是否正确,先进行成对预检,再用LLM判断包裹完整性\n    \n    Args:\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\punctuation_checker.py",
+      "line_number": 132,
+      "line_content": "    llm_inputs = []   # 需要LLM判定包裹完整性的文本",
+      "context": "    # 1) 预检:是否存在且成对出现\n    pre_results = []  # 预填结果,若需LLM再补充\n    llm_inputs = []   # 需要LLM判定包裹完整性的文本\n\n    for text in items:\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\workflows\\core_functions\\ai_review_core_fun.py",
+      "line_number": 266,
+      "line_content": "        # 只有非完整性审查的chunk才执行RAG检索(注意括号位置,确保运算符优先级正确)",
+      "context": "        is_complete_field = chunk.get('is_complete_field', False)\n        logger.info(f\"检查is_complete_field值是否正常: {is_complete_field}\")\n        # 只有非完整性审查的chunk才执行RAG检索(注意括号位置,确保运算符优先级正确)\n        if ('check_parameter_compliance' in func_names or 'check_non_parameter_compliance' in func_names) and not is_complete_field:\n            logger.debug(\"开始执行RAG检索增强\")\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\workflows\\core_functions\\ai_review_core_fun.py",
+      "line_number": 1223,
+      "line_content": "        筛选包含完整性审查的分类,将其整章进行合并",
+      "context": "    ) -> List[Dict[str, Any]]:\n        \"\"\"\n        筛选包含完整性审查的分类,将其整章进行合并\n\n        Args:\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\workflows\\core_functions\\ai_review_core_fun.py",
+      "line_number": 1243,
+      "line_content": "            # 1. 找出包含完整性审查的章节分类",
+      "context": "        \"\"\"\n        try:\n            # 1. 找出包含完整性审查的章节分类\n            completeness_chapters = set()\n            for chapter_code, func_names in review_item_dict.items():\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\workflows\\core_functions\\ai_review_core_fun.py",
+      "line_number": 1250,
+      "line_content": "                logger.info(\"没有包含完整性审查的章节,无需合并\")",
+      "context": "\n            if not completeness_chapters:\n                logger.info(\"没有包含完整性审查的章节,无需合并\")\n                return chunks\n\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\workflows\\core_functions\\ai_review_core_fun.py",
+      "line_number": 1253,
+      "line_content": "            logger.info(f\"包含完整性审查的章节分类: {completeness_chapters}\")",
+      "context": "                return chunks\n\n            logger.info(f\"包含完整性审查的章节分类: {completeness_chapters}\")\n\n            # 2. 筛选出需要合并的chunks(属于完整性审查章节的)\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\workflows\\core_functions\\ai_review_core_fun.py",
+      "line_number": 1255,
+      "line_content": "            # 2. 筛选出需要合并的chunks(属于完整性审查章节的)",
+      "context": "            logger.info(f\"包含完整性审查的章节分类: {completeness_chapters}\")\n\n            # 2. 筛选出需要合并的chunks(属于完整性审查章节的)\n            chunks_to_merge = []\n            for chunk in chunks:\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\foundation\\ai\\models\\model_handler.py",
+      "line_number": 341,
+      "line_content": "            # 验证配置完整性",
+      "context": "            doubao_api_key = self.config.get(\"doubao\", \"DOUBAO_API_KEY\")\n\n            # 验证配置完整性\n            if not all([doubao_url, doubao_model_id, doubao_api_key]):\n                missing = []\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\foundation\\ai\\models\\model_handler.py",
+      "line_number": 390,
+      "line_content": "            # 验证配置完整性",
+      "context": "            qwen_api_key = self.config.get(\"qwen\", \"QWEN_API_KEY\")\n\n            # 验证配置完整性\n            if not all([qwen_url, qwen_model_id, qwen_api_key]):\n                missing = []\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\foundation\\ai\\models\\model_handler.py",
+      "line_number": 439,
+      "line_content": "            # 验证配置完整性",
+      "context": "            qwen3_30b_api_key = self.config.get(\"qwen3_30b\", \"QWEN3_30B_API_KEY\")\n\n            # 验证配置完整性\n            if not all([qwen3_30b_url, qwen3_30b_model_id, qwen3_30b_api_key]):\n                missing = []\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\foundation\\ai\\models\\model_handler.py",
+      "line_number": 488,
+      "line_content": "            # 验证配置完整性",
+      "context": "            deepseek_api_key = self.config.get(\"deepseek\", \"DEEPSEEK_API_KEY\")\n\n            # 验证配置完整性\n            if not all([deepseek_url, deepseek_model_id, deepseek_api_key]):\n                missing = []\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\foundation\\ai\\models\\model_handler.py",
+      "line_number": 537,
+      "line_content": "            # 验证配置完整性",
+      "context": "            gemini_api_key = self.config.get(\"gemini\", \"GEMINI_API_KEY\")\n\n            # 验证配置完整性\n            if not all([gemini_url, gemini_model_id, gemini_api_key]):\n                missing = []\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\foundation\\ai\\models\\model_handler.py",
+      "line_number": 617,
+      "line_content": "            # 验证配置完整性",
+      "context": "            api_key = self.config.get(\"lq_qwen3_8B_lora\", \"LQ_QWEN3_8B_LQ_LORA_API_KEY\", \"dummy\")\n\n            # 验证配置完整性\n            if not all([server_url, model_id]):\n                missing = []\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\foundation\\ai\\models\\model_handler.py",
+      "line_number": 762,
+      "line_content": "            # 验证配置完整性",
+      "context": "            dimensions = self.config.get(\"siliconflow_embed\", \"SLCF_EMBED_DIMENSIONS\", \"4096\")\n\n            # 验证配置完整性\n            if not all([server_url, api_key, model_id]):\n                missing = []\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\views\\construction_review\\launch_review.py",
+      "line_number": 93,
+      "line_content": "        'completeness_check',         # 条文完整性审查",
+      "context": "        'sensitive_word_check',       # 词句语法检查\n        'semantic_logic_check',       # 语义逻辑审查\n        'completeness_check',         # 条文完整性审查\n        'timeliness_check',           # 时效性审查\n        'reference_check',            # 规范性审查\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\views\\construction_review\\launch_review.py",
+      "line_number": 178,
+      "line_content": "    catalogue_invalid = []  # 目录章节使用了非完整性审查",
+      "context": "    invalid_chapter = []  # 章节code不支持\n    invalid_review = []   # 审查项code不支持\n    catalogue_invalid = []  # 目录章节使用了非完整性审查\n\n    for item in review_item_config:\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\views\\construction_review\\launch_review.py",
+      "line_number": 193,
+      "line_content": "        # 5. 特殊规则:目录章节只能使用完整性审查",
+      "context": "            continue  # 章节不支持时不继续检查审查项\n\n        # 5. 特殊规则:目录章节只能使用完整性审查\n        if chapter_code == \"catalogue\" and review_dim != \"completeness_check\":\n            catalogue_invalid.append(item)\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\views\\construction_review\\schemas\\error_schemas.py",
+      "line_number": 227,
+      "line_content": "        \"message\": \"目录章节仅支持完整性审查\",",
+      "context": "        \"code\": \"QDSC021\",\n        \"error_type\": \"CATALOGUE_COMPLETENESS_ONLY\",\n        \"message\": \"目录章节仅支持完整性审查\",\n        \"status_code\": 400\n    }\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\views\\construction_review\\schemas\\error_schemas.py",
+      "line_number": 516,
+      "line_content": "        \"\"\"目录章节仅支持完整性审查\"\"\"",
+      "context": "    @staticmethod\n    def catalogue_completeness_only(invalid_items: list = None):\n        \"\"\"目录章节仅支持完整性审查\"\"\"\n        logger.error(f\"目录章节使用了非完整性审查: {invalid_items}\")\n        message = f\"参数错误:目录章节(catalogue)仅支持完整性审查(completeness_check)。无效配置项: {invalid_items}\"\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\views\\construction_review\\schemas\\error_schemas.py",
+      "line_number": 517,
+      "line_content": "        logger.error(f\"目录章节使用了非完整性审查: {invalid_items}\")",
+      "context": "    def catalogue_completeness_only(invalid_items: list = None):\n        \"\"\"目录章节仅支持完整性审查\"\"\"\n        logger.error(f\"目录章节使用了非完整性审查: {invalid_items}\")\n        message = f\"参数错误:目录章节(catalogue)仅支持完整性审查(completeness_check)。无效配置项: {invalid_items}\"\n        return create_http_exception(ErrorCodes.QDSC021, message)\n"
+    },
+    {
+      "file": "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\views\\construction_review\\schemas\\error_schemas.py",
+      "line_number": 518,
+      "line_content": "        message = f\"参数错误:目录章节(catalogue)仅支持完整性审查(completeness_check)。无效配置项: {invalid_items}\"",
+      "context": "        \"\"\"目录章节仅支持完整性审查\"\"\"\n        logger.error(f\"目录章节使用了非完整性审查: {invalid_items}\")\n        message = f\"参数错误:目录章节(catalogue)仅支持完整性审查(completeness_check)。无效配置项: {invalid_items}\"\n        return create_http_exception(ErrorCodes.QDSC021, message)\n\n"
+    }
+  ],
+  "count": 35,
+  "has_more": false
+}

+ 107 - 0
track/observations/search_files_1a59aac5.txt

@@ -0,0 +1,107 @@
+{
+  "success": true,
+  "files": [
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\base\\progress_manager.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\base\\redis_duplicate_checker.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\base\\sse_manager.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\base\\task_models.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\base\\workflow_manager.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\base\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_write\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\base\\words_detect\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\base\\words_detect\\core\\prompt_builder.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\base\\words_detect\\core\\reviewer.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\base\\words_detect\\examples\\example.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\ai_review_engine.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\constants.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\document_processor.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\report_generator.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\workflows\\ai_review_workflow.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\workflows\\document_workflow.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\workflows\\report_workflow.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\workflows\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\interfaces.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\pipeline.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\infrastructure\\milvus.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\infrastructure\\parent_tool.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\infrastructure\\relevance.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\report\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\base_reviewer.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\outline_check.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\reference_basis_reviewer.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\semantic_logic.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\sensitive_word_check.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\timeliness_basis_reviewer.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\classification\\chunk_classifier.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\classification\\hierarchy_classifier.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\classification\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\config\\provider.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\docx_worker\\cli.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\docx_worker\\full_text_extractor.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\docx_worker\\pipeline.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\docx_worker\\text_splitter.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\docx_worker\\toc_extractor.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\docx_worker\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\pdf_worker\\adapter.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\pdf_worker\\batch_cli.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\pdf_worker\\classifier.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\pdf_worker\\cli.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\pdf_worker\\fulltext_extractor.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\pdf_worker\\json_writer.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\pdf_worker\\text_splitter.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\pdf_worker\\toc_extractor.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\pdf_worker\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\utils\\json_writer.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\utils\\llm_client.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\utils\\prompt_loader.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\utils\\text_split_support.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\utils\\title_matcher.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\utils\\toc_level_identifier.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\doc_worker\\utils\\toc_pattern_matcher.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\catalogues_check\\catalogues_check.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\interfaces.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\main.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\ac_automaton.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\directory_extraction.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\inter_tool.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\prompt_loader.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\punctuation_checker.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\punctuation_result_processor.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\reference_matcher.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\sensitive_word_checker.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\text_split.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\timeliness_determiner.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\catalogues_check\\utils\\redis_utils.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\catalogues_check\\utils\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\components\\data_loader.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\components\\keyword_checker.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\components\\llm_client.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\components\\prompt_builder.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\components\\result_analyzer.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\components\\result_processor.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\components\\result_saver.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\components\\review_pipeline.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\components\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\utils\\file_utils.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\utils\\redis_csv_utils.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\utils\\yaml_utils.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\check_completeness\\utils\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\llm_chain_client\\bootstrap.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\llm_chain_client\\main.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\llm_chain_client\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\llm_chain_client\\implementations\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\llm_chain_client\\interfaces\\chain_executor.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\llm_chain_client\\interfaces\\llm_client.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\llm_chain_client\\interfaces\\prompt_loader.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\llm_chain_client\\interfaces\\__init__.py",
+    "D:\\wx_work\\sichuan_luqiao\\LQAgentPlatform\\core\\construction_review\\component\\reviewers\\utils\\llm_chain_client\\orchestration\\prompt_chain_processor.py"
+  ],
+  "count": 100,
+  "has_more": true
+}

La diferencia del archivo ha sido suprimido porque es demasiado grande
+ 25 - 0
track/tracking_20260306_181157_64c82f4e.json


La diferencia del archivo ha sido suprimido porque es demasiado grande
+ 25 - 0
track/tracking_20260306_181214_a9e464c4.json


La diferencia del archivo ha sido suprimido porque es demasiado grande
+ 25 - 0
track/tracking_20260306_181218_e1d6c6c0.json


La diferencia del archivo ha sido suprimido porque es demasiado grande
+ 25 - 0
track/tracking_20260306_181508_35c4b4da.json


La diferencia del archivo ha sido suprimido porque es demasiado grande
+ 25 - 0
track/tracking_20260306_181511_a57ff3cb.json


Algunos archivos no se mostraron porque demasiados archivos cambiaron en este cambio