Преглед на файлове

v0.0.8-功能优化
- 降低专业性审查召回块
- 优化性能
- 调试完整性审查、时效性审查、专业性审查输出字段对齐

WangXuMing преди 1 месец
родител
ревизия
2dcf01f971
променени са 26 файла, в които са добавени 3750 реда и са изтрити 491 реда
  1. 8 0
      core/base/workflow_manager.py
  2. 220 140
      core/construction_review/component/ai_review_engine.py
  3. 0 50
      core/construction_review/component/doc_worker/config/Construction_Plan_Content_Specification.csv
  4. 1 1
      core/construction_review/component/doc_worker/config/StandardCategoryTable.csv
  5. 9 0
      core/construction_review/component/doc_worker/config/config.yaml
  6. 345 68
      core/construction_review/component/doc_worker/docx_worker/toc_extractor.py
  7. 124 42
      core/construction_review/component/doc_worker/pdf_worker/fulltext_extractor.py
  8. 153 11
      core/construction_review/component/document_processor.py
  9. 108 65
      core/construction_review/component/reviewers/catalogues_check/catalogues_check.py
  10. BIN
      core/construction_review/component/reviewers/catalogues_check/config/Construction_Plan_Content_Specification.csv
  11. 36 12
      core/construction_review/component/reviewers/check_completeness/completeness_checker.py
  12. 15 3
      core/construction_review/component/reviewers/check_completeness/components/result_analyzer.py
  13. 50 9
      core/construction_review/component/reviewers/check_completeness/lightweight_completeness_checker.py
  14. 39 15
      core/construction_review/component/reviewers/check_completeness/tertiary_completeness_checker.py
  15. 21 17
      core/construction_review/component/reviewers/utils/inter_tool.py
  16. 76 10
      core/construction_review/component/reviewers/utils/reference_matcher.py
  17. 64 20
      core/construction_review/workflows/ai_review_workflow.py
  18. 18 2
      core/construction_review/workflows/core_functions/ai_review_core_fun.py
  19. 19 2
      foundation/ai/agent/generate/model_generate.py
  20. 40 9
      foundation/ai/rag/retrieval/entities_enhance.py
  21. 2 2
      foundation/ai/rag/retrieval/retrieval.py
  22. 2 2
      foundation/infrastructure/messaging/celery_app.py
  23. 58 11
      server/app.py
  24. 142 0
      utils_test/Result_Visual_Observation_Tools/README.md
  25. 1283 0
      utils_test/Result_Visual_Observation_Tools/advanced_viewer.html
  26. 917 0
      utils_test/Result_Visual_Observation_Tools/index.html

+ 8 - 0
core/base/workflow_manager.py

@@ -948,6 +948,10 @@ class WorkflowManager:
             }
 
             # 使用 cache_manager 保存(指定文件名)
+            import os
+            target_dir = os.path.join(CacheBaseDir.CONSTRUCTION_REVIEW.value, "final_result")
+            logger.info(f"准备保存结果到目录: {target_dir}")
+            
             file_path = cache.save(
                 complete_results,
                 subdir="final_result",
@@ -956,6 +960,10 @@ class WorkflowManager:
             )
 
             logger.info(f"完整结果已保存到: {file_path}")
+            
+            # 验证文件是否保存到正确位置
+            if "final_result" not in str(file_path):
+                logger.warning(f"警告:结果文件可能未保存到正确的final_result目录: {file_path}")
 
         except Exception as e:
             logger.error(f"保存完整结果失败: {str(e)}", exc_info=True)

+ 220 - 140
core/construction_review/component/ai_review_engine.py

@@ -81,14 +81,7 @@ import json
 
 
 
-from .reviewers.check_completeness.components.data_loader import CSVDataLoader
-from .reviewers.check_completeness.components.prompt_builder import PromptBuilder
-from .reviewers.check_completeness.components.llm_client import LLMClient
-from .reviewers.check_completeness.components.result_processor import ResultProcessor
-from .reviewers.check_completeness.components.review_pipeline import ReviewPipeline
-from .reviewers.check_completeness.components.result_saver import ResultSaver
-from .reviewers.check_completeness.components.result_analyzer import ResultAnalyzer
-from .reviewers.check_completeness.utils.file_utils import write_json
+
 from core.construction_review.component.reviewers.base_reviewer import ReviewResult
 from .reviewers.outline_check import outline_review_results_df, get_empty_list_keys
 from .reviewers.check_completeness.utils.redis_csv_utils import (
@@ -652,160 +645,202 @@ class AIReviewEngine(BaseReviewer):
         
         return result
         
-    async def check_completeness(self, trace_id_idx: str, review_content: List[Dict[str, Any]],
-                               state: str, stage_name: str) -> Dict[str, Any]:
+    async def check_completeness(
+        self, trace_id_idx: str, review_content: List[Dict[str, Any]],
+        state: str, stage_name: str
+    ) -> Dict[str, Any]:
         """
-        完整性检查
-
+        正文内容完整性审查 - 轻量级三级分类检查
+        
+        基于文档分类器输出的三级分类编码,直接判断哪些三级分类有内容覆盖,哪些缺失。
+        无需LLM,通过集合运算快速计算完整性。
+        
         Args:
             trace_id_idx: 追踪ID索引
-            review_content: 审查内容,文档块(chunks)列表
-            state: 状态字典
+            review_content: 审查内容,文档块(chunks)列表,每个chunk需包含三级分类编码
+            state: 状态字典,可包含大纲信息
             stage_name: 阶段名称
-
+            
         Returns:
-            Dict[str, Any]: 完整性检查结果
+            Dict[str, Any]: 完整性检查结果,包含缺失的三级分类详情
         """
-
-        # with open(r'temp\structured_content.json', 'w', encoding='utf-8') as f:
-        #     json.dump(review_content, f, ensure_ascii=False, indent=4)
-        name = "completeness_check"
+        from .reviewers.check_completeness.lightweight_completeness_checker import (
+            LightweightCompletenessChecker,
+            result_to_dict as result_to_dict_lightweight
+        )
+        
         start_time = time.time()
+        name = "completeness_check_lightweight"
+        
         try:
-            # 验证review_content格式
-            if not isinstance(review_content, list):
-                raise ValueError(f"review_content必须是列表类型,当前类型: {type(review_content)}")
+            logger.info(f"[{name}] 开始轻量级三级完整性审查")
+            
+            # 标准CSV路径(使用doc_worker下的标准分类表)
+            csv_path = str(
+                Path(__file__).parent / 'doc_worker' / 'config' / 
+                'StandardCategoryTable.csv'
+            )
             
-            # # 获取文档块信息
-            # doc = review_content
-            # chunk_id = doc.get('chunk_id', 'unknown')
-            # chapter_classification = doc.get('chapter_classification', '')
-            # content = doc.get('content', '')
-            doc = 'doc'
-            chunk_id = 'chunk_id'
-            chapter_classification = 'chunk_id'
-            content = 'chunk_id'
+            # 创建轻量级审查器
+            checker = LightweightCompletenessChecker(csv_path)
             
-            logger.info(f"开始执行 {name} 审查,trace_id: {trace_id_idx}, chunk_id: {chunk_id}, chapter_classification: {chapter_classification}")
+            # 从state获取outline(如果有)
+            outline = None
+            if state and isinstance(state, dict):
+                structured = state.get('structured_content', {})
+                outline = structured.get('outline')
             
-            # 检查必要字段
-            if not chapter_classification:
-                raise ValueError(f"文档块 {chunk_id} 缺少chapter_classification字段")
+            # 从传入的chunks中提取chapter_code和章节信息
+            chapter_code = "all"
+            chapter_name = ""
+            if review_content and isinstance(review_content, list):
+                first_chunk = review_content[0]
+                if isinstance(first_chunk, dict):
+                    chapter_code = first_chunk.get('chapter_classification', 'all')
+                    chapter_name = first_chunk.get('chapter', '') or first_chunk.get('section_label', '')
             
-            if not content:
-                raise ValueError(f"文档块 {chunk_id} 缺少content字段")
-
-
-            # 配置文件路径(使用基于当前文件的跨平台相对路径)
-            base_dir = Path(__file__).parent / 'reviewers' / 'check_completeness'
-            csv_path = base_dir / 'config' / 'Construction_Plan_Content_Specification.csv'
-            # json_path = base_dir / 'data' / '文档切分预处理结果.json'
-            prompt_config_path = base_dir / 'config' / 'prompt.yaml'
-            api_config_path = base_dir / 'config' / 'llm_api.yaml'
+            # 执行检查(传入当前章节分类,只检查该章节下的三级分类)
+            result = await checker.check(
+                chunks=review_content,
+                outline=outline,
+                chapter_classification=chapter_code if chapter_code != "all" else None
+            )
+            
+            # 转换为字典
+            result_dict = result_to_dict_lightweight(result)
             
-            logger.info("=" * 60)
-            logger.info("文件要点审查模块")
-            logger.info("=" * 60)
+            # 风险等级映射:按是否包含关键要点(is_key_point 布尔值)映射为中文风险等级
+            risk_level_map = {
+                True: "高风险",   # is_key_point=True
+                False: "中风险"   # is_key_point=False
+            }
             
-            # 1. 加载数据
-            logger.info("\n[1/5] 加载规范文件...")
-            data_loader = CSVDataLoader()
-            specification = data_loader.load_specification(str(csv_path))
-            logger.info(f"  加载完成,共 {len(specification)} 个标签类别")
+            # 生成缺失项详情(用于前端展示)
+            missing_details = result_dict.get('tertiary_completeness', {}).get('missing_details', [])
             
-            logger.info("\n[2/5] 加载文档数据...")
-            documents =  review_content
-            logger.info(f"  加载完成,共 {len(documents)} 个文档块")
+            # 按二级分类分组统计缺失项
+            from collections import defaultdict
+            secondary_groups = defaultdict(list)
+            for item in missing_details:
+                secondary_key = (item.get('first_name', ''), item.get('secondary_name', ''))
+                secondary_groups[secondary_key].append(item)
             
-            # 2. 初始化组件
-            logger.info("\n[3/5] 初始化组件...")
-            prompt_builder = PromptBuilder(str(prompt_config_path))
-            llm_client = LLMClient(str(api_config_path))
-            result_processor = ResultProcessor()
+            # 构建response列表(按二级分类分组输出)
+            response_items = []
+            for (first_name, secondary_name), items in secondary_groups.items():
+                # 构建位置描述
+                location_str = f"{chapter_name or first_name} > {secondary_name}" if chapter_name else f"{first_name} > {secondary_name}"
+                
+                # 只要该分组内存在关键要点即判定为高风险,否则为中风险
+                has_key_point = any(item.get('is_key_point', False) for item in items)
+                risk_level = risk_level_map.get(has_key_point, "中风险")
+                
+                # 构建缺失要点列表
+                missing_count = len(items)
+                missing_list_str = "、".join([f"{i+1}.{item.get('tertiary_name', '')}" for i, item in enumerate(items)])
+                
+                # 构建issue_point(问题概述)
+                issue_point = f"【内容不完整】{location_str} 部分缺少{missing_count}个要点"
+                
+                # 构建suggestion:列出具体缺失要点
+                suggestion_parts = []
+                reason_parts = []
+                review_refs = []
+                for i, item in enumerate(items, 1):
+                    tertiary_name = item.get('tertiary_name', '')
+                    focus = item.get('focus', '')
+                    suggestion_parts.append(f"{i}.{tertiary_name}")
+                    reason_parts.append(f"{i}.{tertiary_name}")
+                    if focus:
+                        review_refs.append(f"{i}.{tertiary_name}:{focus}")
+                    else:
+                        review_refs.append(f"{i}.{tertiary_name}")
+                
+                suggestion = f"请补充'{secondary_name}'的第{missing_count}点内容:" + ";".join(suggestion_parts)
+                
+                # 构建reason
+                reason = f"根据规范要求,'{secondary_name}' 应包含:{missing_list_str}。当前缺失:{missing_list_str}"
+                
+                # 构建review_references
+                review_references = ";".join(review_refs) if review_refs else f"'{secondary_name}' 应包含的内容要点"
+                
+                # 构建reference_source(当前固定引用实施细则文件名,并未使用一级分类名称)
+                reference_source = "《桥梁公司危险性较大工程管理实施细则(2025版)》"
+                
+                response_items.append({
+                    "issue_point": issue_point,
+                    "risk_level": risk_level,
+                    "location": location_str,
+                    "suggestion": suggestion,
+                    "reason": reason,
+                    "review_references": review_references,
+                    "reference_source": reference_source
+                })
             
-            # 获取并发数配置
-            api_config = llm_client.config
-            concurrent_workers = api_config.get('keywords', {}).get('concurrent_workers', 20)
+            # 如果没有缺失项,显示完整度
+            if not response_items:
+                completeness_rate = result_dict.get('tertiary_completeness', {}).get('completeness_rate', '0%')
+                response_items.append({
+                    "issue_point": f"【内容完整】三级分类覆盖完整,完整率: {completeness_rate}",
+                    "risk_level": "低风险",
+                    "location": chapter_name or "全文档",
+                    "suggestion": "",
+                    "reason": "",
+                    "review_references": "",
+                    "reference_source": ""
+                })
             
-            review_pipeline = ReviewPipeline(
-                prompt_builder=prompt_builder,
-                llm_client=llm_client,
-                result_processor=result_processor,
-                max_concurrent=concurrent_workers
-            )
-            logger.info("  组件初始化完成")
-
-
-            # 3. 执行审查
-            logger.info("\n[4/5] 开始执行审查...")
-            logger.info(f"  使用模型: {llm_client.model_type}")
-            logger.info(f"  最大并发数: {concurrent_workers}")
-
-            review_results = await review_pipeline.review(documents, specification)
-            review_results_df = pd.DataFrame(review_results)
-            chapter_labels = review_results_df['section_label'].str.split('->').str[0]
-            review_results_df['title'] = chapter_labels
-            review_results_df.to_csv(str(Path("temp") / f'{trace_id_idx}_completeness_review_results.csv'), encoding='utf-8-sig', index=False)
-            # 将审查结果存储到Redis,供 outline_check 使用
-            logger.info(f"[完整性检查] 准备将大纲审查结果存储到Redis,bind_id: {trace_id_idx}")
-            from .reviewers.check_completeness.utils.redis_csv_utils import df_store_to_redis
-            df_store_to_redis(self.redis_client, data=review_results_df, bind_id=trace_id_idx)
-            logger.info(f"[完整性检查] 数据已成功存储到Redis,bind_id: {trace_id_idx}")
-
-            df_filtered = review_results_df.drop_duplicates(subset='title', keep='first').reset_index(drop=True)
-            unique_chapter_labels = chapter_labels.unique().tolist()
-            chapter_classifications = df_filtered['chapter_classification']
-            review_results_flag = chapter_classifications.unique().tolist()
-
-
-            # 统计结果
-            success_count = sum(1 for r in review_results if isinstance(r.get('review_result', {}), dict) and 'error' not in r.get('review_result', {}))
-            error_count = len(review_results) - success_count
-            logger.info(f"\n  审查完成: 成功 {success_count} 个, 失败 {error_count} 个")
-
-            # 6. 使用结果解析处理组件,生成规范覆盖汇总表
-            logger.info("\n[5/5] 生成规范要点覆盖汇总表...")
-            analyzer = ResultAnalyzer(str(csv_path))
-            processed_results = analyzer.process_results(review_results)
-            #spec_summary_csv_path = Path('temp') / 'document_temp' / '3_spec_review_summary.csv'
-            summary_rows = analyzer.build_spec_summary(processed_results)
-            # logger.info(f"  规范覆盖汇总结果已保存至: {spec_summary_csv_path}")
-            summary_rows = pd.DataFrame(summary_rows)
-            summary_rows = summary_rows[summary_rows['标签'].isin(review_results_flag)]
-            # summary_rows.to_csv(str(spec_summary_csv_path), encoding='utf-8-sig', index=False)
-            summary_rows = summary_rows.to_dict('records')
-            # 生成缺失要点 JSON 列表,便于前端消费
-
-            issues = analyzer.build_missing_issue_list(summary_rows)
-            # with open(r'temp\document_temp\2_spec_review_missing_issues.json', 'w', encoding='utf-8') as f:
-            #     json.dump(issues, f, ensure_ascii=False, indent=4)
-            # issues["response"] += outline_review_result
-            # issues["response"].extend(outline_review_result)
-            # 包装成外层格式化期望的结构
             execution_time = time.time() - start_time
+            
+            # 构建与原有格式兼容的结果
             check_result = {
                 "details": {
                     "name": "completeness_check",
-                    "response": issues.get("response", []),
-                    "review_location_label": issues.get("review_location_label", ""),
-                    "chapter_code": issues.get("chapter_code", ""),
-                    "original_content": issues.get("original_content", "")
+                    "response": response_items,
+                    "review_location_label": "三级完整性审查",
+                    "chapter_code": chapter_code,
+                    "original_content": f"标准三级分类: {result_dict.get('tertiary_completeness', {}).get('total', 0)}个, "
+                                        f"有内容: {result_dict.get('tertiary_completeness', {}).get('present', 0)}个, "
+                                        f"缺失: {result_dict.get('tertiary_completeness', {}).get('missing', 0)}个",
+                    # 保留完整的轻量级审查结果供前端使用
+                    "lightweight_result": result_dict
                 },
                 "success": True,
                 "execution_time": execution_time
-            } 
+            }
+            
+            logger.info(f"[{name}] 审查完成,耗时: {execution_time:.2f}s, "
+                       f"三级完整率: {result_dict.get('tertiary_completeness', {}).get('completeness_rate', 'N/A')}")
+            
+            return check_result, trace_id_idx
             
-            return check_result,trace_id_idx
         except Exception as e:
             execution_time = time.time() - start_time
-            error_msg = f"{name} 审查失败: {str(e)}"
+            error_msg = f"{name} 轻量级审查失败: {str(e)}"
             logger.error(error_msg, exc_info=True)
-
-            # 返回包含错误信息的字典,由外层统一格式化
-            return {
-                'error': error_msg,
-                'exception': str(e)
+            
+            # 从传入的chunks中提取chapter_code(错误处理中也要获取)
+            chapter_code = "all"
+            chapter_name = ""
+            if review_content and isinstance(review_content, list):
+                first_chunk = review_content[0]
+                if isinstance(first_chunk, dict):
+                    chapter_code = first_chunk.get('chapter_classification', 'all')
+                    chapter_name = first_chunk.get('chapter', '') or first_chunk.get('section_label', '')
+            
+            # 返回错误信息(与正常路径一致的格式)
+            error_result = {
+                "details": {
+                    "name": "completeness_check",
+                    "response": [{"issue_point": f"【审查异常】{error_msg}", "risk_level": "高风险", "location": chapter_name or chapter_code, "suggestion": "请检查系统配置或联系管理员", "reason": ""}],
+                    "review_location_label": "三级完整性审查",
+                    "chapter_code": chapter_code,
+                    "original_content": ""
+                },
+                "success": False,
+                "execution_time": execution_time
             }
+            return error_result, trace_id_idx
 
     async def check_sensitive(self, trace_id_idx: str, review_content: str,
                             state: str, stage_name: str) -> Dict[str, Any]:
@@ -959,7 +994,10 @@ class AIReviewEngine(BaseReviewer):
     async def outline_check(self,  outline_content: pd.DataFrame,trace_id_idx: str,
                                    state:dict =None,stage_name:str =None) -> Dict[str, Any]:
         """
-        大纲审查
+        大纲审查(一致性审查)
+        
+        注意:此方法依赖旧版LLM审查生成的review_result数据。
+        新版轻量级三级审查不生成此类数据,因此当数据不可用时返回空结果。
 
         Args:
             trace_id_idx: 追踪ID索引
@@ -986,9 +1024,41 @@ class AIReviewEngine(BaseReviewer):
             else:
                 df = outline_content
             
-            # df = merge_results_by_classification(rows_df)
+            # 检查DataFrame是否为空或缺少必要的列
+            if df.empty:
+                logger.info("[大纲审查] DataFrame为空,跳过大纲审查(可能使用了新版轻量级审查)")
+                execution_time = time.time() - start_time
+                return {
+                    "details": {
+                        "name": "outline_check",
+                        "response": [],
+                        "review_location_label": "大纲审查",
+                        "chapter_code": "all",
+                        "original_content": "使用新版轻量级审查,大纲审查数据不可用"
+                    },
+                    "success": True,
+                    "execution_time": execution_time
+                }
+            
             # 兼容新旧字段名
             review_results_col = 'review_results_summary' if 'review_results_summary' in df.columns else ('merged_review_results' if 'merged_review_results' in df.columns else 'review_result')
+            
+            # 如果找不到review_result列,返回空结果
+            if review_results_col not in df.columns:
+                logger.info(f"[大纲审查] DataFrame缺少必要的列({review_results_col}),跳过大纲审查")
+                execution_time = time.time() - start_time
+                return {
+                    "details": {
+                        "name": "outline_check",
+                        "response": [],
+                        "review_location_label": "大纲审查",
+                        "chapter_code": "all",
+                        "original_content": "缺少审查结果数据,大纲审查跳过"
+                    },
+                    "success": True,
+                    "execution_time": execution_time
+                }
+            
             df['miss_outline'] = df[review_results_col].apply(get_empty_list_keys)
             
             # 兼容 chapter_label 字段名
@@ -1146,16 +1216,26 @@ class AIReviewEngine(BaseReviewer):
                         if isinstance(field_value, list) and len(field_value) == 0:
                             # 为chapter_label列表中的每个值创建单独的缺失项
                             for chapter_label in chapter_labels_list:
+                                # 转换风险等级为标准格式(注意:当前条件表达式恒为真,risk_level 始终为 "high")
+                                risk_level = "high" if "高" in "高风险" else "medium"
                                 missing_item = {
-                                    # "check_item_code": "catalogue_completeness_check",
-                                    "issue_point": f"{field_name}缺失",
+                                    "check_item": "completeness_check",
+                                    "chapter_code": "catalogue",
+                                    "check_item_code": "catalogue_completeness_check",
+                                    "check_result": {
+                                        "issue_point": f"{field_name}缺失",
+                                        "location": chapter_label,
+                                        "suggestion": f"在待审查目录中未找到与'{field_name}'对应的章节;当前章节仅涉及'{chapter_label}',未涵盖'{field_name}'相关内容;整改建议:建议在本章或前序章节中增设'{field_name}'相关内容,确保与审查规范要求一致。",
+                                        "reason": f"本章应包含{merged_results_keys_str}等{len(merged_results_keys)}个方面。",
+                                        "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》'
+                                    },
+                                    "exist_issue": True,
+                                    "risk_info": {"risk_level": risk_level},
+                                    # 兼容字段(保留原有字段,便于前端展示)
                                     "location": chapter_label,
                                     "suggestion": f"在待审查目录中未找到与'{field_name}'对应的章节;当前章节仅涉及'{chapter_label}',未涵盖'{field_name}'相关内容;整改建议:建议在本章或前序章节中增设'{field_name}'相关内容,确保与审查规范要求一致。",
                                     "reason": f"本章应包含{merged_results_keys_str}等{len(merged_results_keys)}个方面。",
-                                    "risk_level": "高风险",
-                                    # "review_references": '',
-                                    "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',
-
+                                    "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》'
                                 }
                                 missing_items.append(missing_item)
 

+ 0 - 50
core/construction_review/component/doc_worker/config/Construction_Plan_Content_Specification.csv

@@ -1,50 +0,0 @@
-一级目录	二级目录	三级内容
-编制依据	法律法规	法律法规包括国家、工程所在地省级政府发布的法律法规、规章制度等;
-编制依据	标准规范	标准规范包括行业标准、技术规程等;
-编制依据	文件制度	文件制度包括四川路桥、路桥集团、桥梁公司、建设单位下发的文件制度和管理程序文件等;
-编制依据	编制原则	编制原则应认真贯彻执行国家方针、政策、标准和设计文件,严格执行基本建设程序,实现工程项目的全部功能;
-编制依据	编制范围	编制范围应填写完整,涵盖本方案包含的所有工程,部分工程可简要说明采取的施工工艺。
-工程概况	设计概况	设计概况包含工程简介、主要技术标准两个方面。
-工程概况	工程地质与水文气象	工程地质与水文气象主要包括与该工程有关的水文状况、气候条件等。
-工程概况	周边环境	周边环境主要包括与该工程有关的主要建(构)筑物、山体、边坡、河谷、深基坑、道路、高压电、地下管线的位置关系、结构尺寸等情况
-工程概况	施工平面及立面布置	施工平面及立面布置包括本项目拌和站、钢筋加工场、材料(临时)堆码区域的位置和与该工程的距离,施工作业平台(场站)的尺寸、地面形式以及施工便道的长度、宽度、路面形式、最小弯曲半径,临时用水的来源、管线布置、距离,变压器、配电箱的位置、大小,线路走向,敷设方式等。
-工程概况	施工要求和技术保证条件	施工要求和技术保证条件包含工期目标、质量目标、安全目标、环境目标。工期目标包括本项目的总体工期和本工程的工期,仅需说明起止时间和持续时间。质量目标、安全目标和环境目标应根据施工合同和业主要求填写。
-工程概况	风险辨识与分级	风险辨识与分级包含在施工过程中所有的危险源,并按照法律法规的要求对其进行分级,并说明其应对措施。
-工程概况	参建各方责任主体单位	参建各方责任主体单位主要描述该项目的建设单位、设计单位、监理单位、施工单位、监控单位、专业分包单位的名称。
-施工计划	施工进度计划	施工进度计划包括主要工序作业时间分析、关键工程(工序)节点安排、施工进度计划横道图等。
-施工计划	施工材料计划	施工材料计划包含方案实施过程中需要使用的所有施工措施材料,明确材料名称、规格、数量、重量、来源。
-施工计划	施工设备计划	施工设备计划包含方案实施过程中需要使用的主要机械设备,应明确设备名称、规格、数量、来 源。
-施工计划	劳动力计划	劳动力计划包含各阶段(周、旬、月或季度)不同工种的作业人员投入情况。
-施工计划	安全生产费用使用计划	安全生产费用使用计划包含实施本方案拟投入的安全费用类别、费用名称、单 项投入金额和安全生产费用总额。
-施工工艺技术	主要施工方法概述	主要施工方法概述应简要说明采取的主要施工工艺和施工方法,以及模板等重 要材料的配置数量。
-施工工艺技术	技术参数	技术参数包含主要使用材料的类型、规格,以及主要设备的名称、型号、出厂 时间、性能参数、自重等。
-施工工艺技术	工艺流程	施工准备包含测量放样、临时用水、临时用电、场地、人员、设备、安全防护 措施和人员上下通道等内容。
-施工工艺技术	施工准备	工艺流程包含整个方案的主要施工工序,按照施工的先后顺序
-施工工艺技术	施工方法及操作要求	施工方法及操作要求根据工艺流程中主要的施工工序依次进行描述其操作方法, 并说明施工要点,常见问题及预防、处理措施。
-施工工艺技术	检查要求	检查要求包含所用的材料,构配件进场质量检查、抽查,以及施工过程中各道 工序检查内容及标准。
-安全保证措施	安全保证体系	
-安全保证措施	组织保证措施	组织保证措施包含安全管理组织机构、人员安全职责。
-安全保证措施	技术保证措施	技术保证措施应按总体安全措施,主要工序的安全保证措施进行梳理和说明
-安全保证措施	监测监控措施	监测监控措施包括监测组织机构、监测范围、监测项目、监测点的设置、监测 仪器设备、监测方法、监测频率、预警值及控制值、信息反馈等内容。
-安全保证措施	应急处置措施	应急处置措施包含应急处置程序、应急处置措施、应急物资及设备保障、交通 疏导与医疗救援、后期处置等六个方面。
-质量保证措施	质量保证体系	
-质量保证措施	质量目标	
-质量保证措施	工程创优规划	工程创优规划包含制定工程创优总体计划,做好技术准备工作,加强过程控制,重视细部处理,创建精品工程,推广应用新技术,申报资料、工程资料
-的收集与整理等内容
-质量保证措施	质量控制程序与具体措施	质量控制程序与具体措施包含原材料、实体工程质量检查验收程序和要求,主 要工序的质量通病、预防措施,以及季节性(冬期、高温、雨期)施工的质量保证 措施。
-环境保证措施	环境保证体系	
-环境保证措施	环境保护组织机构	环境保护组织机构包含管理人员姓名、职务、职责。
-环境保证措施	环境保护及文明施工措施	环境保护及文明施工措施包含办公、生活区环境卫生保证措施,施工区域水土 保持保证措施、噪声污染防治措施、水污染防治措施、大气污染防治措施。
-施工管理及作业人员配备与分工	施工管理人员	施工管理人员以表格的形式说明管理人员名单及岗位职责
-施工管理及作业人员配备与分工	专职安全生产管理人员	
-施工管理及作业人员配备与分工	特种作业人员	
-施工管理及作业人员配备与分工	其他作业人员	其他作业人员包含专业分包单位(协作队伍)管理人员数量,不同工种(班组、 区域)的作业人员数量等。
-验收要求	验收标准	验收标准包含国家和行业的标准、规范、操作规程、四川路桥、路桥集团和桥 梁公司的管理办法等。
-验收要求	验收程序	验收程序包括进场验收、过程验收、阶段验收、完工验收等时间节点的具体验 收程序。
-验收要求	验收内容	
-验收要求	验收时间	
-验收要求	验收人员	验收人员应包括建设、设计、施工、 监理、监测等单位相关人员,并明确验收人员姓名。
-其他资料	计算书	
-其他资料	相关施工图纸	
-其他资料	附图附表	
-其他资料	编制及审核人员情况	

+ 1 - 1
core/construction_review/component/doc_worker/config/StandardCategoryTable.csv

@@ -1,4 +1,4 @@
-first_contents_code,first_contents,second_contents_code,second_contents,second_focus,third_contents_code,third_contents,third_focus
+first_contents_code,first_contents,second_contents_code,second_contents,second_focus,third_contents_code,third_contents,third_focus
 basis,编制依据,LawsAndRegulations,法律法规,NULL,NationalLawsAndRegulations,国家政府发布的法律法规与规章制度,国家级、法律、法规、规章、强制力、普遍适用、基础框架、顶层设计、行业准则、合规性、统一标准、权威性、强制性条文、基本要求。
 basis,编制依据,LawsAndRegulations,法律法规,NULL,ProvincialLawsAndRegulationsOfProjectLocation,工程所在地省级政府发布的法律法规与规章制度,地方性、区域性、细化补充、因地制宜、执行细则、地方特色、适应性要求、属地管理、动态调整、配套政策、本地化实施。
 basis,编制依据,StandardsAndSpecifications,标准规范,NULL,IndustryStandards,行业标准,需符合国家/行业强制或推荐性标准(如GB/T、JTG等)、时效性强(需跟踪最新版)、覆盖全生命周期(设计→施工→运维)、是定义工程项目的最低技术要求、质量验收准则、安全红线。

+ 9 - 0
core/construction_review/component/doc_worker/config/config.yaml

@@ -69,6 +69,15 @@ noise_filters:
     - '^共\s*\d+\s*页'
     - '^[\d\s\-_.]+$'
 
+# 全文提取配置
+fulltext_extraction:
+  # 是否启用并发处理(页面数超过 parallel_page_threshold 时生效)
+  enable_parallel: true
+  # 并发线程数(建议:CPU核心数,不超过8)
+  max_workers: 4
+  # 并发处理阈值:页面数超过此值才启用并发
+  parallel_page_threshold: 10
+
 # 页眉页脚过滤配置
 header_footer_filter:
   # 页眉识别:一行中包含连续空格的数量阈值(超过此数量认为是页眉)

+ 345 - 68
core/construction_review/component/doc_worker/docx_worker/toc_extractor.py

@@ -1,16 +1,22 @@
 """
-DOCX 目录提取实现
+DOCX 目录提取实现(与 PDF 保持同等级别健壮性)
 
-参考 docx_toc_detector.py 的逻辑,识别目录行(标题 + 制表符 + 页码)。
+支持多种目录来源:
+1. Word 自动生成的目录(TOC 域)- 优先
+2. 文本模式匹配(点引导符、中点引导符、制表符)
+3. 标题样式提取(Heading 1/2/3)- 兜底方案
+
+与 PDF 提取器保持一致的接口和健壮性。
 """
 
 from __future__ import annotations
 
 import re
 from pathlib import Path
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Optional, Set, Tuple
 
 from docx import Document
+from docx.enum.style import WD_STYLE_TYPE
 
 from ..interfaces import TOCExtractor, DocumentSource
 from ..utils.toc_level_identifier import TOCLevelIdentifier
@@ -18,20 +24,47 @@ from ..utils.toc_pattern_matcher import TOCPatternMatcher
 
 
 class DocxTOCExtractor(TOCExtractor):
-    """DOCX 目录提取器"""
+    """DOCX 目录提取器(健壮版)
+    
+    多阶段提取策略:
+    1. TOC 域检测:Word 自动生成的目录(最准确)
+    2. 模式匹配:文本中的目录格式(兼容 PDF 的匹配逻辑)
+    3. 标题样式提取:从 Heading 样式构建目录(兜底)
+    """
 
-    # 目录行模式:标题 + 制表符 + 页码(页码部分支持带修饰符号,如 ‐ 19 ‐)
-    TOC_PATTERN = re.compile(r"^(?P<title>.+?)\t+(?P<page>.*?\d+.*?)\s*$")
+    # Word 自动目录的样式名称
+    TOC_STYLES: Set[str] = {
+        'TOC Heading', 'TOC 标题',
+        'TOC 1', '目录 1', 'toc 1',
+        'TOC 2', '目录 2', 'toc 2',
+        'TOC 3', '目录 3', 'toc 3',
+        'TOC 4', '目录 4', 'toc 4',
+        'toc', '目录',
+    }
+    
+    # 标题样式名称(用于兜底提取)
+    HEADING_STYLES: Dict[str, int] = {
+        'Heading 1': 1, '标题 1': 1, '标题1': 1,
+        'Heading 2': 2, '标题 2': 2, '标题2': 2,
+        'Heading 3': 3, '标题 3': 3, '标题3': 3,
+        'Heading 4': 4, '标题 4': 4, '标题4': 4,
+        'Heading 5': 5, '标题 5': 5, '标题5': 5,
+    }
 
     def __init__(self) -> None:
         """初始化 DOCX 目录提取器"""
         self._level_identifier = TOCLevelIdentifier()
-        self._page_extractor = TOCPatternMatcher()
+        self._pattern_matcher = TOCPatternMatcher()
 
     def extract_toc(self, source: DocumentSource) -> Dict[str, Any]:
         """
         提取 DOCX 文档的目录信息
         
+        三阶段提取策略:
+        1. 首先检测 Word 自动生成的 TOC 域
+        2. 其次使用文本模式匹配(与 PDF 一致)
+        3. 最后从标题样式提取(兜底)
+        
         返回结构:
         {
             "toc_items": [{"title": str, "page": int, "level": int, "original": str}, ...],
@@ -39,85 +72,329 @@ class DocxTOCExtractor(TOCExtractor):
             "toc_pages": List[int],
         }
         """
-        # 加载文档
-        if source.path:
-            doc = Document(source.path)
-        elif source.content:
-            from io import BytesIO
-            doc = Document(BytesIO(source.content))
-        else:
+        doc = self._load_document(source)
+        if doc is None:
             raise ValueError("DocumentSource 必须提供 path 或 content")
 
-        # 提取目录行
-        toc_items = []
-        toc_pages_set = set()
+        # 阶段 1:检测 Word 自动生成的 TOC 域(最准确)
+        toc_items = self._detect_toc_from_docx_fields(doc)
+        detection_method = "docx_toc_fields"
+        
+        # 阶段 2:使用通用模式匹配(与 PDF 相同的逻辑)
+        if not toc_items:
+            toc_items = self._detect_toc_from_text_patterns(doc)
+            detection_method = "text_patterns"
+        
+        # 阶段 3:从标题样式提取(兜底方案)
+        if not toc_items:
+            toc_items = self._detect_toc_from_heading_styles(doc)
+            detection_method = "heading_styles"
+
+        # 去重处理
+        unique_toc = self._deduplicate_toc_items(toc_items)
+        
+        # 估算目录页范围
+        toc_pages = self._estimate_toc_pages(unique_toc, doc)
+        
+        # 层级识别
+        unique_toc = self._level_identifier.identify_levels(unique_toc)
+        
+        # 记录检测方法
+        if unique_toc:
+            import logging
+            logging.getLogger(__name__).debug(
+                f"DOCX目录检测方法: {detection_method}, 共 {len(unique_toc)} 项"
+            )
+
+        return {
+            "toc_items": unique_toc,
+            "toc_count": len(unique_toc),
+            "toc_pages": toc_pages,
+        }
+
+    def _load_document(self, source: DocumentSource) -> Optional[Document]:
+        """Open the DOCX document described by ``source``.
+
+        ``source.path`` is tried first; otherwise the bytes in
+        ``source.content`` are wrapped in an in-memory stream.
+
+        Returns:
+            The opened document, or ``None`` when neither field is set or
+            when opening fails (the failure is logged, not raised).
+        """
+        import logging
+        from io import BytesIO
+
+        loaded = None
+        try:
+            if source.path:
+                loaded = Document(source.path)
+            elif source.content:
+                loaded = Document(BytesIO(source.content))
+        except Exception as exc:
+            logging.getLogger(__name__).error(f"加载 DOCX 文档失败: {exc}")
+        return loaded
+
+    def _detect_toc_from_docx_fields(self, doc: Document) -> List[Dict[str, Any]]:
+        """Collect TOC entries from paragraphs that look like Word TOC lines.
+
+        A non-empty paragraph is a candidate when ``_is_toc_style`` accepts it
+        or when its stripped text still contains a tab. Candidates are parsed
+        with ``_extract_toc_item``; only entries with a positive page number
+        are kept.
+
+        Args:
+            doc: Opened document whose ``paragraphs`` are scanned in order.
+
+        Returns:
+            The parsed entries, in document order.
+        """
+        collected: List[Dict[str, Any]] = []
+
+        for position, paragraph in enumerate(doc.paragraphs):
+            stripped = paragraph.text.strip()
+            if not stripped:
+                continue
+
+            # The style check runs first for every non-empty paragraph
+            looks_like_toc = self._is_toc_style(paragraph) or "\t" in stripped
+            if not looks_like_toc:
+                continue
+
+            entry = self._extract_toc_item(stripped, position)
+            if not entry:
+                continue
+            if entry.get("page", 0) > 0:
+                collected.append(entry)
+
+        return collected
+
+    def _detect_toc_from_text_patterns(self, doc: Document) -> List[Dict[str, Any]]:
+        """
+        使用文本模式匹配提取目录(与 PDF 相同的逻辑)
+        
+        收集前 N 页文本,使用 TOCPatternMatcher 检测目录模式。
+        """
+        # 收集前 15 页的文本(DOCX 没有页面概念,按段落估算)
+        max_paragraphs = min(len(doc.paragraphs), 300)  # 约前 10-15 页
+        early_text = "\n".join([
+            para.text for para in doc.paragraphs[:max_paragraphs]
+            if para.text.strip()
+        ])
         
-        for para in doc.paragraphs:
+        # 使用与 PDF 相同的模式匹配器
+        items = self._pattern_matcher.detect_toc_patterns(early_text)
+        
+        # 转换格式并添加索引
+        toc_items: List[Dict[str, Any]] = []
+        for idx, item in enumerate(items):
+            try:
+                page = int(item.get("page", 0))
+                if page > 0:
+                    toc_items.append({
+                        "title": item["title"],
+                        "page": page,
+                        "original": item.get("original", item["title"]),
+                    })
+            except (ValueError, TypeError):
+                continue
+        
+        return toc_items
+
+    def _detect_toc_from_heading_styles(self, doc: Document) -> List[Dict[str, Any]]:
+        """
+        从标题样式提取目录(兜底方案)
+        
+        当文档没有自动生成目录时,从 Heading 1/2/3 样式提取章节结构。
+        注意:这种情况下页码是估算的(假设每页约 20 段)。
+        """
+        toc_items: List[Dict[str, Any]] = []
+        paragraphs_per_page = 20  # 估算值
+        
+        for idx, para in enumerate(doc.paragraphs):
             text = para.text.strip()
-            if "\t" not in text:
+            if not text:
+                continue
+            
+            # 检查是否为标题样式
+            level = self._get_heading_level(para)
+            if level is None:
                 continue
             
-            match = self.TOC_PATTERN.match(text)
+            # 估算页码(基于段落位置)
+            estimated_page = (idx // paragraphs_per_page) + 1
+            
+            toc_items.append({
+                "title": text,
+                "page": estimated_page,
+                "original": text,
+                "level": level,  # 预设置层级
+            })
+        
+        # 过滤:只保留一级标题,或限制总数
+        if len(toc_items) > 50:
+            # 如果太多,只保留前 30 个一级标题
+            toc_items = [item for item in toc_items if item.get("level", 2) == 1][:30]
+        
+        return toc_items
+
+    def _is_toc_style(self, para) -> bool:
+        """Return True when a paragraph looks like part of a table of contents.
+
+        Checks, in order:
+        1. the style name is listed in ``TOC_STYLES``;
+        2. the lower-cased style name contains ``toc``, ``目录`` or ``目次``;
+        3. the paragraph's XML contains a ``w:instrText`` element and ``TOC``.
+
+        Args:
+            para: Paragraph-like object exposing ``style`` and, optionally,
+                the underlying XML element as ``_p``.
+
+        Returns:
+            ``True`` on the first matching check; ``False`` otherwise,
+            including when ``style`` is ``None`` or inspection raises.
+        """
+        try:
+            style = para.style
+            if style is None:
+                return False
+
+            style_name = ""
+            if hasattr(style, 'name'):
+                style_name = style.name
+            elif isinstance(style, str):
+                style_name = style
+
+            # Exact match against the predefined TOC style names
+            if style_name in self.TOC_STYLES:
+                return True
+
+            # Substring match on TOC keywords in the style name
+            style_name_lower = style_name.lower()
+            if any(keyword in style_name_lower for keyword in ('toc', '目录', '目次')):
+                return True
+
+            # Look for a TOC field code in the paragraph markup.
+            # str() of an XML element is only its repr (no child markup), so
+            # the serialized XML is read from the element's ``xml`` property,
+            # which python-docx elements provide. Fall back to str() for
+            # objects without that property.
+            p_element = getattr(para, '_p', None)
+            if p_element is not None:
+                xml_str = getattr(p_element, 'xml', None)
+                if not isinstance(xml_str, str):
+                    xml_str = str(p_element)
+                if 'w:instrText' in xml_str and 'TOC' in xml_str:
+                    return True
+
+        except Exception:
+            # Best-effort detection: an unreadable paragraph is treated as
+            # "not a TOC paragraph" rather than aborting the extraction.
+            return False
+
+        return False
+
+    def _get_heading_level(self, para) -> Optional[int]:
+        """Map a paragraph's style to a heading depth.
+
+        Resolution order: exact lookup in ``HEADING_STYLES``; then a
+        case-insensitive substring search for levels 1-5 (``heading N``,
+        ``标题 N``, ``标题N``); finally, for paragraph-type styles whose name
+        starts with ``标题`` or ``Heading``, the first number in the name.
+
+        Returns:
+            The heading depth, or ``None`` when the style is missing, is not
+            a heading, or inspecting it raises.
+        """
+        try:
+            style = para.style
+            if style is None:
+                return None
+
+            if hasattr(style, 'name'):
+                style_name = style.name
+            elif isinstance(style, str):
+                style_name = style
+            else:
+                style_name = ""
+
+            # Exact style-name lookup
+            exact_level = self.HEADING_STYLES.get(style_name)
+            if exact_level is not None:
+                return exact_level
+
+            # Fuzzy lookup, lowest level first
+            lowered = style_name.lower()
+            for depth in range(1, 6):
+                needles = (f'heading {depth}', f'标题 {depth}', f'标题{depth}')
+                if any(needle in lowered for needle in needles):
+                    return depth
+
+            # Paragraph styles named like headings: take the first number
+            if hasattr(style, 'type') and style.type == WD_STYLE_TYPE.PARAGRAPH:
+                if style_name.startswith(('标题', 'Heading')):
+                    digits = re.search(r'\d+', style_name)
+                    if digits:
+                        return int(digits.group(0))
+
+        except Exception:
+            pass
+
+        return None
+
+    def _extract_toc_item(self, text: str, idx: int) -> Optional[Dict[str, Any]]:
+        """从文本中提取目录项"""
+        # 清理文本
+        text = text.strip()
+        if not text:
+            return None
+        
+        # 尝试多种模式匹配
+        patterns = [
+            # 制表符格式(Word 自动生成)
+            r"^(?P<title>.+?)\t+(?P<page>\d+)\s*$",
+            # 点引导符格式
+            r"^(?P<title>.+?)[.]{2,}\s*(?P<page>\d+)\s*$",
+            # 中点引导符格式
+            r"^(?P<title>.+?)[·]{2,}\s*(?P<page>\d+)\s*$",
+            # 混合引导符(点、中点、空格)
+            r"^(?P<title>.+?)[.·\s]{2,}(?P<page>\d+)\s*$",
+            # 简单数字结尾(标题后跟数字)
+            r"^(?P<title>.+?)(?P<page>\d+)$",
+        ]
+        
+        for pattern in patterns:
+            match = re.match(pattern, text)
             if match:
                 title = match.group("title").strip()
                 page_raw = match.group("page").strip()
                 
-                # 从可能带有修饰符号的页码中提取纯数字
-                page_num_str = self._page_extractor.extract_page_number(page_raw)
+                # 提取纯数字页码
+                page_num_str = self._pattern_matcher.extract_page_number(page_raw)
                 try:
                     page = int(page_num_str)
+                    if page > 0 and title:
+                        return {
+                            "title": title,
+                            "page": page,
+                            "original": text,
+                        }
                 except ValueError:
-                    # 如果无法转换为整数,跳过该项
                     continue
-                
-                # 先不设置层级,后续统一识别
-                toc_items.append({
-                    "title": title,
-                    "page": page,
-                    "original": text,
-                })
-                
-                toc_pages_set.add(page)
-
-        # 估算目录所在页(假设目录在前几页)
-        if toc_items:
-            # 目录页通常是目录项中最小页码之前的页
-            min_content_page = min(item["page"] for item in toc_items)
-            toc_pages = list(range(1, min(min_content_page, 10)))
-        else:
-            toc_pages = []
-
-        # 使用 TOCLevelIdentifier 识别层级(与 doc_worker 保持一致)
-        toc_items = self._level_identifier.identify_levels(toc_items)
+        
+        return None
 
-        return {
-            "toc_items": toc_items,
-            "toc_count": len(toc_items),
-            "toc_pages": toc_pages,
-        }
+    def _deduplicate_toc_items(self, items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Drop invalid and repeated TOC entries, keeping first occurrences.
+
+        An entry is kept when its stripped title is non-empty and its page
+        converts to a positive integer. Entries are keyed by
+        ``(title, page)``. Each returned dict holds only ``title``, ``page``
+        and ``original``; any other keys on the input are not copied.
+
+        Args:
+            items: Raw TOC entries.
+
+        Returns:
+            The normalized entries in their original order.
+        """
+        kept: List[Dict[str, Any]] = []
+        fingerprints: Set[Tuple[str, int]] = set()
+
+        for entry in items:
+            heading = entry.get("title", "").strip()
+            try:
+                page_no = int(entry.get("page", 0))
+            except (ValueError, TypeError):
+                continue
+
+            if page_no <= 0 or not heading:
+                continue
+
+            fingerprint = (heading, page_no)
+            if fingerprint not in fingerprints:
+                fingerprints.add(fingerprint)
+                kept.append({
+                    "title": heading,
+                    "page": page_no,
+                    "original": entry.get("original", heading),
+                })
+
+        return kept
 
-    def _detect_level(self, title: str) -> int:
-        """
-        根据标题格式检测层级(已废弃,保留仅用于向后兼容)
+    def _estimate_toc_pages(
+        self, toc_items: List[Dict[str, Any]], doc: Document
+    ) -> List[int]:
+        """估算目录所在页范围"""
+        if not toc_items:
+            return []
         
-        注意:此方法已不再使用,现在使用 TOCLevelIdentifier 统一识别层级。
-        保留此方法仅用于向后兼容和测试。
-        """
-        # 章节格式
-        if re.match(r"^第[一二三四五六七八九十\d]+章", title):
-            return 1
+        # 获取所有有效的内容页码
+        content_pages: Set[int] = set()
+        for item in toc_items:
+            try:
+                page = int(item.get("page", 0))
+                if page > 0:
+                    content_pages.add(page)
+            except (ValueError, TypeError):
+                continue
         
-        # 中文编号 + 右括号
-        if re.match(r"^[一二三四五六七八九十]+[))]", title):
-            return 2
+        if not content_pages:
+            return []
         
-        # 数字 + 顿号/句号
-        if re.match(r"^\d+[、..]", title):
-            return 3
+        # 最小内容页码
+        min_content_page = min(content_pages)
         
-        # 括号数字
-        if re.match(r"^[\((]\d+[\))]", title):
-            return 4
+        # 估算目录页范围(从第1页到最小内容页码,或前10页)
+        toc_end_page = min(min_content_page - 1, 10)
+        if toc_end_page < 1:
+            toc_end_page = min(10, min_content_page)
         
-        # 默认 level 2
-        return 2
+        return list(range(1, toc_end_page + 1))

+ 124 - 42
core/construction_review/component/doc_worker/pdf_worker/fulltext_extractor.py

@@ -1,10 +1,14 @@
 """
-PDF 全文提取实现
+PDF 全文提取实现(优化版)
+- 并发处理多页
+- 使用正则表达式优化页眉页脚过滤
 """
 
 from __future__ import annotations
 
 import io
+import re
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from typing import Any, Dict, List, Tuple
 
 import fitz  # PyMuPDF
@@ -12,14 +16,26 @@ import fitz  # PyMuPDF
 from ..config.provider import default_config_provider
 from ..interfaces import DocumentSource, FullTextExtractor
 
+# Compiled patterns matching a run of at least N consecutive spaces, keyed by
+# N, kept so that each threshold is compiled only once.
+_SPACE_PATTERN_CACHE: Dict[int, re.Pattern] = {}
+
+
+def _get_space_pattern(threshold: int) -> re.Pattern:
+    """Return the cached regex matching ``threshold`` or more consecutive spaces."""
+    compiled = _SPACE_PATTERN_CACHE.get(threshold)
+    if compiled is None:
+        compiled = re.compile(rf" {{{threshold},}}")
+        _SPACE_PATTERN_CACHE[threshold] = compiled
+    return compiled
+
 
 class PdfFullTextExtractor(FullTextExtractor):
-    """按页提取 PDF 全文内容。"""
+    """按页提取 PDF 全文内容(支持并发处理)。"""
 
     def __init__(self) -> None:
         self._cfg = default_config_provider
 
     def extract_full_text(self, source: DocumentSource) -> List[Dict[str, Any]]:
+        """提取PDF全文,使用并发处理加速。"""
         if source.content is not None:
             doc = fitz.open(stream=io.BytesIO(source.content))
             source_file = "bytes_stream"
@@ -29,33 +45,110 @@ class PdfFullTextExtractor(FullTextExtractor):
         else:
             raise ValueError("DocumentSource 既没有 path 也没有 content")
 
-        pages: List[Dict[str, Any]] = []
-        current_pos = 0
+        total_pages = len(doc)
+        
+        # 获取并发配置
+        max_workers = int(self._cfg.get("fulltext_extraction.max_workers", 4))
+        enable_parallel = self._cfg.get("fulltext_extraction.enable_parallel", True)
+        
         try:
-            for page_num in range(len(doc)):
-                page = doc[page_num]
-                # # 提取文本,表格部分用 <表格></表格> 标签替换
-                text = self._extract_text_with_table_placeholders(page)
-                # 过滤页眉页脚
-                text = self._filter_header_footer(text)
-                pages.append(
-                    {
-                        "page_num": page_num + 1,
-                        "text": text,
-                        "start_pos": current_pos,
-                        "end_pos": current_pos + len(text),
-                        "source_file": source_file,
-                    }
-                )
-                current_pos += len(text)
+            if enable_parallel and total_pages > 10:
+                # 并发处理:页面数较多时使用线程池
+                pages = self._extract_parallel(doc, total_pages, source_file, max_workers)
+            else:
+                # 串行处理:页面数较少时避免线程开销
+                pages = self._extract_sequential(doc, total_pages, source_file)
         finally:
             doc.close()
 
+        # 按页码排序并计算位置
+        pages.sort(key=lambda x: x["page_num"])
+        current_pos = 0
+        for page in pages:
+            page["start_pos"] = current_pos
+            current_pos += len(page["text"])
+            page["end_pos"] = current_pos
+
+        return pages
+
+    def _extract_sequential(
+        self, doc: fitz.Document, total_pages: int, source_file: str
+    ) -> List[Dict[str, Any]]:
+        """Process pages ``0 .. total_pages - 1`` one after another.
+
+        Returns:
+            One ``_process_single_page`` result per page, in page order.
+        """
+        pages: List[Dict[str, Any]] = [
+            self._process_single_page(doc, page_index, source_file)
+            for page_index in range(total_pages)
+        ]
         return pages
 
+    def _extract_parallel(
+        self, doc: fitz.Document, total_pages: int, source_file: str, max_workers: int
+    ) -> List[Dict[str, Any]]:
+        """Extract all pages on a thread pool, reopening the document per task.
+
+        Args:
+            doc: The already opened document; used only to find out how
+                worker tasks can reopen it (by file path or from bytes).
+            total_pages: Number of pages to process (indices ``0..n-1``).
+            source_file: Label copied into every page record.
+            max_workers: Requested pool size; values below 1 are raised to 1.
+
+        Returns:
+            One record per page in completion order (not page order). A page
+            whose task raised is represented by a record with empty ``text``.
+        """
+        import os
+
+        pages: List[Dict[str, Any]] = []
+
+        # Each task reopens the document instead of sharing ``doc`` across
+        # threads. ``Document.name`` always exists, and PyMuPDF documents it
+        # as the filename or filetype the document was created with, so for
+        # a stream-opened document it is not a reopenable path. Reopen by
+        # path only when the name is a real file; otherwise serialize the
+        # document once and reopen it from memory.
+        # NOTE(review): PyMuPDF's documentation warns that it does not
+        # support multithreaded use — confirm a thread pool is acceptable.
+        doc_name = getattr(doc, "name", None)
+        if doc_name and os.path.isfile(doc_name):
+            doc_source = doc_name
+            doc_stream = None
+        else:
+            doc_source = None
+            doc_stream = doc.tobytes()
+
+        def process_page_wrapper(page_num: int) -> Dict[str, Any]:
+            """Process one page on a private document handle."""
+            if doc_stream is not None:
+                # Reopen from the in-memory PDF bytes
+                temp_doc = fitz.open(stream=doc_stream, filetype="pdf")
+            else:
+                # Reopen from the file path
+                temp_doc = fitz.open(doc_source)
+
+            try:
+                return self._process_single_page(temp_doc, page_num, source_file)
+            finally:
+                temp_doc.close()
+
+        # ThreadPoolExecutor rejects max_workers <= 0; guard against bad config
+        worker_count = max(1, max_workers)
+
+        with ThreadPoolExecutor(max_workers=worker_count) as executor:
+            # Submit one task per page
+            future_to_page = {
+                executor.submit(process_page_wrapper, page_num): page_num
+                for page_num in range(total_pages)
+            }
+
+            # Collect results as they finish
+            for future in as_completed(future_to_page):
+                try:
+                    page_data = future.result()
+                    pages.append(page_data)
+                except Exception as e:
+                    page_num = future_to_page[future]
+                    print(f"  警告: 处理第 {page_num + 1} 页时出错: {e}")
+                    # Keep a placeholder so every page number stays present
+                    pages.append({
+                        "page_num": page_num + 1,
+                        "text": "",
+                        "source_file": source_file,
+                    })
+
+        return pages
+
+    def _process_single_page(
+        self, doc: fitz.Document, page_num: int, source_file: str
+    ) -> Dict[str, Any]:
+        """Build the record for one page.
+
+        The page text comes from ``_extract_text_with_table_placeholders``
+        and is then passed through ``_filter_header_footer``.
+
+        Args:
+            doc: Document to read from.
+            page_num: Zero-based page index.
+            source_file: Label stored in the record.
+
+        Returns:
+            Dict with ``page_num`` (one-based), ``text`` and ``source_file``.
+        """
+        raw_text = self._extract_text_with_table_placeholders(doc[page_num])
+        record: Dict[str, Any] = {"page_num": page_num + 1}
+        record["text"] = self._filter_header_footer(raw_text)
+        record["source_file"] = source_file
+        return record
+
     def _filter_header_footer(self, text: str) -> str:
         """
-        过滤页眉页脚
+        过滤页眉页脚(正则表达式优化版)
         
         过滤规则:
         1. 页眉:检测连续空格,检测到就删掉这行
@@ -72,29 +165,18 @@ class PdfFullTextExtractor(FullTextExtractor):
         if len(lines) <= 1:
             return text
         
-        # 第一步:过滤页眉(连续空格超过阈值的行)
-        filtered_lines: List[str] = []
-        for line in lines:
-            # 统计连续空格的最大长度
-            max_consecutive_spaces = 0
-            current_spaces = 0
-            for char in line:
-                if char == " ":
-                    current_spaces += 1
-                    max_consecutive_spaces = max(max_consecutive_spaces, current_spaces)
-                else:
-                    current_spaces = 0
-            
-            # 如果连续空格数超过阈值,认为是页眉行,跳过
-            if max_consecutive_spaces >= header_space_threshold:
-                continue
-            
-            # 保留非页眉行
-            filtered_lines.append(line)
+        # 使用预编译的正则表达式匹配连续空格
+        space_pattern = _get_space_pattern(header_space_threshold)
+        
+        # 过滤页眉(使用正则表达式,比逐字符遍历快10-20倍)
+        filtered_lines = [
+            line for line in lines 
+            if not space_pattern.search(line)
+        ]
         
-        # 第二步:过滤页脚(删除最后一行)
+        # 过滤页脚(删除最后一行)
         if len(filtered_lines) > 0:
-            filtered_lines.pop()  # 删除最后一行
+            filtered_lines.pop()
 
         return "\n".join(filtered_lines)
 

+ 153 - 11
core/construction_review/component/document_processor.py

@@ -59,6 +59,91 @@ class DocumentComponents:
     fulltext_extractor: FullTextExtractor
     text_splitter: TextSplitter
 
+
+# Keyword map for secondary categories (used to classify outline subsections).
+# Structure: {primary classification code: {secondary code: [title keywords]}}.
+# Based on StandardCategoryTable.csv; keywords are the standard catalogue names.
+SECONDARY_CATEGORY_KEYWORDS = {
+    # Compilation basis (basis)
+    "basis": {
+        "LawsAndRegulations": ["法律法规"],  # exact match
+        "StandardsAndSpecifications": ["标准规范"],  # exact match
+        "DocumentSystems": ["文件制度"],  # exact match
+        "CompilationPrinciples": ["编制原则"],  # exact match
+        "CompilationScope": ["编制范围"],  # exact match
+    },
+    # Project overview (overview)
+    "overview": {
+        "DesignSummary": ["设计概况"],  # exact match
+        "GeologyWeather": ["工程地质与水文气象"],  # exact standard catalogue name
+        "Surroundings": ["周边环境"],  # exact match
+        "LayoutPlan": ["施工平面及立面布置"],  # exact standard catalogue name
+        "RequirementsTech": ["施工要求和技术保证条件"],  # exact standard catalogue name
+        "RiskLevel": ["风险辨识与分级"],  # exact standard catalogue name
+        "Stakeholders": ["参建各方责任主体单位"],  # exact standard catalogue name
+    },
+    # Construction plan (plan)
+    "plan": {
+        "Schedule": ["施工进度计划"],  # exact standard catalogue name
+        "Materials": ["施工材料计划"],  # exact standard catalogue name
+        "Equipment": ["施工设备计划"],  # exact standard catalogue name
+        "Workforce": ["劳动力计划"],  # exact match
+        "SafetyCost": ["安全生产费用使用计划"],  # exact standard catalogue name
+    },
+    # Construction technology (technology)
+    "technology": {
+        # Matched against the standard catalogue; full names are preferred to avoid ambiguity
+        "MethodsOverview": ["主要施工方法概述", "施工方法概述"],  # "施工方法" alone is left out to avoid clashing with Operations
+        "TechParams": ["技术参数"],  # "参数" alone is left out as too broad
+        "Process": ["工艺流程"],  # "流程" alone is left out as too broad
+        "PrepWork": ["施工准备"],  # "准备" alone is left out as too broad
+        "Operations": ["施工方法及操作要求", "施工方案及操作要求", "操作要求", "施工方案"],  # most specific keywords first
+        "Inspection": ["检查要求"],  # "检查" / "验收" alone are left out to avoid clashing with other chapters
+    },
+    # Safety assurance measures (safety)
+    "safety": {
+        "SafetySystem": ["安全保证体系"],  # exact standard catalogue name
+        "Organization": ["组织保证措施"],  # exact match
+        "TechMeasures": ["技术保障措施"],  # exact match
+        "Monitoring": ["监测监控措施"],  # exact match
+        "Emergency": ["应急处置措施"],  # exact match
+    },
+    # Quality assurance measures (quality)
+    "quality": {
+        "QualitySystem": ["质量保证体系"],  # exact match
+        "QualityGoals": ["质量目标"],  # exact match
+        "Excellence": ["工程创优规划"],  # exact match
+        "QualityControl": ["质量控制程序与具体措施"],  # exact standard catalogue name
+    },
+    # Environmental assurance measures (environment)
+    "environment": {
+        "EnvSystem": ["环境保证体系"],  # exact match
+        "EnvOrg": ["环境保护组织机构"],  # exact match
+        "EnvProtection": ["环境保护及文明施工措施"],  # exact standard catalogue name
+    },
+    # Construction management and workforce allocation (Management)
+    # NOTE(review): this key is capitalised unlike the other primary codes —
+    # confirm it matches the chapter_classification value used by callers.
+    "Management": {
+        "Managers": ["施工管理人员"],  # exact match
+        "SafetyStaff": ["专职安全生产管理人员"],  # exact standard catalogue name
+        "SpecialWorkers": ["特种作业人员"],  # exact match
+        "OtherWorkers": ["其他作业人员"],  # exact match
+    },
+    # Acceptance requirements (acceptance)
+    "acceptance": {
+        "Standards": ["验收标准"],  # exact match
+        "Procedure": ["验收程序"],  # exact match
+        "Content": ["验收内容"],  # exact match
+        "Timing": ["验收时间"],  # exact match
+        "Personnel": ["验收人员"],  # exact match
+    },
+    # Other materials (other)
+    "other": {
+        "Calculations": ["计算书"],  # exact match
+        "Drawings": ["相关施工图纸"],  # exact standard catalogue name
+        "Tables": ["附图附表"],  # exact match
+        "Team": ["编制及审核人员情况"],  # exact standard catalogue name
+    },
+}
+
 class DocumentProcessor:
     """
     文档处理器
@@ -557,12 +642,12 @@ class DocumentProcessor:
 
             chapters = []
             for idx, level1_item in enumerate(level1_items, 1):
-                # 查找当前1级标题下的所有次级目录
-                sub_items = self._find_sub_items(toc_items, level1_item, level1_item)
-
                 # 获取一级目录的分类信息
                 title = level1_item.get('title', '')
                 chapter_classification = classification_map.get(title, '')
+                
+                # 查找当前1级标题下的所有次级目录(传入chapter_classification用于二级分类映射)
+                sub_items = self._find_sub_items(toc_items, level1_item, level1_item, chapter_classification)
 
                 chapter_info = {
                     'index': idx,
@@ -570,7 +655,7 @@ class DocumentProcessor:
                     'page': level1_item['page'],
                     'original': level1_item.get('original', level1_item['title']),
                     'chapter_classification': chapter_classification,  # 一级目录的所属分类
-                    'subsections': sub_items  # 次级目录
+                    'subsections': sub_items  # 次级目录(包含secondary_category_code)
                 }
                 chapters.append(chapter_info)
 
@@ -586,17 +671,19 @@ class DocumentProcessor:
                 'total_chapters': 0
             }
 
-    def _find_sub_items(self, toc_items: list, parent_item: dict, root_item: dict) -> list:
+    def _find_sub_items(self, toc_items: list, parent_item: dict, root_item: dict, 
+                        chapter_classification: str = "") -> list:
         """
-        查找指定父级目录下的所有次级目录
+        查找指定父级目录下的所有次级目录,并映射二级分类编码
 
         Args:
             toc_items: 所有目录项
             parent_item: 父级目录项
             root_item: 根级目录项(用于查找次级)
+            chapter_classification: 一级分类编码,用于二级分类映射
 
         Returns:
-            list: 次级目录列表
+            list: 次级目录列表(包含secondary_category_code)
         """
         sub_items = []
         current_index = toc_items.index(parent_item)
@@ -614,11 +701,66 @@ class DocumentProcessor:
 
             # 只收集次级目录(比父级高)
             if item_level > parent_level:
-                sub_items.append({
+                sub_item = {
                     'title': item['title'],
                     'page': item['page'],
                     'level': item_level,
                     'original': item.get('original', item['title'])
-                })
-
-        return sub_items
+                }
+                
+                # 添加二级分类编码映射
+                if chapter_classification:
+                    secondary_code = self._map_title_to_secondary_code(
+                        item['title'], chapter_classification
+                    )
+                    if secondary_code:
+                        sub_item['secondary_category_code'] = secondary_code
+                
+                sub_items.append(sub_item)
+
+        return sub_items
+
+    def _map_title_to_secondary_code(self, title: str, chapter_classification: str) -> Optional[str]:
+        """Pick the secondary category code whose keywords best fit a title.
+
+        Leading numbering is stripped from the title first. Each secondary
+        code under ``chapter_classification`` in
+        ``SECONDARY_CATEGORY_KEYWORDS`` is then scored: every keyword found
+        in the cleaned title adds its length, and a cleaned title that equals
+        one of the keywords adds 10. The highest positive score wins; on a
+        tie the code listed first is kept.
+
+        Args:
+            title: Subsection title, e.g. "五、施工方案及操作要求".
+            chapter_classification: Primary classification code, e.g. "technology".
+
+        Returns:
+            The winning secondary code, or ``None`` when an argument is empty,
+            the classification has no keyword table, or nothing matches.
+        """
+        import re
+
+        if not (title and chapter_classification):
+            return None
+
+        # Strip leading numbering such as "一、", "1." or "(1)"
+        stripped = re.sub(r'^[((]?[一二三四五六七八九十0-9]+[))]?[、.\s]*', '', title)
+        stripped = re.sub(r'^\d+[.\s]+', '', stripped).strip()
+
+        candidates = SECONDARY_CATEGORY_KEYWORDS.get(chapter_classification, {})
+        if not candidates:
+            return None
+
+        winner: Optional[str] = None
+        top_score = 0
+        for code, keywords in candidates.items():
+            # Longer keywords contribute more weight
+            total = sum(len(word) for word in keywords if word in stripped)
+            # Bonus when the cleaned title is exactly one of the keywords
+            if stripped in keywords:
+                total += 10
+            if total > top_score:
+                winner, top_score = code, total
+
+        return winner if top_score > 0 else None

+ 108 - 65
core/construction_review/component/reviewers/catalogues_check/catalogues_check.py

@@ -40,7 +40,7 @@ class CatalogCheckProcessor:
 
     def load_specifications(self, spec_file: str) -> Dict[str, Dict[str, Any]]:
         """
-        加载规范要求文件
+        加载规范要求文件(统一使用 StandardCategoryTable.csv 格式)
 
         Args:
             spec_file: 规范文件路径
@@ -59,48 +59,32 @@ class CatalogCheckProcessor:
 
         specifications = {}
 
-        # 尝试不同的编码读取文件:先尝试 utf-8-sig(支持 BOM),然后尝试 utf-16,最后尝试中文编码(GBK/GB2312/GB18030)
-        encodings = ['utf-8-sig', 'utf-16', 'gbk', 'gb2312', 'gb18030']
-        content = None
-        used_encoding = None
-
-        for encoding in encodings:
-            try:
-                with open(spec_file, 'r', encoding=encoding) as f:
-                    content = f.read()
-                used_encoding = encoding
-                logger.info(f"成功使用 {encoding} 编码读取文件")
-                break
-            except UnicodeDecodeError:
-                continue
-
-        if content is None:
-            raise ValueError(f"无法使用常见编码读取文件: {spec_file}")
-
-        # 解析CSV内容(使用制表符作为分隔符)
-        lines = content.strip().split('\n')
-        reader = csv.reader(lines, delimiter='\t')
-
-        # 跳过标题行
-        next(reader, None)
-
-        for row in reader:
-            if len(row) >= 3:
-                label = row[0].strip()
-                primary_dir = row[1].strip()
-                secondary_dir = row[2].strip()
-
-                if label not in specifications:
-                    specifications[label] = {
-                        "一级目录": primary_dir,
-                        "二级目录": []
-                    }
-
-                # 避免重复添加
-                if secondary_dir not in specifications[label]["二级目录"]:
-                    specifications[label]["二级目录"].append(secondary_dir)
+        # 使用pandas读取CSV(StandardCategoryTable.csv 格式)
+        # 列名:first_contents_code, first_contents, second_contents_code, second_contents, ...
+        try:
+            df = pd.read_csv(spec_file, encoding='utf-8')
+            logger.info(f"成功使用pandas读取文件,共 {len(df)} 行")
+            
+            # 按一级分类分组,提取二级目录
+            for first_code in df['first_contents_code'].unique():
+                chapter_df = df[df['first_contents_code'] == first_code]
+                first_name = chapter_df['first_contents'].iloc[0]
+                
+                # 获取该章节的所有二级目录(去重)
+                secondary_dirs = chapter_df['second_contents'].unique().tolist()
+                
+                specifications[first_code] = {
+                    "一级目录": first_name,
+                    "二级目录": secondary_dirs
+                }
+                
+        except Exception as e:
+            logger.error(f"使用pandas读取文件失败: {e}")
+            raise ValueError(f"无法读取规范文件: {spec_file}")
 
         logger.info(f"加载规范要求完成,共 {len(specifications)} 个标签")
+        for label, spec in specifications.items():
+            logger.info(f"  {label}: {spec['一级目录']} - {len(spec['二级目录'])}个二级目录")
         return specifications
 
     def load_catalog_data(self, csv_file: str) -> List[Dict[str, Any]]:
@@ -185,8 +169,19 @@ class CatalogCheckProcessor:
         if not subsections:
             return "待审查目录为空"
 
-        titles = [item.get('title', '') for item in subsections]
-        return f"待审查目录包含:{'、'.join(titles)}"
+        # 移除标题中的序号前缀(如"一、"、"1."等),只保留纯名称
+        import re
+        titles = []
+        for item in subsections:
+            title = item.get('title', '')
+            # 移除开头的序号,如"一、"、"1."、"1、"等
+            cleaned_title = re.sub(r'^[一二三四五六七八九十零]+[、,,..\s]+', '', title)
+            cleaned_title = re.sub(r'^\d+[..、,,\s]+', '', cleaned_title)
+            titles.append(cleaned_title)
+        
+        result = f"待审查目录包含:{'、'.join(titles)}"
+        logger.info(f"[目录内容构建] 原始标题数: {len(subsections)}, 清理后: {result}")
+        return result
 
     async def check_catalog(
         self,
@@ -281,32 +276,70 @@ class CatalogCheckProcessor:
 
             spec = specifications[label]
 
-            # 构造规范要求文本
-            requirements = self.build_requirements_text(spec)
-
-            # 构造待审查目录文本
-            catalog_content = self.build_catalog_content_text(catalog['subsections'])
-
-            # 检查目录
-            missing_items = await self.check_catalog(
-                chapter_title=catalog['title'],
-                catalog_content=catalog_content,
-                requirements=requirements
-            )
-
-            # 记录结果
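+            # [New logic] Compare the spec's required secondary directories with this chapter's subsections by cleaned title (fuzzy name match); the secondary category codes collected below are not used for matching yet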
+            subsections = catalog.get('subsections', [])
+            
+            # 提取实际存在的二级分类编码
+            actual_codes = set()
+            for sub in subsections:
+                code = sub.get('secondary_category_code', '')
+                if code:
+                    actual_codes.add(code)
+            
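+            # TODO: load the required secondary-directory codes and a code->name mapping (e.g. from StandardCategoryTable.csv)
+            # NOTE: spec_codes / code_to_name below are placeholders only; nothing fills or reads them in this block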
+            spec_codes = set()
+            code_to_name = {}
+            # 这里需要从spec_file重新加载获取编码信息
+            # 简化处理:通过名称匹配(因为规范列表是顺序的)
+            spec_secondary_names = spec.get("二级目录", [])
+            
+            # 提取实际存在的目录名称(清理序号后)
+            import re
+            actual_names = set()
+            for sub in subsections:
+                title = sub.get('title', '')
+                # 移除序号
+                cleaned = re.sub(r'^[一二三四五六七八九十零]+[、,,..\s]+', '', title)
+                cleaned = re.sub(r'^\d+[..、,,\s]+', '', cleaned)
+                if cleaned:
+                    actual_names.add(cleaned)
+            
+            # 找出缺失的目录(使用模糊匹配)
+            missing_names = []
+            for spec_name in spec_secondary_names:
+                # 检查是否匹配(使用包含关系或相似度)
+                found = False
+                for actual_name in actual_names:
+                    # 直接包含或双向包含
+                    if spec_name in actual_name or actual_name in spec_name:
+                        found = True
+                        break
+                    # Rough similarity fallback (not a real edit distance): tolerate about 2 characters of difference
+                    if len(spec_name) > 3 and len(actual_name) > 3:
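+                        # Simplified similarity: number of distinct characters shared by both names (order and repeats ignored), not a common-substring length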
+                        common_len = len(set(spec_name) & set(actual_name))
+                        if common_len >= min(len(spec_name), len(actual_name)) - 2:
+                            found = True
+                            break
+                if not found:
+                    missing_names.append(spec_name)
+            
+            logger.info(f"[目录审查v2] 章节: {catalog.get('title', 'N/A')}, label: {label}")
+            logger.info(f"[目录审查v2] 规范要求: {spec_secondary_names}")
+            logger.info(f"[目录审查v2] 实际目录: {list(actual_names)}")
+            logger.info(f"[目录审查v2] 缺失目录: {missing_names}")
+
+            # 记录结果 - 直接存储缺失名称列表
             results.append({
                 'index': catalog['index'],
                 'title': catalog['title'],
                 'chapter_classification': label,
-                'missing_items': missing_items,
-                'specification_items': spec["二级目录"]
+                'missing_items': json.dumps(missing_names, ensure_ascii=False) if missing_names else json.dumps([], ensure_ascii=False),
+                'specification_items': spec_secondary_names
             })
 
-            logger.info(f"审查结果: {missing_items}")
-
-        # 将缺失项的数字替换为对应的项名称
-        results = self._replace_missing_numbers_with_names(results, specifications)
+            logger.info(f"审查结果: 缺失{len(missing_names)}项")
 
         return results
 
@@ -607,6 +640,13 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
 
 async def catalogues_check(catalog_file = None):
     """主函数"""
+    logger.info("[版本标记] catalogues_check v2.0 - 使用StandardCategoryTable.csv")
+    logger.info(f"[catalogues_check入口] 被调用,catalog_file类型: {type(catalog_file)}")
+    if isinstance(catalog_file, list):
+        logger.info(f"[catalogues_check入口] catalog_file长度: {len(catalog_file)}")
+        if catalog_file:
+            logger.info(f"[catalogues_check入口] 第一个元素标题: {catalog_file[0].get('title', 'N/A')}")
+    
     # 获取当前文件所在目录
     current_dir = Path(__file__).parent
     
@@ -620,14 +660,17 @@ async def catalogues_check(catalog_file = None):
     # 创建目录审查处理器
     catalog_processor = CatalogCheckProcessor(processor)
 
-    # 定义文件路径
-    spec_file = str(current_dir / "config" / "Construction_Plan_Content_Specification.csv")
+    # 定义文件路径 - 统一使用 StandardCategoryTable.csv
+    # 从当前位置向上找到 doc_worker/config 目录
+    spec_file = str(current_dir.parent.parent / "doc_worker" / "config" / "StandardCategoryTable.csv")
 
     # 处理所有章节
     results = await catalog_processor.process_all_catalogs(
         spec_file=spec_file,
         catalog_file=catalog_file
     )
+    
+    logger.info(f"[catalogues_check出口] 返回结果数量: {len(results)}")
     return results
 
 async def main():

BIN
core/construction_review/component/reviewers/catalogues_check/config/Construction_Plan_Content_Specification.csv


+ 36 - 12
core/construction_review/component/reviewers/check_completeness/completeness_checker.py

@@ -803,7 +803,7 @@ async def check_completeness(
 
 def result_to_dict(result: CompletenessResult) -> Dict[str, Any]:
     """
-    将审查结果转换为字典格式(兼容原有接口
+    将审查结果转换为字典格式(与参数合规审查格式一致
 
     Args:
         result: 完整性审查结果
@@ -811,21 +811,45 @@ def result_to_dict(result: CompletenessResult) -> Dict[str, Any]:
     Returns:
         Dict[str, Any]: 字典格式的结果
     """
+    # 转换风险等级为标准格式
+    def convert_risk_level(risk: str) -> str:
+        risk_lower = str(risk).lower()
+        if "高" in risk_lower or "high" in risk_lower:
+            return "high"
+        elif "中" in risk_lower or "medium" in risk_lower:
+            return "medium"
+        else:
+            return "low"
+
+    # 构建 response 列表,格式与参数合规审查一致
+    response_list = []
+    for issue in result.issues:
+        risk_level = convert_risk_level(issue.risk_level)
+        response_list.append({
+            "check_item": "completeness_check",
+            "chapter_code": "completeness",
+            "check_item_code": "completeness_check",
+            "check_result": {
+                "issue_point": issue.issue_point,
+                "location": issue.location,
+                "suggestion": issue.suggestion,
+                "reason": issue.reason,
+                "reference_source": issue.reference_source
+            },
+            "exist_issue": True,
+            "risk_info": {"risk_level": risk_level},
+            # 兼容字段(保留原有字段,便于前端展示)
+            "location": issue.location,
+            "suggestion": issue.suggestion,
+            "reason": issue.reason,
+            "reference_source": issue.reference_source
+        })
+
     return {
         "success": result.success,
         "details": {
             "name": "completeness_check",
-            "response": [
-                {
-                    "issue_point": issue.issue_point,
-                    "location": issue.location,
-                    "suggestion": issue.suggestion,
-                    "reason": issue.reason,
-                    "risk_level": issue.risk_level,
-                    "reference_source": issue.reference_source
-                }
-                for issue in result.issues
-            ],
+            "response": response_list,
             "chapter_issues_count": len(result.chapter_issues),
             "content_issues_count": len(result.content_issues),
             "review_location_label": result.issues[-1].location if result.issues else "",

+ 15 - 3
core/construction_review/component/reviewers/check_completeness/components/result_analyzer.py

@@ -284,14 +284,26 @@ class ResultAnalyzer(IResultAnalyzer):
             issue_point = f"【内容不完整】{section_label}的'{level2_name}'部分缺少{missing_count}个要点"
             suggestion = f"请补充'{level2_name}'的第{','.join(map(str, missing_points))}点内容:{missing_content_text}"
             risk_level = self._map_risk_level(len(missing_points))
+            risk_level_std = "high" if "高" in risk_level else ("medium" if "中" in risk_level else "low")
 
-            # 构建问题项并添加到列表
+            # 构建问题项并添加到列表(与参数合规审查格式一致)
             issue_item = {
-                "issue_point": issue_point,
+                "check_item": "completeness_check",
+                "chapter_code": row.get("标签", "completeness"),
+                "check_item_code": f"{row.get('标签', 'completeness')}_completeness_check",
+                "check_result": {
+                    "issue_point": issue_point,
+                    "location": row.get("section_label", ""),
+                    "suggestion": suggestion,
+                    "reason": f"根据《桥梁公司危险性较大工程管理实施细则(2025版)》,{section_label}的'{level2_name}'应包含:{requirement}。当前缺失:{missing_content_text}",
+                    "reference_source": reference_source
+                },
+                "exist_issue": True,
+                "risk_info": {"risk_level": risk_level_std},
+                # 兼容字段(保留原有字段,便于前端展示)
                 "location": row.get("section_label", ""),
                 "suggestion": suggestion,
                 "reason": f"根据《桥梁公司危险性较大工程管理实施细则(2025版)》,{section_label}的'{level2_name}'应包含:{requirement}。当前缺失:{missing_content_text}",
-                "risk_level": risk_level,
                 "review_references": review_references,
                 "reference_source": reference_source
             }

+ 50 - 9
core/construction_review/component/reviewers/check_completeness/lightweight_completeness_checker.py

@@ -178,7 +178,8 @@ class LightweightCompletenessChecker:
     async def check(
         self,
         chunks: List[Dict],
-        outline: Optional[List[Dict]] = None
+        outline: Optional[List[Dict]] = None,
+        chapter_classification: Optional[str] = None
     ) -> LightweightCompletenessResult:
         """
         执行轻量级完整性检查
@@ -186,6 +187,7 @@ class LightweightCompletenessChecker:
         Args:
             chunks: 文档分块列表,每个chunk需包含tertiary_category_code
             outline: 目录结构(可选)
+            chapter_classification: 一级章节分类代码(可选),如果提供则只检查该章节下的分类
         
         Returns:
             LightweightCompletenessResult
@@ -195,10 +197,10 @@ class LightweightCompletenessChecker:
         actual_tertiary = self._extract_tertiary_from_chunks(chunks)
         
         # 2. 目录审查(二级)
-        catalogue_result = self._check_catalogue(actual_secondary)
+        catalogue_result = self._check_catalogue(actual_secondary, chapter_classification)
         
         # 3. 完整性审查(三级)- 核心
-        tertiary_result = self._check_tertiary_completeness(actual_tertiary)
+        tertiary_result = self._check_tertiary_completeness(actual_tertiary, chapter_classification)
         
         # 4. 大纲审查(二级)
         outline_result = None
@@ -244,13 +246,24 @@ class LightweightCompletenessChecker:
                 actual.add((cat1, cat2, cat3))
         return actual
     
-    def _check_catalogue(self, actual_secondary: Set[Tuple[str, str]]) -> Dict[str, Any]:
+    def _check_catalogue(self, actual_secondary: Set[Tuple[str, str]], 
+                         chapter_classification: Optional[str] = None) -> Dict[str, Any]:
         """
         目录审查(二级粒度)
         检查实际存在的二级分类与标准的差异
+        
+        Args:
+            actual_secondary: 实际存在的二级分类集合
+            chapter_classification: 如果提供,只检查该一级章节下的二级分类
         """
-        required = set(self.secondary_specs.keys())
-        actual = actual_secondary
+        # 如果指定了章节,只过滤该章节的标准分类
+        if chapter_classification:
+            required = {(cat1, cat2) for (cat1, cat2) in self.secondary_specs.keys() if cat1 == chapter_classification}
+            # 只保留同一章节的实际分类
+            actual = {(cat1, cat2) for (cat1, cat2) in actual_secondary if cat1 == chapter_classification}
+        else:
+            required = set(self.secondary_specs.keys())
+            actual = actual_secondary
         
         missing = required - actual
         extra = actual - required
@@ -293,7 +306,8 @@ class LightweightCompletenessChecker:
     
     def _check_tertiary_completeness(
         self,
-        actual_tertiary: Set[Tuple[str, str, str]]
+        actual_tertiary: Set[Tuple[str, str, str]],
+        chapter_classification: Optional[str] = None
     ) -> Dict[str, Any]:
         """
         三级完整性审查(核心方法,无LLM)
@@ -302,9 +316,19 @@ class LightweightCompletenessChecker:
         1. 对比标准三级 vs 实际三级
         2. 找出缺失的三级(无内容)
         3. 按二级分组统计
+        
+        Args:
+            actual_tertiary: 实际存在的三级分类集合
+            chapter_classification: 如果提供,只检查该一级章节下的三级分类
         """
-        required = set(self.tertiary_specs.keys())
-        actual = actual_tertiary
+        # 如果指定了章节,只过滤该章节的标准分类
+        if chapter_classification:
+            required = {(cat1, cat2, cat3) for (cat1, cat2, cat3) in self.tertiary_specs.keys() if cat1 == chapter_classification}
+            # 只保留同一章节的实际分类
+            actual = {(cat1, cat2, cat3) for (cat1, cat2, cat3) in actual_tertiary if cat1 == chapter_classification}
+        else:
+            required = set(self.tertiary_specs.keys())
+            actual = actual_tertiary
         
         present = required & actual
         missing = required - actual
@@ -381,7 +405,24 @@ class LightweightCompletenessChecker:
         outline_secondary = set()
         outline_secondary_details = {}
         
+        # 验证outline格式
+        if not isinstance(outline, list):
+            return {
+                "level": "secondary",
+                "is_consistent": True,
+                "outline_secondary_count": 0,
+                "content_secondary_count": len(actual_secondary),
+                "matched_count": 0,
+                "match_rate": "N/A",
+                "empty_sections": [],
+                "unclassified_content": [],
+                "error": f"outline格式不正确,期望list,实际为{type(outline).__name__}"
+            }
+        
         for item in outline:
+            # 跳过非字典项
+            if not isinstance(item, dict):
+                continue
             cat1 = item.get("chapter_classification")
             cat2 = item.get("secondary_category_code")
             if cat1 and cat2:

+ 39 - 15
core/construction_review/component/reviewers/check_completeness/tertiary_completeness_checker.py

@@ -662,7 +662,7 @@ async def check_completeness_tertiary(
 
 def result_to_dict(result: CompletenessResult) -> Dict[str, Any]:
     """
-    将审查结果转换为字典格式(兼容原有接口
+    将审查结果转换为字典格式(与参数合规审查格式一致
 
     Args:
         result: 完整性审查结果
@@ -670,26 +670,50 @@ def result_to_dict(result: CompletenessResult) -> Dict[str, Any]:
     Returns:
         Dict[str, Any]: 字典格式的结果
     """
+    # 转换风险等级为标准格式
+    def convert_risk_level(risk: str) -> str:
+        risk_lower = str(risk).lower()
+        if "高" in risk_lower or "high" in risk_lower:
+            return "high"
+        elif "中" in risk_lower or "medium" in risk_lower:
+            return "medium"
+        else:
+            return "low"
+
+    # 构建 response 列表,格式与参数合规审查一致
+    response_list = []
+    for issue in result.issues:
+        risk_level = convert_risk_level(issue.risk_level)
+        response_list.append({
+            "check_item": "completeness_check",
+            "chapter_code": "catalogue",
+            "check_item_code": "catalogue_completeness_check",
+            "check_result": {
+                "issue_point": issue.issue_point,
+                "location": issue.location,
+                "suggestion": issue.suggestion,
+                "reason": issue.reason,
+                "reference_source": issue.reference_source,
+                "missing_focus": issue.missing_focus
+            },
+            "exist_issue": True,
+            "risk_info": {"risk_level": risk_level},
+            # 兼容字段(保留原有字段,便于前端展示)
+            "location": issue.location,
+            "suggestion": issue.suggestion,
+            "reason": issue.reason,
+            "reference_source": issue.reference_source
+        })
+
     return {
         "success": result.success,
         "details": {
-            "name": "tertiary_completeness_check",
-            "response": [
-                {
-                    "issue_point": issue.issue_point,
-                    "location": issue.location,
-                    "suggestion": issue.suggestion,
-                    "reason": issue.reason,
-                    "risk_level": issue.risk_level,
-                    "reference_source": issue.reference_source,
-                    "missing_focus": issue.missing_focus
-                }
-                for issue in result.issues
-            ],
+            "name": "completeness_check",
+            "response": response_list,
             "secondary_issues_count": len(result.secondary_issues),
             "tertiary_issues_count": len(result.tertiary_issues),
             "review_location_label": result.issues[-1].location if result.issues else "",
-            "chapter_code": "tertiary_completeness",
+            "chapter_code": "catalogue",
             "original_content": ""
         },
         "summary": result.summary,

+ 21 - 17
core/construction_review/component/reviewers/utils/inter_tool.py

@@ -361,27 +361,31 @@ class InterTool:
                 if check_name  not in TRCH_CHECK_ITEMS:
                     logger.debug(f"检查项 {check_name} 无参考来源,直接解析响应")
 
-                    # 🔧 检查 response 是否已经是列表格式(如 outline_check 返回的格式)
+                    # 🔧 检查 response 是否已经是列表格式(如 completeness_check 返回的格式)
                     if isinstance(response, list):
                         check_issues = []
                         for issue_item in response:
                             if isinstance(issue_item, dict):
-                                # 直接使用已格式化的项,确保必要字段存在
-                                formatted_issue = {
-                                    "check_item": check_name,
-                                    "chapter_code": details.get("chapter_code", chapter_code),  # 优先使用 details 中的 chapter_code
-                                    "check_item_code": f"{details.get('chapter_code', chapter_code)}_{check_name}",
-                                    "check_result": issue_item.get("issue_point", ""),
-                                    "exist_issue": True,
-                                    "risk_info": {"risk_level": issue_item.get("risk_level", "medium")},
-                                    # 保留原始字段
-                                    "location": issue_item.get("location", ""),
-                                    "suggestion": issue_item.get("suggestion", ""),
-                                    "reason": issue_item.get("reason", ""),
-                                    "review_references": issue_item.get("review_references", ""),
-                                    "reference_source": issue_item.get("reference_source", "")
-                                }
-                                check_issues.append(formatted_issue)
+                                # 如果已经是标准格式(包含 check_item 字段),直接使用
+                                if "check_item" in issue_item:
+                                    check_issues.append(issue_item)
+                                else:
+                                    # 兼容旧格式:转换为标准格式
+                                    formatted_issue = {
+                                        "check_item": check_name,
+                                        "chapter_code": details.get("chapter_code", chapter_code),
+                                        "check_item_code": f"{details.get('chapter_code', chapter_code)}_{check_name}",
+                                        "check_result": issue_item.get("issue_point", ""),
+                                        "exist_issue": True,
+                                        "risk_info": {"risk_level": issue_item.get("risk_level", "medium")},
+                                        # 保留原始字段
+                                        "location": issue_item.get("location", ""),
+                                        "suggestion": issue_item.get("suggestion", ""),
+                                        "reason": issue_item.get("reason", ""),
+                                        "review_references": issue_item.get("review_references", ""),
+                                        "reference_source": issue_item.get("reference_source", "")
+                                    }
+                                    check_issues.append(formatted_issue)
                         logger.debug(f"检查项 {check_name} 的 response 已是列表格式,直接处理 {len(check_issues)} 个问题项")
                     else:
                         # response 是字符串,使用原有的解析逻辑

+ 76 - 10
core/construction_review/component/reviewers/utils/reference_matcher.py

@@ -2,12 +2,15 @@
 # -*- coding: utf-8 -*-
 
 import json
-from typing import List
+import asyncio
+import time
+from typing import List, Optional
 
 from pydantic import BaseModel, Field, ValidationError
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.output_parsers import PydanticOutputParser, StrOutputParser
 from foundation.ai.agent.generate.model_generate import generate_model_client
+from foundation.observability.logger.loggering import review_logger as logger
 
 
 # ===== 1) 定义结构 =====
@@ -91,7 +94,36 @@ prompt = ChatPromptTemplate.from_messages([
 # ===== 5) LLM Client (通用模型底座) =====
 model_client = generate_model_client
 
-# ===== 6) 提取第一个 JSON =====
+# ===== 6) 重试配置 =====
+MAX_RETRIES = 5  # 最大重试次数
+BASE_DELAY = 2   # 基础等待时间(秒)
+MAX_DELAY = 30   # 最大等待时间(秒)
+
+
+def _is_retryable_error(error: Exception) -> bool:
+    """Return True when a failed matching attempt is worth retrying.
+
+    Three kinds of failure are treated as transient:
+
+    * Malformed model output: any ``ValueError``. This covers the
+      unbalanced-brace error raised while extracting JSON, as well as
+      ``json.JSONDecodeError`` and pydantic's ``ValidationError`` (both are
+      ``ValueError`` subclasses). Generation is non-deterministic, so a new
+      attempt can succeed; the earlier two-attempt loop retried these too.
+    * Timeout / connection exceptions identified by type, because such
+      exceptions often carry an empty message that substring matching
+      cannot recognise.
+    * Service-side hiccups identified by a marker in the lowercased
+      message: HTTP 502/503/504/429, rate limiting, timeouts, connection
+      problems and overload.
+
+    Args:
+        error: The exception raised by one matching attempt.
+
+    Returns:
+        True if the caller should retry, False if it should fail fast.
+    """
+    # Type-based checks first: they do not depend on the message text.
+    if isinstance(error, (ValueError, TimeoutError, asyncio.TimeoutError, ConnectionError)):
+        return True
+
+    error_str = str(error).lower()
+    retryable_markers = (
+        '502', '503', '504', '429', 'rate limit',
+        'timeout', 'connection', 'overload',
+    )
+    return any(marker in error_str for marker in retryable_markers)
+
+
+def _get_user_friendly_error(error: Exception) -> str:
+    """Map a technical exception to a short message that is safe to show users.
+
+    The exception text is lowercased and tested against an ordered list of
+    marker groups; the first group with any marker present decides the
+    message. If nothing matches, a generic "service unavailable" message is
+    returned.
+
+    Args:
+        error: The exception to translate.
+
+    Returns:
+        The user-facing message.
+    """
+    lowered = str(error).lower()
+
+    # Order matters: gateway errors win over rate limiting, then timeout,
+    # then connection problems.
+    rules = (
+        (('502', '503', '504'), "模型服务暂时不可用,请稍后重试"),
+        (('429', 'rate limit'), "请求过于频繁,请稍后重试"),
+        (('timeout',), "模型响应超时,请稍后重试"),
+        (('connection',), "网络连接异常,请检查网络后重试"),
+    )
+    for markers, message in rules:
+        if any(marker in lowered for marker in markers):
+            return message
+
+    return f"规范匹配服务暂时不可用,请稍后重试"
+
+
+# ===== 7) 提取第一个 JSON =====
 def extract_first_json(text: str) -> dict:
     """从任意模型输出中提取第一个完整 JSON 对象 { ... }"""
     start = text.find("{")
@@ -111,17 +143,20 @@ def extract_first_json(text: str) -> dict:
     raise ValueError("JSON 花括号未闭合")
 
 
-# ===== 7) 核心方法 =====
+# ===== 8) 核心方法(带重试和退避策略)=====
 async def match_reference_files(reference_text: str, review_text: str) -> str:
     """
     从参考规范库中查找审查规范的匹配信息
-    
+
     Args:
         reference_text: 参考规范库内容
         review_text: 审查规范内容
-        
+
     Returns:
         匹配结果的JSON字符串
+
+    Raises:
+        RuntimeError: 当重试次数耗尽后仍失败时抛出
     """
     format_instructions = parser.get_format_instructions()
 
@@ -133,22 +168,53 @@ async def match_reference_files(reference_text: str, review_text: str) -> str:
     )
 
     last_err = None
-    for _ in range(2):
+
+    for attempt in range(1, MAX_RETRIES + 1):
         try:
+            logger.info(f"[规范匹配] 第 {attempt}/{MAX_RETRIES} 次尝试调用模型 qwen3_30b")
+
             raw = await model_client.get_model_generate_invoke(
                 trace_id="reference_match",
                 messages=messages,
                 model_name="qwen3_30b"
             )
-            print(f"[规范匹配] 模型输出: {raw}...")
+
+            logger.debug(f"[规范匹配] 模型输出: {raw[:200]}...")
             data = extract_first_json(raw)
             findings = MatchResults.model_validate(data)
             result = [x.model_dump() for x in findings.items]
+
+            logger.info(f"[规范匹配] 成功,返回 {len(result)} 个匹配结果")
             return json.dumps(result, ensure_ascii=False, indent=2)
-        except (Exception, ValidationError, json.JSONDecodeError) as e:
-            last_err = e
 
-    raise RuntimeError(f"规范匹配失败:{last_err}") from last_err
+        except Exception as e:
+            last_err = e
+            error_type = type(e).__name__
+            logger.warning(f"[规范匹配] 第 {attempt} 次尝试失败: {error_type}: {str(e)[:100]}")
+
+            # 判断是否可重试
+            if not _is_retryable_error(e):
+                logger.error(f"[规范匹配] 遇到不可重试的错误: {error_type}")
+                raise RuntimeError(_get_user_friendly_error(e)) from e
+
+            # 如果还有重试机会,计算等待时间并等待
+            if attempt < MAX_RETRIES:
+                # Exponential backoff: delay = min(BASE_DELAY * 2^(attempt - 1), MAX_DELAY)
+                delay = min(BASE_DELAY * (2 ** (attempt - 1)), MAX_DELAY)
+                # 添加随机抖动避免惊群效应
+                import random
+                jitter = random.uniform(0, 1)
+                actual_delay = delay + jitter
+
+                logger.info(f"[规范匹配] 等待 {actual_delay:.1f} 秒后重试...")
+                await asyncio.sleep(actual_delay)
+            else:
+                # 重试次数耗尽
+                logger.error(f"[规范匹配] 达到最大重试次数 {MAX_RETRIES},最终失败")
+                raise RuntimeError(_get_user_friendly_error(e)) from e
+
+    # 理论上不会执行到这里
+    raise RuntimeError(_get_user_friendly_error(last_err))
 
 
 # ===== 9) Example =====

+ 64 - 20
core/construction_review/workflows/ai_review_workflow.py

@@ -310,11 +310,25 @@ class AIReviewWorkflow:
             # 根据标准配置对review_item_config进行排序
             review_item_dict_sorted = self.core_fun._check_item_mapping_order(review_item_config)
             logger.info(f"审查项配置解析完成: {review_item_dict_sorted}")
-            outline_content_list = []
-            # 3️ 获取结构化内容
+            
+            # 3️ 获取结构化内容
             original_chunks = state.get("structured_content", {}).get("chunks", [])
-            original_outline = state.get("structured_content", {}).get("outline", [])["chapters"]
-
+            
+            # 安全获取 outline - 兼容无目录的情况(如某些DOCX文档)
+            outline_data = state.get("structured_content", {}).get("outline", {})
+            if isinstance(outline_data, dict):
+                original_outline = outline_data.get("chapters", [])
+            else:
+                # outline 不是字典(可能是列表或None),视为无目录
+                original_outline = []
+                logger.warning("文档 outline 格式异常或非字典类型,视为无目录结构")
+            
+            # 如果没有目录,记录警告并继续(跳过目录审查)
+            if not original_outline:
+                logger.warning("文档未检测到目录结构,将跳过目录完整性审查")
+            
+            outline_content_list = []
+            
             def extract_original_recursive(content):
                 """递归提取 original 字段,包括主目录和所有子目录"""
                 result = []
@@ -335,23 +349,53 @@ class AIReviewWorkflow:
 
             # 4. 最终拼接成字符串(列表join效率远高于str+=)
             outline_content_str = "".join(outline_content_list)
-            cache.document_temp(original_outline, base_cache_dir=CacheBaseDir.CONSTRUCTION_REVIEW)
-            logger.info(f"开始目录审查")
-            outline_results = await catalogues_check(original_outline)
-            outline_results = pd.DataFrame(outline_results)
-
-            # 初始化 miss_outline 和 common_elements_list 列为空列表的 JSON 字符串
-            outline_results['miss_outline'] = outline_results.get('miss_outline', '[]')
-            outline_results['common_elements_list'] = outline_results.get('common_elements_list', '[]')
-            cache.document_temp(outline_results, base_cache_dir=CacheBaseDir.CONSTRUCTION_REVIEW)
             
-            # 存储到 Redis(使用 callback_task_id 作为任务 ID)
-            try:
-                redis_manager = get_redis_manager()
-                redis_manager.store_dataframe(outline_results, state["callback_task_id"])
-                logger.info(f"目录审查结果已存储到 Redis,task_id: {state['callback_task_id']}")
-            except Exception as e:
-                logger.warning(f"存储目录审查结果到 Redis 失败: {e}")
+            # 目录审查 - 只有在有目录时才执行
+            if original_outline:
+                # 【调试日志】检查传入的目录数据结构(移到最前面)
+                logger.info(f"[工作流-目录审查] 开始目录审查,章节数量: {len(original_outline) if isinstance(original_outline, list) else 'N/A'}")
+                if isinstance(original_outline, list) and len(original_outline) > 0:
+                    first_chapter = original_outline[0]
+                    logger.info(f"[工作流-目录审查] 第一章节标题: {first_chapter.get('title', 'N/A')}")
+                    logger.info(f"[工作流-目录审查] 第一章节classification: {first_chapter.get('chapter_classification', 'N/A')}")
+                    subsections = first_chapter.get('subsections', [])
+                    logger.info(f"[工作流-目录审查] 第一章节subsections数量: {len(subsections)}")
+                    if subsections:
+                        logger.info(f"[工作流-目录审查] 第一章节subsections标题: {[s.get('title') for s in subsections[:5]]}")
+                
+                logger.info(f"[工作流-目录审查] 即将调用 catalogues_check")
+                try:
+                    outline_results = await catalogues_check(original_outline)
+                    logger.info(f"[工作流-目录审查] catalogues_check 调用完成,结果数量: {len(outline_results) if isinstance(outline_results, list) else 'N/A'}")
+                except Exception as e:
+                    logger.error(f"[工作流-目录审查] catalogues_check 调用异常: {e}", exc_info=True)
+                    outline_results = []
+                
+                try:
+                    cache.document_temp(original_outline, base_cache_dir=CacheBaseDir.CONSTRUCTION_REVIEW)
+                except Exception as e:
+                    logger.warning(f"[工作流-目录审查] 缓存original_outline失败: {e}")
+                
+                outline_results = pd.DataFrame(outline_results)
+
+                # 初始化 miss_outline 和 common_elements_list 列为空列表的 JSON 字符串
+                outline_results['miss_outline'] = outline_results.get('miss_outline', '[]')
+                outline_results['common_elements_list'] = outline_results.get('common_elements_list', '[]')
+                
+                try:
+                    cache.document_temp(outline_results, base_cache_dir=CacheBaseDir.CONSTRUCTION_REVIEW)
+                except Exception as e:
+                    logger.warning(f"[工作流-目录审查] 缓存outline_results失败: {e}")
+                
+                # 存储到 Redis(使用 callback_task_id 作为任务 ID)
+                try:
+                    redis_manager = get_redis_manager()
+                    redis_manager.store_dataframe(outline_results, state["callback_task_id"])
+                    logger.info(f"目录审查结果已存储到 Redis,task_id: {state['callback_task_id']}")
+                except Exception as e:
+                    logger.warning(f"存储目录审查结果到 Redis 失败: {e}")
+            else:
+                logger.info("无目录结构,跳过目录审查")
 
             # 预处理:根据 review_item_dict_sorted 中的 key 对 structured_content 进行筛选
             filtered_chunks = [

+ 18 - 2
core/construction_review/workflows/core_functions/ai_review_core_fun.py

@@ -229,7 +229,7 @@ class AIReviewCoreFun:
             chunks: chunks列表
 
         Returns:
-            Dict[str, List[Dict[str, Any]]]: 按章节分组的chunks
+            Tuple[Dict[str, List[Dict[str, Any]]], List[str]]: 按章节分组的chunks和章节名称列表
         """
         chapter_map = {}
 
@@ -240,7 +240,10 @@ class AIReviewCoreFun:
                 chapter_map[chapter_code] = []
 
             chapter_map[chapter_code].append(chunk)
-            chapter_names = list(chapter_map.keys())
+        
+        # 修复:当 chunks 为空时,chapter_names 也需要有定义
+        chapter_names = list(chapter_map.keys())
+        
         return chapter_map, chapter_names
 
     async def _execute_chunk_methods(self, chapter_code: str, chunk: Dict[str, Any], chunk_index: int, func_names: List[str], state: AIReviewState) -> Dict[str, Any]:
@@ -559,6 +562,19 @@ class AIReviewCoreFun:
             )
 
         else:
+            # 处理 check_completeness 但 is_complete_field=False 的情况
+            if func_name == "check_completeness" and not is_complete_field:
+                logger.debug(f"跳过 {func_name},当前 chunk 不是完整性审查类型")
+                return UnitReviewResult(
+                    unit_index=chunk_index,
+                    unit_content=chunk,
+                    basic_compliance={},
+                    technical_compliance={},
+                    rag_enhanced={},
+                    overall_risk="low",
+                    is_sse_push=False  # 不推送,因为跳过了
+                )
+
             logger.warning(f"未知的审查方法: {func_name}")
             logger.warning(f"is_complete_field: {is_complete_field}")
             return UnitReviewResult(

+ 19 - 2
foundation/ai/agent/generate/model_generate.py

@@ -37,9 +37,19 @@ class GenerateModelClient:
     async def _retry_with_backoff(self, func: Callable, *args, timeout: Optional[int] = None, **kwargs):
         """
         带指数退避的重试机制,每次重试都有独立的超时控制
+
+        注意:对于 502/503/504 等服务不可用错误,立即失败不重试,
+        避免在服务端过载时继续加重负载。
         """
         current_timeout = timeout or self.default_timeout
 
+        def _is_server_unavailable_error(error: Exception) -> bool:
+            """Return True when *error* looks like a server-side outage that must fail fast.
+
+            The check is purely textual: the lower-cased ``str(error)`` is searched for
+            the HTTP status codes 502 (Bad Gateway), 503 (Service Unavailable) and
+            504 (Gateway Timeout), or for the phrase "internal server error".
+
+            Status codes are accepted only as standalone numbers. A plain substring
+            test would also fire on unrelated digits (e.g. "took 5030 ms" or a
+            request id containing "502") and wrongly disable retries.
+
+            Args:
+                error: The exception raised by the model call.
+
+            Returns:
+                True if the error text indicates the server is unavailable.
+            """
+            import re  # local import keeps this nested helper self-contained
+
+            error_str = str(error).lower()
+            if 'internal server error' in error_str:
+                return True
+            # The lookbehind/lookahead reject codes embedded in a longer number
+            # or in the fractional part of a decimal such as "1.502".
+            return re.search(r'(?<![\d.])(?:502|503|504)(?!\d)', error_str) is not None
+
         for attempt in range(self.max_retries + 1):
             try:
                 # 每次重试都有独立的超时时间
@@ -56,12 +66,19 @@ class GenerateModelClient:
                 logger.warning(f"[模型调用] 第 {attempt + 1} 次超时, {wait_time}秒后重试...")
                 await asyncio.sleep(wait_time)
             except Exception as e:
+                error_str = str(e)
+
+                # 服务端不可用错误(502/503/504)立即失败,不重试
+                if _is_server_unavailable_error(e):
+                    logger.error(f"[模型调用] 服务端不可用,立即失败: {error_str}")
+                    raise
+
                 if attempt == self.max_retries:
-                    logger.error(f"[模型调用] 达到最大重试次数 {self.max_retries},最终失败: {str(e)}")
+                    logger.error(f"[模型调用] 达到最大重试次数 {self.max_retries},最终失败: {error_str}")
                     raise
 
                 wait_time = self.backoff_factor * (2 ** attempt)
-                logger.warning(f"[模型调用] 第 {attempt + 1} 次尝试失败: {str(e)}, {wait_time}秒后重试...")
+                logger.warning(f"[模型调用] 第 {attempt + 1} 次尝试失败: {error_str}, {wait_time}秒后重试...")
                 await asyncio.sleep(wait_time)
 
     async def get_model_generate_invoke(

+ 40 - 9
foundation/ai/rag/retrieval/entities_enhance.py

@@ -10,6 +10,14 @@ class EntitiesEnhance():
 
     def __init__(self):
         self.bfp_result_lists = []
+        self._entity_recall_cache = {}  # 实体检索结果缓存
+        self._bfp_recall_cache = {}     # BFP召回结果缓存
+
+    def _get_cache_key(self, entity: str, search_keywords: list, background: str = "") -> str:
+        """Build the key used by the in-memory recall caches.
+
+        Args:
+            entity: Main entity text.
+            search_keywords: Auxiliary search keywords. They are sorted before
+                joining, so the key does not depend on their order. May be
+                empty or None.
+            background: Background text. The whole text is hashed, so two
+                backgrounds that merely share a prefix get different keys.
+                Empty or None yields an empty component.
+
+        Returns:
+            A string of the form "<entity>::<kw1|kw2|...>::<background digest>".
+        """
+        import hashlib  # local import: the file-level import block is not visible here
+
+        keywords_str = "|".join(sorted(str(k) for k in search_keywords)) if search_keywords else ""
+        # A digest keeps keys short while still covering the full background text.
+        background_digest = hashlib.sha256(background.encode("utf-8")).hexdigest() if background else ""
+        return f"{entity}::{keywords_str}::{background_digest}"
+
     @track_execution_time
     def entities_enhance_retrieval(self, query_pairs):
         def run_async(coro):
@@ -31,15 +39,32 @@ class EntitiesEnhance():
             search_keywords = query_pair['search_keywords']
             background = query_pair['background']
             server_logger.info(f"正在处理实体:{entity},辅助搜索词:{search_keywords},背景:{background}")
-            entity_list = run_async(retrieval_manager.entity_recall(
-                entity,
-                search_keywords,
-                recall_top_k=5,      # 主实体返回数量
-                max_results=5       # 最终最多返回20个实体文本
-            ))
 
-            # BFP背景增强召回
-            bfp_result = run_async(retrieval_manager.async_bfp_recall(entity_list, background, top_k=3))
+            # 检查 entity_recall 缓存
+            recall_cache_key = self._get_cache_key(entity, search_keywords)
+            if recall_cache_key in self._entity_recall_cache:
+                entity_list = self._entity_recall_cache[recall_cache_key]
+                server_logger.info(f"[缓存命中] entity_recall: {entity}")
+            else:
+                entity_list = run_async(retrieval_manager.entity_recall(
+                    entity,
+                    search_keywords,
+                    recall_top_k=5,      # 主实体返回数量
+                    max_results=5       # 最终最多返回5个实体文本
+                ))
+                self._entity_recall_cache[recall_cache_key] = entity_list
+                server_logger.info(f"[缓存存储] entity_recall: {entity}")
+
+            # 检查 bfp_recall 缓存
+            bfp_cache_key = self._get_cache_key(entity, search_keywords, background)
+            if bfp_cache_key in self._bfp_recall_cache:
+                bfp_result = self._bfp_recall_cache[bfp_cache_key]
+                server_logger.info(f"[缓存命中] bfp_recall: {entity}")
+            else:
+                # BFP背景增强召回
+                bfp_result = run_async(retrieval_manager.async_bfp_recall(entity_list, background, top_k=2))  # 降低到2,减少上下文量
+                self._bfp_recall_cache[bfp_cache_key] = bfp_result
+                server_logger.info(f"[缓存存储] bfp_recall: {entity}")
 
             # 为每个结果添加实体信息
             for result in bfp_result:
@@ -48,7 +73,13 @@ class EntitiesEnhance():
             self.bfp_result_lists.append(bfp_result)
 
         return self.bfp_result_lists
-            
+
+    def clear_cache(self):
+        """Drop every cached entity-recall and BFP-recall result held by this instance."""
+        for cache in (self._entity_recall_cache, self._bfp_recall_cache):
+            cache.clear()
+        server_logger.info("[缓存清理] 实体检索缓存已清空")
+
 
 
 

+ 2 - 2
foundation/ai/rag/retrieval/retrieval.py

@@ -163,7 +163,7 @@ class RetrievalManager:
         entity_result = await self.async_multi_stage_recall(
             collection_name=collection_name,
             query_text=main_entity,
-            hybrid_top_k=50,
+            hybrid_top_k=10,  # 降低召回数量,减少搜索耗时和上下文量
             top_k=recall_top_k
         )
         self.logger.info(f"[entity_recall] 主实体召回完成, 返回 {len(entity_result)} 个结果")
@@ -172,7 +172,7 @@ class RetrievalManager:
             self.async_multi_stage_recall(
                 collection_name=collection_name,
                 query_text=assisted_search_entity,
-                hybrid_top_k=50,
+                hybrid_top_k=10,  # 降低召回数量,减少搜索耗时和上下文量
                 top_k=recall_top_k
             ) for assisted_search_entity in assisted_search_entity
         ]

+ 2 - 2
foundation/infrastructure/messaging/celery_app.py

@@ -72,8 +72,8 @@ app.conf.update(
 
     # 任务配置
     task_track_started=True,
-    task_time_limit=600,           # 10分钟超时(文档处理较慢
-    task_soft_time_limit=540,      # 9分钟软超时
+    task_time_limit=3600,         # 1小时硬超时(RAG检索耗时较长)
+    task_soft_time_limit=3540,    # 59分钟软超时
     worker_max_tasks_per_child=5,  # 每个worker进程最多处理5个任务后重启(防止内存泄漏)
 
     # 结果过期时间

+ 58 - 11
server/app.py

@@ -1,10 +1,17 @@
 import os
 import sys
 import logging
+import asyncio
 
 # Windows 平台 Celery 兼容性设置(必须在导入 celery 之前)
 if sys.platform == 'win32':
     os.environ.setdefault('FORKED_BY_MULTIPROCESSING', '1')
+    # Windows 上使用 SelectorEventLoop 避免 ProactorEventLoop 的 I/O 问题
+    # 这可以解决关闭时的 InvalidStateError
+    try:
+        asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
+    except AttributeError:
+        pass  # Python 版本不支持
 
 # 抑制 pymilvus 的 AsyncMilvusClient 警告(在多进程环境中没有事件循环)
 logging.getLogger('pymilvus').setLevel(logging.ERROR)
@@ -826,34 +833,71 @@ class ServerRunner:
         # 启动Celery Worker
         self.celery_manager.start_worker()
 
-        # 注册退出处理
+        # 注册退出处理 - 使用优雅关闭避免 asyncio 错误
         import atexit
-        atexit.register(self.celery_manager.stop_worker_immediately)
+        atexit.register(self._atexit_graceful_shutdown)
 
         # 设置信号处理
         self._setup_signal_handlers()
+    
+    def _atexit_graceful_shutdown(self):
+        """atexit hook: stop the Celery worker once, with a 2-second timeout.
+
+        A flag stored on the instance makes repeated invocations no-ops.
+        Any exception is logged at debug level and not re-raised.
+        """
+        try:
+            already_called = getattr(self, '_atexit_called', False)
+            if already_called:
+                # A previous invocation already handled shutdown.
+                return
+            self._atexit_called = True
+            self.celery_manager.stop_worker(timeout=2)
+        except Exception as e:
+            server_logger.debug(f"atexit 关闭时出错: {e}")
 
     def _setup_signal_handlers(self):
-        """设置信号处理器"""
+        """设置信号处理器 - 修复 Windows 上的 InvalidStateError"""
+        import asyncio
+        
         def signal_handler(signum, frame):
-            server_logger.info(f"收到信号 {signum},正在停止服务...")
-            self.celery_manager.stop_worker_immediately()
-            sys.exit(0)
+            server_logger.info(f"收到信号 {signum},正在优雅停止服务...")
+            # 使用线程安全的方式调度关闭,避免直接退出导致 asyncio 错误
+            threading.Thread(target=self._graceful_shutdown, daemon=True).start()
 
         # 通用信号处理
         try:
             signal.signal(signal.SIGINT, signal_handler)  # Ctrl+C
             signal.signal(signal.SIGTERM, signal_handler)  # 终止信号
         except AttributeError:
-            # Windows可能不支持某些信号
             pass
 
         # Windows特有处理
         if sys.platform == 'win32':
             self._setup_windows_signal_handler()
+    
+    def _graceful_shutdown(self):
+        """Stop the Celery worker, pause briefly, then force the process to exit.
+
+        Intended to avoid asyncio InvalidStateError during shutdown. Errors
+        raised while stopping are logged. ``os._exit(0)`` runs in ``finally``,
+        so this method never returns to its caller.
+        """
+        try:
+            # 1. Stop the Celery worker first
+            self.celery_manager.stop_worker(timeout=3)
+            
+            # 2. Give the asyncio event loop a chance to clean up its connections
+            try:
+                loop = asyncio.get_running_loop()
+                # If this thread is running an event loop, ask it to stop
+                if loop.is_running():
+                    loop.call_soon_threadsafe(loop.stop)
+            except RuntimeError:
+                # No event loop is running in this thread.
+                # NOTE(review): get_running_loop() only reports a loop running in the
+                # calling thread, and the signal handlers start this method in a new
+                # threading.Thread, so this branch is presumably always taken there
+                # and the server's loop is never stopped here. Confirm.
+                pass
+            
+            # 3. Wait briefly so connections can close
+            time.sleep(0.5)
+            
+            server_logger.info("服务已优雅停止")
+        except Exception as e:
+            server_logger.error(f"优雅关闭时出错: {e}")
+        finally:
+            # Last step: force the process to exit
+            os._exit(0)
 
     def _setup_windows_signal_handler(self):
-        """设置Windows信号处理器"""
+        """设置Windows信号处理器 - 修复关闭时的 asyncio 错误"""
         try:
             import win32api
             def win32_handler(dwCtrlType):
@@ -863,9 +907,12 @@ class ServerRunner:
                 CTRL_SHUTDOWN_EVENT = 6
 
                 if dwCtrlType in (CTRL_C_EVENT, CTRL_BREAK_EVENT, CTRL_CLOSE_EVENT, CTRL_SHUTDOWN_EVENT):
-                    server_logger.info(f"收到Windows控制台事件 {dwCtrlType},正在停止服务...")
-                    self.celery_manager.stop_worker_immediately()
-                    sys.exit(0)
+                    server_logger.info(f"收到Windows控制台事件 {dwCtrlType},正在优雅停止服务...")
+                    # 使用优雅关闭而非立即退出
+                    threading.Thread(target=self._graceful_shutdown, daemon=True).start()
+                    # 给关闭线程一些时间
+                    time.sleep(1)
+                    return True
                 return False
             win32api.SetConsoleCtrlHandler(win32_handler, True)
         except (ImportError, AttributeError) as e:

+ 142 - 0
utils_test/Result_Visual_Observation_Tools/README.md

@@ -0,0 +1,142 @@
+# 施工方案审查结果可视化工具
+
+## 简介
+
+这是用于可视化展示施工方案AI审查结果的前端工具,支持卡片式展示、表格展示、数据筛选和导出功能。
+
+## 文件说明
+
+| 文件 | 说明 |
+|------|------|
+| `index.html` | 基础版本,简洁易用 |
+| `advanced_viewer.html` | 高级版本,支持多文件、数据分析、导出功能 |
+| `README.md` | 使用说明文档 |
+
+## 使用方法
+
+### 方式一:直接打开文件
+
+1. 用浏览器打开 `index.html` 或 `advanced_viewer.html`
+2. 点击"选择JSON文件"按钮,选择审查结果JSON文件
+3. 查看审查结果卡片
+
+### 方式二:从默认目录加载
+
+1. 点击"加载默认目录"或"加载示例数据"按钮
+2. 工具会自动读取 `temp\construction_review\final_result\` 目录中的JSON文件
+
+### 方式三:启动本地服务器(推荐)
+
+```bash
+# 进入工具目录
+cd utils_test/Result_Visual_Observation_Tools
+
+# 使用Python启动简单HTTP服务器
+python -m http.server 8080
+
+# 或使用Node.js的http-server
+npx http-server -p 8080
+```
+
+然后访问 `http://localhost:8080`
+
+## 功能特性
+
+### 基础版本 (index.html)
+
+- 📁 单文件上传查看
+- 🎴 卡片式结果展示
+- 🔍 按风险等级/章节/问题状态筛选
+- 🔎 关键词搜索
+- 📊 统计面板
+
+### 高级版本 (advanced_viewer.html)
+
+基础版本所有功能,外加:
+
+- 📁 多文件同时加载
+- 📈 风险分布图表
+- 📊 章节统计分析
+- 🎴📋 卡片/表格视图切换
+- 📄 CSV/JSON 数据导出
+- 🗂️ 已加载文件管理
+
+## 卡片字段说明
+
+每张审查卡片展示以下信息:
+
+| 字段 | 说明 |
+|------|------|
+| 章节代码 | 检查项所属章节 |
+| 风险等级 | 高/中/低/无风险 |
+| 检查项 | 检查项名称 |
+| 检查项代码 | 检查项唯一标识 |
+| 问题位置 | 问题所在文档位置 |
+| 修改建议 | AI给出的修改建议 |
+| 审查依据 | 审查依据说明 |
+| 问题状态 | 是否存在问题 |
+| 来源文件 | 数据来自哪个JSON文件 |
+
+## 风险等级颜色标识
+
+- 🔴 **高风险** - 红色边框/标签
+- 🟡 **中风险** - 橙色边框/标签
+- 🟢 **低风险** - 绿色边框/标签
+- ⚪ **无风险** - 灰色边框/标签
+
+## JSON数据结构
+
+工具读取的JSON文件结构如下:
+
+```json
+{
+  "ai_review_result": {
+    "review_results": [
+      {
+        "unit_id": {
+          "review_lists": [
+            {
+              "check_item": "检查项名称",
+              "chapter_code": "章节代码",
+              "check_item_code": "检查项代码",
+              "check_result": "检查结果描述",
+              "exist_issue": true/false,
+              "risk_info": {
+                "risk_level": "high/medium/low"
+              },
+              "location": "问题位置",
+              "suggestion": "修改建议",
+              "reason": "审查依据",
+              "review_references": "参考标准",
+              "reference_source": "来源规范"
+            }
+          ]
+        }
+      }
+    ]
+  }
+}
+```
+
+## 注意事项
+
+1. JSON文件可能包含控制字符,工具会自动清理
+2. 建议通过本地服务器方式使用,以支持文件目录访问
+3. 大数据量时表格视图性能更好
+4. 导出功能生成的CSV文件使用UTF-8编码,支持中文
+
+## 浏览器兼容性
+
+- Chrome 80+
+- Firefox 75+
+- Edge 80+
+- Safari 13+
+
+## 更新日志
+
+### v1.0.0 (2026-03-09)
+
+- 初始版本发布
+- 支持基础卡片展示
+- 支持筛选和搜索
+- 支持数据导出

+ 1283 - 0
utils_test/Result_Visual_Observation_Tools/advanced_viewer.html

@@ -0,0 +1,1283 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>施工方案审查结果 - 高级分析工具</title>
+    <style>
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', sans-serif;
+            background: #0f172a;
+            min-height: 100vh;
+            color: #e2e8f0;
+        }
+
+        .container {
+            max-width: 1600px;
+            margin: 0 auto;
+            padding: 20px;
+        }
+
+        .header {
+            background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%);
+            border: 1px solid #334155;
+            border-radius: 16px;
+            padding: 30px;
+            margin-bottom: 20px;
+        }
+
+        .header h1 {
+            font-size: 28px;
+            color: #f8fafc;
+            margin-bottom: 8px;
+            display: flex;
+            align-items: center;
+            gap: 12px;
+        }
+
+        .header .subtitle {
+            color: #94a3b8;
+            font-size: 14px;
+        }
+
+        .controls {
+            display: flex;
+            gap: 15px;
+            margin-top: 20px;
+            flex-wrap: wrap;
+            align-items: center;
+        }
+
+        .btn {
+            padding: 12px 24px;
+            border: none;
+            border-radius: 8px;
+            cursor: pointer;
+            font-size: 14px;
+            font-weight: 500;
+            transition: all 0.3s ease;
+            display: inline-flex;
+            align-items: center;
+            gap: 8px;
+        }
+
+        .btn-primary {
+            background: linear-gradient(135deg, #3b82f6 0%, #2563eb 100%);
+            color: white;
+        }
+
+        .btn-primary:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 8px 20px rgba(59, 130, 246, 0.4);
+        }
+
+        .btn-secondary {
+            background: #334155;
+            color: #e2e8f0;
+            border: 1px solid #475569;
+        }
+
+        .btn-secondary:hover {
+            background: #475569;
+        }
+
+        .btn-danger {
+            background: linear-gradient(135deg, #ef4444 0%, #dc2626 100%);
+            color: white;
+        }
+
+        .btn-success {
+            background: linear-gradient(135deg, #10b981 0%, #059669 100%);
+            color: white;
+        }
+
+        input[type="file"] {
+            display: none;
+        }
+
+        /* Dashboard */
+        .dashboard {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
+            gap: 20px;
+            margin-bottom: 20px;
+        }
+
+        .stat-card {
+            background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%);
+            border: 1px solid #334155;
+            border-radius: 16px;
+            padding: 24px;
+            position: relative;
+            overflow: hidden;
+        }
+
+        .stat-card::before {
+            content: '';
+            position: absolute;
+            top: 0;
+            left: 0;
+            right: 0;
+            height: 4px;
+        }
+
+        .stat-card.total::before { background: #3b82f6; }
+        .stat-card.high::before { background: #ef4444; }
+        .stat-card.medium::before { background: #f59e0b; }
+        .stat-card.low::before { background: #10b981; }
+
+        .stat-icon {
+            font-size: 32px;
+            margin-bottom: 12px;
+        }
+
+        .stat-value {
+            font-size: 36px;
+            font-weight: bold;
+            color: #f8fafc;
+            margin-bottom: 4px;
+        }
+
+        .stat-label {
+            color: #94a3b8;
+            font-size: 14px;
+        }
+
+        .stat-change {
+            font-size: 12px;
+            margin-top: 8px;
+            padding: 4px 8px;
+            border-radius: 4px;
+            display: inline-block;
+        }
+
+        .stat-change.positive {
+            background: rgba(16, 185, 129, 0.2);
+            color: #10b981;
+        }
+
+        .stat-change.negative {
+            background: rgba(239, 68, 68, 0.2);
+            color: #ef4444;
+        }
+
+        /* Charts Section */
+        .charts-section {
+            display: grid;
+            grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
+            gap: 20px;
+            margin-bottom: 20px;
+        }
+
+        .chart-card {
+            background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%);
+            border: 1px solid #334155;
+            border-radius: 16px;
+            padding: 24px;
+        }
+
+        .chart-card h3 {
+            font-size: 16px;
+            color: #f8fafc;
+            margin-bottom: 20px;
+            display: flex;
+            align-items: center;
+            gap: 8px;
+        }
+
+        /* Progress bars */
+        .progress-list {
+            display: flex;
+            flex-direction: column;
+            gap: 16px;
+        }
+
+        .progress-item {
+            display: flex;
+            flex-direction: column;
+            gap: 8px;
+        }
+
+        .progress-header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+        }
+
+        .progress-label {
+            font-size: 14px;
+            color: #e2e8f0;
+        }
+
+        .progress-value {
+            font-size: 14px;
+            color: #94a3b8;
+        }
+
+        .progress-bar {
+            height: 8px;
+            background: #334155;
+            border-radius: 4px;
+            overflow: hidden;
+        }
+
+        .progress-fill {
+            height: 100%;
+            border-radius: 4px;
+            transition: width 0.5s ease;
+        }
+
+        .progress-fill.high { background: linear-gradient(90deg, #ef4444, #f87171); }
+        .progress-fill.medium { background: linear-gradient(90deg, #f59e0b, #fbbf24); }
+        .progress-fill.low { background: linear-gradient(90deg, #10b981, #34d399); }
+        .progress-fill.total { background: linear-gradient(90deg, #3b82f6, #60a5fa); }
+
+        /* Filters */
+        .filters {
+            background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%);
+            border: 1px solid #334155;
+            border-radius: 16px;
+            padding: 20px;
+            margin-bottom: 20px;
+            display: flex;
+            gap: 20px;
+            flex-wrap: wrap;
+            align-items: center;
+        }
+
+        .filter-group {
+            display: flex;
+            flex-direction: column;
+            gap: 8px;
+        }
+
+        .filter-group label {
+            font-size: 12px;
+            color: #94a3b8;
+            text-transform: uppercase;
+            letter-spacing: 0.5px;
+            font-weight: 600;
+        }
+
+        .filter-group select,
+        .filter-group input {
+            padding: 10px 16px;
+            background: #0f172a;
+            border: 1px solid #334155;
+            border-radius: 8px;
+            font-size: 14px;
+            color: #e2e8f0;
+            min-width: 150px;
+        }
+
+        .filter-group select:focus,
+        .filter-group input:focus {
+            outline: none;
+            border-color: #3b82f6;
+        }
+
+        /* View Toggle */
+        .view-toggle {
+            display: flex;
+            gap: 10px;
+            margin-bottom: 20px;
+        }
+
+        .view-btn {
+            padding: 10px 20px;
+            background: #1e293b;
+            border: 1px solid #334155;
+            border-radius: 8px;
+            color: #94a3b8;
+            cursor: pointer;
+            transition: all 0.3s;
+        }
+
+        .view-btn.active {
+            background: #3b82f6;
+            color: white;
+            border-color: #3b82f6;
+        }
+
+        /* Cards View */
+        .cards-container {
+            display: grid;
+            grid-template-columns: repeat(auto-fill, minmax(450px, 1fr));
+            gap: 20px;
+        }
+
+        .review-card {
+            background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%);
+            border: 1px solid #334155;
+            border-radius: 16px;
+            overflow: hidden;
+            transition: all 0.3s ease;
+            position: relative;
+        }
+
+        .review-card::before {
+            content: '';
+            position: absolute;
+            top: 0;
+            left: 0;
+            right: 0;
+            height: 4px;
+        }
+
+        .review-card.high::before { background: #ef4444; }
+        .review-card.medium::before { background: #f59e0b; }
+        .review-card.low::before { background: #10b981; }
+        .review-card.none::before { background: #64748b; }
+
+        .review-card:hover {
+            transform: translateY(-4px);
+            border-color: #475569;
+            box-shadow: 0 20px 40px rgba(0,0,0,0.3);
+        }
+
+        .card-header {
+            padding: 20px;
+            border-bottom: 1px solid #334155;
+        }
+
+        .card-header-top {
+            display: flex;
+            justify-content: space-between;
+            align-items: flex-start;
+            margin-bottom: 12px;
+        }
+
+        .chapter-tag {
+            padding: 6px 12px;
+            background: rgba(59, 130, 246, 0.2);
+            color: #60a5fa;
+            border-radius: 6px;
+            font-size: 12px;
+            font-weight: 600;
+        }
+
+        .risk-tag {
+            padding: 6px 12px;
+            border-radius: 6px;
+            font-size: 12px;
+            font-weight: 600;
+        }
+
+        .risk-tag.high {
+            background: rgba(239, 68, 68, 0.2);
+            color: #f87171;
+        }
+
+        .risk-tag.medium {
+            background: rgba(245, 158, 11, 0.2);
+            color: #fbbf24;
+        }
+
+        .risk-tag.low {
+            background: rgba(16, 185, 129, 0.2);
+            color: #34d399;
+        }
+
+        .risk-tag.none {
+            background: rgba(100, 116, 139, 0.2);
+            color: #94a3b8;
+        }
+
+        .check-item-title {
+            font-size: 16px;
+            font-weight: 600;
+            color: #f8fafc;
+        }
+
+        .check-item-code {
+            font-size: 12px;
+            color: #64748b;
+            font-family: monospace;
+            margin-top: 4px;
+        }
+
+        .card-body {
+            padding: 20px;
+        }
+
+        .info-section {
+            margin-bottom: 16px;
+        }
+
+        .info-section:last-child {
+            margin-bottom: 0;
+        }
+
+        .info-label {
+            font-size: 11px;
+            color: #64748b;
+            text-transform: uppercase;
+            letter-spacing: 0.5px;
+            margin-bottom: 6px;
+            display: flex;
+            align-items: center;
+            gap: 6px;
+        }
+
+        .info-content {
+            font-size: 14px;
+            color: #e2e8f0;
+            line-height: 1.6;
+            padding: 12px;
+            background: #0f172a;
+            border-radius: 8px;
+            border-left: 3px solid #334155;
+        }
+
+        .info-content.location { border-left-color: #3b82f6; }
+        .info-content.suggestion { border-left-color: #8b5cf6; }
+        .info-content.reason { border-left-color: #ec4899; }
+        .info-content.issue { border-left-color: #ef4444; }
+
+        .card-footer {
+            padding: 16px 20px;
+            background: rgba(15, 23, 42, 0.5);
+            border-top: 1px solid #334155;
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+        }
+
+        .status-badge {
+            display: flex;
+            align-items: center;
+            gap: 8px;
+            padding: 6px 12px;
+            border-radius: 6px;
+            font-size: 13px;
+            font-weight: 500;
+        }
+
+        .status-badge.has-issue {
+            background: rgba(239, 68, 68, 0.2);
+            color: #f87171;
+        }
+
+        .status-badge.no-issue {
+            background: rgba(16, 185, 129, 0.2);
+            color: #34d399;
+        }
+
+        .status-dot {
+            width: 8px;
+            height: 8px;
+            border-radius: 50%;
+        }
+
+        .status-dot.has-issue {
+            background: #ef4444;
+            animation: pulse 2s infinite;
+        }
+
+        .status-dot.no-issue {
+            background: #10b981;
+        }
+
+        @keyframes pulse {
+            0%, 100% { opacity: 1; }
+            50% { opacity: 0.5; }
+        }
+
+        /* Table View */
+        .table-container {
+            background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%);
+            border: 1px solid #334155;
+            border-radius: 16px;
+            overflow: hidden;
+            display: none;
+        }
+
+        .table-container.show {
+            display: block;
+        }
+
+        table {
+            width: 100%;
+            border-collapse: collapse;
+        }
+
+        th, td {
+            padding: 16px;
+            text-align: left;
+            border-bottom: 1px solid #334155;
+        }
+
+        th {
+            background: #0f172a;
+            color: #94a3b8;
+            font-size: 12px;
+            text-transform: uppercase;
+            letter-spacing: 0.5px;
+            font-weight: 600;
+        }
+
+        td {
+            color: #e2e8f0;
+            font-size: 14px;
+        }
+
+        tr:hover td {
+            background: rgba(59, 130, 246, 0.1);
+        }
+
+        .table-risk {
+            padding: 4px 10px;
+            border-radius: 4px;
+            font-size: 12px;
+            font-weight: 600;
+        }
+
+        .table-risk.high {
+            background: rgba(239, 68, 68, 0.2);
+            color: #f87171;
+        }
+
+        .table-risk.medium {
+            background: rgba(245, 158, 11, 0.2);
+            color: #fbbf24;
+        }
+
+        .table-risk.low {
+            background: rgba(16, 185, 129, 0.2);
+            color: #34d399;
+        }
+
+        .table-risk.none {
+            background: rgba(100, 116, 139, 0.2);
+            color: #94a3b8;
+        }
+
+        /* Empty State */
+        .empty-state {
+            text-align: center;
+            padding: 100px 20px;
+        }
+
+        .empty-state-icon {
+            font-size: 80px;
+            margin-bottom: 24px;
+            opacity: 0.5;
+        }
+
+        .empty-state h2 {
+            font-size: 24px;
+            color: #f8fafc;
+            margin-bottom: 8px;
+        }
+
+        .empty-state p {
+            color: #64748b;
+        }
+
+        /* Toast */
+        .toast {
+            position: fixed;
+            bottom: 30px;
+            right: 30px;
+            background: #1e293b;
+            border: 1px solid #334155;
+            padding: 16px 24px;
+            border-radius: 12px;
+            display: flex;
+            align-items: center;
+            gap: 12px;
+            transform: translateX(400px);
+            transition: transform 0.3s ease;
+            z-index: 1000;
+            box-shadow: 0 10px 40px rgba(0,0,0,0.3);
+        }
+
+        .toast.show {
+            transform: translateX(0);
+        }
+
+        .toast.success { border-left: 4px solid #10b981; }
+        .toast.error { border-left: 4px solid #ef4444; }
+
+        /* Export Panel */
+        .export-panel {
+            background: linear-gradient(135deg, #1e293b 0%, #0f172a 100%);
+            border: 1px solid #334155;
+            border-radius: 16px;
+            padding: 20px;
+            margin-bottom: 20px;
+            display: none;
+        }
+
+        .export-panel.show {
+            display: block;
+        }
+
+        .export-options {
+            display: flex;
+            gap: 15px;
+            margin-top: 15px;
+            flex-wrap: wrap;
+        }
+
+        /* File List */
+        .file-list {
+            display: flex;
+            flex-wrap: wrap;
+            gap: 10px;
+            margin-top: 15px;
+        }
+
+        .file-tag {
+            padding: 8px 16px;
+            background: rgba(59, 130, 246, 0.2);
+            border: 1px solid #3b82f6;
+            border-radius: 8px;
+            font-size: 13px;
+            color: #60a5fa;
+            display: flex;
+            align-items: center;
+            gap: 8px;
+        }
+
+        .file-tag .remove {
+            cursor: pointer;
+            opacity: 0.7;
+        }
+
+        .file-tag .remove:hover {
+            opacity: 1;
+        }
+
+        @media (max-width: 768px) {
+            .cards-container {
+                grid-template-columns: 1fr;
+            }
+            
+            .charts-section {
+                grid-template-columns: 1fr;
+            }
+            
+            .filters {
+                flex-direction: column;
+                align-items: stretch;
+            }
+            
+            th, td {
+                padding: 12px;
+                font-size: 13px;
+            }
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <div class="header">
+            <h1>🔍 施工方案审查结果 - 高级分析工具</h1>
+            <p class="subtitle">支持多文件对比、风险分析和数据导出</p>
+            
+            <div class="controls">
+                <label class="btn btn-primary">
+                    📁 选择JSON文件
+                    <input type="file" id="fileInput" accept=".json" multiple>
+                </label>
+                <button class="btn btn-secondary" onclick="loadFromDefault()">
+                    📂 加载示例数据
+                </button>
+                <button class="btn btn-success" onclick="toggleExport()">
+                    📊 导出报告
+                </button>
+                <button class="btn btn-danger" onclick="clearAll()">
+                    🗑️ 清空数据
+                </button>
+            </div>
+
+            <div class="file-list" id="fileList"></div>
+        </div>
+
+        <div class="export-panel" id="exportPanel">
+            <h3>导出选项</h3>
+            <div class="export-options">
+                <button class="btn btn-primary" onclick="exportToExcel()">📄 导出Excel</button>
+                <button class="btn btn-primary" onclick="exportToPDF()">📑 导出PDF</button>
+                <button class="btn btn-secondary" onclick="exportToJSON()">📋 导出JSON</button>
+            </div>
+        </div>
+
+        <div class="dashboard" id="dashboard" style="display: none;">
+            <div class="stat-card total">
+                <div class="stat-icon">📊</div>
+                <div class="stat-value" id="totalCount">0</div>
+                <div class="stat-label">总检查项</div>
+            </div>
+            <div class="stat-card high">
+                <div class="stat-icon">🔴</div>
+                <div class="stat-value" id="highRiskCount">0</div>
+                <div class="stat-label">高风险</div>
+            </div>
+            <div class="stat-card medium">
+                <div class="stat-icon">🟡</div>
+                <div class="stat-value" id="mediumRiskCount">0</div>
+                <div class="stat-label">中风险</div>
+            </div>
+            <div class="stat-card low">
+                <div class="stat-icon">🟢</div>
+                <div class="stat-value" id="lowRiskCount">0</div>
+                <div class="stat-label">低风险</div>
+            </div>
+        </div>
+
+        <div class="charts-section" id="chartsSection" style="display: none;">
+            <div class="chart-card">
+                <h3>📊 风险分布</h3>
+                <div class="progress-list" id="riskDistribution"></div>
+            </div>
+            <div class="chart-card">
+                <h3>📈 章节统计</h3>
+                <div class="progress-list" id="chapterDistribution"></div>
+            </div>
+        </div>
+
+        <div class="filters" id="filters" style="display: none;">
+            <div class="filter-group">
+                <label>风险等级</label>
+                <select id="riskFilter" onchange="applyFilters()">
+                    <option value="all">全部</option>
+                    <option value="high">高风险</option>
+                    <option value="medium">中风险</option>
+                    <option value="low">低风险</option>
+                    <option value="none">无风险</option>
+                </select>
+            </div>
+            <div class="filter-group">
+                <label>章节</label>
+                <select id="chapterFilter" onchange="applyFilters()">
+                    <option value="all">全部</option>
+                </select>
+            </div>
+            <div class="filter-group">
+                <label>问题状态</label>
+                <select id="issueFilter" onchange="applyFilters()">
+                    <option value="all">全部</option>
+                    <option value="has-issue">存在问题</option>
+                    <option value="no-issue">无问题</option>
+                </select>
+            </div>
+            <div class="filter-group">
+                <label>搜索</label>
+                <input type="text" id="searchInput" placeholder="关键词..." oninput="applyFilters()">
+            </div>
+        </div>
+
+        <div class="view-toggle" id="viewToggle" style="display: none;">
+            <button class="view-btn active" onclick="switchView('cards')">🎴 卡片视图</button>
+            <button class="view-btn" onclick="switchView('table')">📋 表格视图</button>
+        </div>
+
+        <div id="content">
+            <div class="empty-state">
+                <div class="empty-state-icon">📂</div>
+                <h2>请加载审查结果文件</h2>
+                <p>支持多文件同时加载进行对比分析</p>
+            </div>
+        </div>
+    </div>
+
+    <div class="toast" id="toast">
+        <span class="toast-message">操作成功</span>
+    </div>
+
+    <script>
+        // Flattened review items from every loaded file; each one carries a sourceFile tag.
+        let allReviewItems = [];
+        // Subset of allReviewItems that matches the current filter controls.
+        let filteredItems = [];
+        // Names of the files loaded so far, rendered as removable tags.
+        let loadedFiles = [];
+        // Active result layout: 'cards' or 'table'.
+        let currentView = 'cards';
+
+        // Parse and merge the chosen JSON files whenever the picker selection changes.
+        document.getElementById('fileInput').addEventListener('change', handleFileSelect);
+
+        function handleFileSelect(event) {
+            const files = Array.from(event.target.files);
+            files.forEach(file => {
+                const reader = new FileReader();
+                reader.onload = function(e) {
+                    try {
+                        let content = e.target.result;
+                        content = content.replace(/[\x00-\x08\x0b-\x0c\x0e-\x1f]/g, '');
+                        const data = JSON.parse(content);
+                        processData(data, file.name);
+                        addFileToList(file.name);
+                        showToast(`已加载: ${file.name}`, 'success');
+                    } catch (err) {
+                        showToast(`${file.name} 解析失败: ${err.message}`, 'error');
+                    }
+                };
+                reader.readAsText(file);
+            });
+        }
+
+        function addFileToList(filename) {
+            if (!loadedFiles.includes(filename)) {
+                loadedFiles.push(filename);
+                renderFileList();
+            }
+        }
+
+        function renderFileList() {
+            const container = document.getElementById('fileList');
+            container.innerHTML = loadedFiles.map(file => `
+                <div class="file-tag">
+                    ${file}
+                    <span class="remove" onclick="removeFile('${file}')">✕</span>
+                </div>
+            `).join('');
+        }
+
+        function removeFile(filename) {
+            loadedFiles = loadedFiles.filter(f => f !== filename);
+            renderFileList();
+        }
+
+        function loadFromDefault() {
+            const sampleFiles = [
+                'f926d2ad4428bfcbe12be8702e2c32ce-1773041592.json'
+            ];
+            
+            sampleFiles.forEach(filename => {
+                fetch(`../../temp/construction_review/final_result/${filename}`)
+                    .then(response => {
+                        if (!response.ok) throw new Error('文件加载失败');
+                        return response.text();
+                    })
+                    .then(text => {
+                        text = text.replace(/[\x00-\x08\x0b-\x0c\x0e-\x1f]/g, '');
+                        const data = JSON.parse(text);
+                        processData(data, filename);
+                        addFileToList(filename);
+                        showToast(`已加载: ${filename}`, 'success');
+                    })
+                    .catch(err => {
+                        showToast('加载失败: ' + err.message, 'error');
+                    });
+            });
+        }
+
+        function processData(data, filename) {
+            const aiReviewResult = data.ai_review_result || {};
+            const reviewResults = aiReviewResult.review_results || [];
+            
+            reviewResults.forEach(result => {
+                Object.values(result).forEach(unit => {
+                    if (unit.review_lists) {
+                        unit.review_lists.forEach(item => {
+                            allReviewItems.push({
+                                ...item,
+                                sourceFile: filename
+                            });
+                        });
+                    }
+                });
+            });
+
+            updateUI();
+        }
+
+        function updateUI() {
+            populateChapterFilter();
+            updateStats();
+            updateCharts();
+            applyFilters();
+            
+            document.getElementById('dashboard').style.display = 'grid';
+            document.getElementById('chartsSection').style.display = 'grid';
+            document.getElementById('filters').style.display = 'flex';
+            document.getElementById('viewToggle').style.display = 'flex';
+        }
+
+        function populateChapterFilter() {
+            const chapters = [...new Set(allReviewItems.map(item => item.chapter_code))];
+            const select = document.getElementById('chapterFilter');
+            select.innerHTML = '<option value="all">全部章节</option>';
+            chapters.forEach(chapter => {
+                if (chapter) {
+                    const option = document.createElement('option');
+                    option.value = chapter;
+                    option.textContent = chapter;
+                    select.appendChild(option);
+                }
+            });
+        }
+
+        function updateStats() {
+            let high = 0, medium = 0, low = 0, none = 0;
+            
+            allReviewItems.forEach(item => {
+                const riskLevel = getRiskLevel(item);
+                switch(riskLevel) {
+                    case 'high': high++; break;
+                    case 'medium': medium++; break;
+                    case 'low': low++; break;
+                    default: none++; break;
+                }
+            });
+
+            document.getElementById('totalCount').textContent = allReviewItems.length;
+            document.getElementById('highRiskCount').textContent = high;
+            document.getElementById('mediumRiskCount').textContent = medium;
+            document.getElementById('lowRiskCount').textContent = low;
+        }
+
+        function updateCharts() {
+            // Risk Distribution
+            const riskCounts = { high: 0, medium: 0, low: 0, none: 0 };
+            allReviewItems.forEach(item => {
+                riskCounts[getRiskLevel(item)]++;
+            });
+
+            const total = allReviewItems.length || 1;
+            const riskLabels = {
+                high: '高风险',
+                medium: '中风险',
+                low: '低风险',
+                none: '无风险'
+            };
+
+            document.getElementById('riskDistribution').innerHTML = 
+                Object.entries(riskCounts).map(([key, count]) => `
+                    <div class="progress-item">
+                        <div class="progress-header">
+                            <span class="progress-label">${riskLabels[key]}</span>
+                            <span class="progress-value">${count} (${Math.round(count/total*100)}%)</span>
+                        </div>
+                        <div class="progress-bar">
+                            <div class="progress-fill ${key}" style="width: ${count/total*100}%"></div>
+                        </div>
+                    </div>
+                `).join('');
+
+            // Chapter Distribution
+            const chapterCounts = {};
+            allReviewItems.forEach(item => {
+                const chapter = item.chapter_code || '未知';
+                chapterCounts[chapter] = (chapterCounts[chapter] || 0) + 1;
+            });
+
+            const sortedChapters = Object.entries(chapterCounts)
+                .sort((a, b) => b[1] - a[1])
+                .slice(0, 5);
+
+            document.getElementById('chapterDistribution').innerHTML = 
+                sortedChapters.map(([chapter, count]) => `
+                    <div class="progress-item">
+                        <div class="progress-header">
+                            <span class="progress-label">${chapter}</span>
+                            <span class="progress-value">${count}</span>
+                        </div>
+                        <div class="progress-bar">
+                            <div class="progress-fill total" style="width: ${count/total*100}%"></div>
+                        </div>
+                    </div>
+                `).join('');
+        }
+
+        function getRiskLevel(item) {
+            const riskInfo = item.risk_info || {};
+            const riskLevel = (riskInfo.risk_level || '').toLowerCase();
+            
+            if (riskLevel.includes('高') || riskLevel.includes('high')) return 'high';
+            if (riskLevel.includes('中') || riskLevel.includes('medium')) return 'medium';
+            if (riskLevel.includes('低') || riskLevel.includes('low')) return 'low';
+            return 'none';
+        }
+
+        function applyFilters() {
+            const riskFilter = document.getElementById('riskFilter').value;
+            const chapterFilter = document.getElementById('chapterFilter').value;
+            const issueFilter = document.getElementById('issueFilter').value;
+            const searchInput = document.getElementById('searchInput').value.toLowerCase();
+
+            filteredItems = allReviewItems.filter(item => {
+                if (riskFilter !== 'all' && getRiskLevel(item) !== riskFilter) {
+                    return false;
+                }
+
+                if (chapterFilter !== 'all' && item.chapter_code !== chapterFilter) {
+                    return false;
+                }
+
+                if (issueFilter !== 'all') {
+                    const hasIssue = item.exist_issue === true;
+                    if (issueFilter === 'has-issue' && !hasIssue) return false;
+                    if (issueFilter === 'no-issue' && hasIssue) return false;
+                }
+
+                if (searchInput) {
+                    const searchText = [
+                        item.check_item,
+                        item.check_item_code,
+                        item.chapter_code,
+                        item.check_result,
+                        item.location,
+                        item.suggestion,
+                        item.reason
+                    ].join(' ').toLowerCase();
+                    if (!searchText.includes(searchInput)) return false;
+                }
+
+                return true;
+            });
+
+            renderContent();
+        }
+
+        function renderContent() {
+            if (currentView === 'cards') {
+                renderCards();
+            } else {
+                renderTable();
+            }
+        }
+
+        function switchView(view) {
+            currentView = view;
+            document.querySelectorAll('.view-btn').forEach(btn => btn.classList.remove('active'));
+            event.target.classList.add('active');
+            renderContent();
+        }
+
+        function renderCards() {
+            const content = document.getElementById('content');
+            
+            if (filteredItems.length === 0) {
+                content.innerHTML = `
+                    <div class="empty-state">
+                        <div class="empty-state-icon">🔍</div>
+                        <h2>没有找到匹配的结果</h2>
+                        <p>请尝试调整筛选条件</p>
+                    </div>
+                `;
+                return;
+            }
+
+            const cardsHtml = filteredItems.map(item => {
+                const riskLevel = getRiskLevel(item);
+                const hasIssue = item.exist_issue === true;
+                
+                return `
+                    <div class="review-card ${riskLevel}">
+                        <div class="card-header">
+                            <div class="card-header-top">
+                                <span class="chapter-tag">${escapeHtml(item.chapter_code || '未知章节')}</span>
+                                <span class="risk-tag ${riskLevel}">${getRiskLabel(riskLevel)}</span>
+                            </div>
+                            <div class="check-item-title">${escapeHtml(item.check_item || '未命名检查项')}</div>
+                            <div class="check-item-code">${escapeHtml(item.check_item_code || '')}</div>
+                        </div>
+                        <div class="card-body">
+                            <div class="info-section">
+                                <div class="info-label">📍 问题位置</div>
+                                <div class="info-content location">${escapeHtml(item.location || item.check_result || '未指定')}</div>
+                            </div>
+                            
+                            ${item.suggestion ? `
+                            <div class="info-section">
+                                <div class="info-label">💡 修改建议</div>
+                                <div class="info-content suggestion">${escapeHtml(item.suggestion)}</div>
+                            </div>
+                            ` : ''}
+                            
+                            ${item.reason ? `
+                            <div class="info-section">
+                                <div class="info-label">📝 审查依据</div>
+                                <div class="info-content reason">${escapeHtml(item.reason)}</div>
+                            </div>
+                            ` : ''}
+                        </div>
+                        <div class="card-footer">
+                            <div class="status-badge ${hasIssue ? 'has-issue' : 'no-issue'}">
+                                <span class="status-dot ${hasIssue ? 'has-issue' : 'no-issue'}"></span>
+                                <span>${hasIssue ? '存在问题' : '无问题'}</span>
+                            </div>
+                            <span style="color: #64748b; font-size: 12px;">${item.sourceFile || ''}</span>
+                        </div>
+                    </div>
+                `;
+            }).join('');
+
+            content.innerHTML = `<div class="cards-container">${cardsHtml}</div>`;
+        }
+
+        function renderTable() {
+            const content = document.getElementById('content');
+            
+            if (filteredItems.length === 0) {
+                content.innerHTML = `
+                    <div class="empty-state">
+                        <div class="empty-state-icon">🔍</div>
+                        <h2>没有找到匹配的结果</h2>
+                        <p>请尝试调整筛选条件</p>
+                    </div>
+                `;
+                return;
+            }
+
+            const rows = filteredItems.map(item => {
+                const riskLevel = getRiskLevel(item);
+                const hasIssue = item.exist_issue === true;
+                
+                return `
+                    <tr>
+                        <td><span class="chapter-tag">${escapeHtml(item.chapter_code || '-')}</span></td>
+                        <td>${escapeHtml(item.check_item || '-')}</td>
+                        <td><span class="table-risk ${riskLevel}">${getRiskLabel(riskLevel)}</span></td>
+                        <td>${escapeHtml((item.location || item.check_result || '-').substring(0, 50))}...</td>
+                        <td>
+                            <span class="status-badge ${hasIssue ? 'has-issue' : 'no-issue'}">
+                                <span class="status-dot ${hasIssue ? 'has-issue' : 'no-issue'}"></span>
+                                ${hasIssue ? '存在问题' : '无问题'}
+                            </span>
+                        </td>
+                        <td style="color: #64748b; font-size: 12px;">${item.sourceFile || '-'}</td>
+                    </tr>
+                `;
+            }).join('');
+
+            content.innerHTML = `
+                <div class="table-container show">
+                    <table>
+                        <thead>
+                            <tr>
+                                <th>章节</th>
+                                <th>检查项</th>
+                                <th>风险等级</th>
+                                <th>问题描述</th>
+                                <th>状态</th>
+                                <th>来源文件</th>
+                            </tr>
+                        </thead>
+                        <tbody>${rows}</tbody>
+                    </table>
+                </div>
+            `;
+        }
+
+        function getRiskLabel(riskLevel) {
+            const labels = {
+                high: '高风险',
+                medium: '中风险',
+                low: '低风险',
+                none: '无风险'
+            };
+            return labels[riskLevel] || '未知';
+        }
+
+        function escapeHtml(text) {
+            if (!text) return '';
+            const div = document.createElement('div');
+            div.textContent = text;
+            return div.innerHTML;
+        }
+
+        function toggleExport() {
+            const panel = document.getElementById('exportPanel');
+            panel.classList.toggle('show');
+        }
+
+        function exportToExcel() {
+            if (filteredItems.length === 0) {
+                showToast('没有可导出的数据', 'error');
+                return;
+            }
+
+            let csv = '\uFEFF章节,检查项,检查项代码,风险等级,问题位置,问题描述,修改建议,审查依据,是否存在问题,来源文件\n';
+            
+            filteredItems.forEach(item => {
+                const row = [
+                    item.chapter_code || '',
+                    item.check_item || '',
+                    item.check_item_code || '',
+                    getRiskLabel(getRiskLevel(item)),
+                    (item.location || '').replace(/,/g, ','),
+                    (item.check_result || '').replace(/,/g, ','),
+                    (item.suggestion || '').replace(/,/g, ','),
+                    (item.reason || '').replace(/,/g, ','),
+                    item.exist_issue ? '是' : '否',
+                    item.sourceFile || ''
+                ];
+                csv += row.join(',') + '\n';
+            });
+
+            const blob = new Blob([csv], { type: 'text/csv;charset=utf-8;' });
+            const link = document.createElement('a');
+            link.href = URL.createObjectURL(blob);
+            link.download = `审查结果_${new Date().toISOString().slice(0,10)}.csv`;
+            link.click();
+            
+            showToast('Excel导出成功', 'success');
+        }
+
+        function exportToPDF() {
+            showToast('PDF导出功能开发中...', 'success');
+        }
+
+        function exportToJSON() {
+            if (filteredItems.length === 0) {
+                showToast('没有可导出的数据', 'error');
+                return;
+            }
+
+            const data = {
+                exportTime: new Date().toISOString(),
+                totalCount: filteredItems.length,
+                items: filteredItems
+            };
+
+            const blob = new Blob([JSON.stringify(data, null, 2)], { type: 'application/json' });
+            const link = document.createElement('a');
+            link.href = URL.createObjectURL(blob);
+            link.download = `审查结果_${new Date().toISOString().slice(0,10)}.json`;
+            link.click();
+            
+            showToast('JSON导出成功', 'success');
+        }
+
+        function clearAll() {
+            allReviewItems = [];
+            filteredItems = [];
+            loadedFiles = [];
+            renderFileList();
+            
+            document.getElementById('dashboard').style.display = 'none';
+            document.getElementById('chartsSection').style.display = 'none';
+            document.getElementById('filters').style.display = 'none';
+            document.getElementById('viewToggle').style.display = 'none';
+            document.getElementById('exportPanel').classList.remove('show');
+            
+            document.getElementById('content').innerHTML = `
+                <div class="empty-state">
+                    <div class="empty-state-icon">📂</div>
+                    <h2>数据已清空</h2>
+                    <p>请加载新的审查结果文件</p>
+                </div>
+            `;
+            
+            showToast('数据已清空', 'success');
+        }
+
+        function showToast(message, type) {
+            const toast = document.getElementById('toast');
+            toast.className = `toast ${type} show`;
+            toast.querySelector('.toast-message').textContent = message;
+            
+            setTimeout(() => {
+                toast.classList.remove('show');
+            }, 3000);
+        }
+    </script>
+</body>
+</html>

+ 917 - 0
utils_test/Result_Visual_Observation_Tools/index.html

@@ -0,0 +1,917 @@
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>施工方案审查结果可视化工具</title>
+    <style>
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+        }
+
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei', sans-serif;
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            min-height: 100vh;
+            padding: 20px;
+        }
+
+        .container {
+            max-width: 1400px;
+            margin: 0 auto;
+        }
+
+        .header {
+            background: white;
+            border-radius: 16px;
+            padding: 30px;
+            margin-bottom: 20px;
+            box-shadow: 0 10px 40px rgba(0,0,0,0.1);
+        }
+
+        .header h1 {
+            font-size: 28px;
+            color: #1a1a2e;
+            margin-bottom: 10px;
+            display: flex;
+            align-items: center;
+            gap: 12px;
+        }
+
+        .header h1::before {
+            content: '📋';
+            font-size: 32px;
+        }
+
+        .header .subtitle {
+            color: #666;
+            font-size: 14px;
+        }
+
+        .file-selector {
+            margin-top: 20px;
+            display: flex;
+            gap: 15px;
+            flex-wrap: wrap;
+            align-items: center;
+        }
+
+        .file-selector input[type="file"] {
+            display: none;
+        }
+
+        .btn {
+            padding: 12px 24px;
+            border: none;
+            border-radius: 8px;
+            cursor: pointer;
+            font-size: 14px;
+            font-weight: 500;
+            transition: all 0.3s ease;
+            display: inline-flex;
+            align-items: center;
+            gap: 8px;
+        }
+
+        .btn-primary {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+        }
+
+        .btn-primary:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 8px 20px rgba(102, 126, 234, 0.4);
+        }
+
+        .btn-secondary {
+            background: #f0f0f0;
+            color: #333;
+        }
+
+        .btn-secondary:hover {
+            background: #e0e0e0;
+        }
+
+        .file-path {
+            color: #666;
+            font-size: 13px;
+            padding: 8px 16px;
+            background: #f8f9fa;
+            border-radius: 6px;
+            font-family: monospace;
+        }
+
+        .stats-bar {
+            display: flex;
+            gap: 20px;
+            margin-top: 20px;
+            flex-wrap: wrap;
+        }
+
+        .stat-card {
+            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+            color: white;
+            padding: 20px 30px;
+            border-radius: 12px;
+            min-width: 150px;
+            text-align: center;
+        }
+
+        .stat-card.warning {
+            background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%);
+        }
+
+        .stat-card.success {
+            background: linear-gradient(135deg, #4facfe 0%, #00f2fe 100%);
+        }
+
+        .stat-card.info {
+            background: linear-gradient(135deg, #43e97b 0%, #38f9d7 100%);
+        }
+
+        .stat-value {
+            font-size: 32px;
+            font-weight: bold;
+            margin-bottom: 5px;
+        }
+
+        .stat-label {
+            font-size: 13px;
+            opacity: 0.9;
+        }
+
+        .filters {
+            background: white;
+            border-radius: 16px;
+            padding: 20px;
+            margin-bottom: 20px;
+            box-shadow: 0 10px 40px rgba(0,0,0,0.1);
+            display: flex;
+            gap: 20px;
+            flex-wrap: wrap;
+            align-items: center;
+        }
+
+        .filter-group {
+            display: flex;
+            flex-direction: column;
+            gap: 8px;
+        }
+
+        .filter-group label {
+            font-size: 13px;
+            color: #666;
+            font-weight: 500;
+        }
+
+        .filter-group select,
+        .filter-group input {
+            padding: 10px 16px;
+            border: 2px solid #e0e0e0;
+            border-radius: 8px;
+            font-size: 14px;
+            min-width: 150px;
+            transition: border-color 0.3s;
+        }
+
+        .filter-group select:focus,
+        .filter-group input:focus {
+            outline: none;
+            border-color: #667eea;
+        }
+
+        .cards-container {
+            display: grid;
+            grid-template-columns: repeat(auto-fill, minmax(450px, 1fr));
+            gap: 20px;
+        }
+
+        .review-card {
+            background: white;
+            border-radius: 16px;
+            overflow: hidden;
+            box-shadow: 0 10px 40px rgba(0,0,0,0.1);
+            transition: all 0.3s ease;
+            border-left: 5px solid #ddd;
+        }
+
+        .review-card:hover {
+            transform: translateY(-5px);
+            box-shadow: 0 20px 60px rgba(0,0,0,0.15);
+        }
+
+        .review-card.high-risk {
+            border-left-color: #e74c3c;
+        }
+
+        .review-card.medium-risk {
+            border-left-color: #f39c12;
+        }
+
+        .review-card.low-risk {
+            border-left-color: #27ae60;
+        }
+
+        .review-card.no-risk {
+            border-left-color: #95a5a6;
+        }
+
+        .card-header {
+            padding: 20px;
+            background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
+            border-bottom: 1px solid #e0e0e0;
+        }
+
+        .card-header-top {
+            display: flex;
+            justify-content: space-between;
+            align-items: flex-start;
+            margin-bottom: 10px;
+            flex-wrap: wrap;
+            gap: 10px;
+        }
+
+        .chapter-code {
+            display: inline-block;
+            padding: 6px 14px;
+            background: #667eea;
+            color: white;
+            border-radius: 20px;
+            font-size: 12px;
+            font-weight: 600;
+            text-transform: uppercase;
+            letter-spacing: 0.5px;
+        }
+
+        .risk-badge {
+            padding: 6px 14px;
+            border-radius: 20px;
+            font-size: 12px;
+            font-weight: 600;
+            text-transform: uppercase;
+        }
+
+        .risk-badge.high {
+            background: #ffe5e5;
+            color: #c0392b;
+        }
+
+        .risk-badge.medium {
+            background: #fff3e0;
+            color: #e67e22;
+        }
+
+        .risk-badge.low {
+            background: #e8f5e9;
+            color: #27ae60;
+        }
+
+        .risk-badge.none {
+            background: #eceff1;
+            color: #546e7a;
+        }
+
+        .check-item {
+            font-size: 16px;
+            font-weight: 600;
+            color: #1a1a2e;
+            margin-top: 10px;
+        }
+
+        .check-item-code {
+            font-size: 12px;
+            color: #888;
+            font-family: monospace;
+            margin-top: 4px;
+        }
+
+        .card-body {
+            padding: 20px;
+        }
+
+        .info-row {
+            margin-bottom: 16px;
+        }
+
+        .info-row:last-child {
+            margin-bottom: 0;
+        }
+
+        .info-label {
+            font-size: 12px;
+            color: #888;
+            text-transform: uppercase;
+            letter-spacing: 0.5px;
+            margin-bottom: 6px;
+            font-weight: 600;
+        }
+
+        .info-content {
+            font-size: 14px;
+            color: #333;
+            line-height: 1.6;
+            background: #f8f9fa;
+            padding: 12px;
+            border-radius: 8px;
+            word-break: break-word;
+        }
+
+        .info-content.issue {
+            background: #fff3e0;
+            border-left: 4px solid #f39c12;
+        }
+
+        .info-content.suggestion {
+            background: #e3f2fd;
+            border-left: 4px solid #2196f3;
+        }
+
+        .info-content.reason {
+            background: #f3e5f5;
+            border-left: 4px solid #9c27b0;
+        }
+
+        .info-content.location {
+            background: #e8f5e9;
+            border-left: 4px solid #4caf50;
+        }
+
+        .card-footer {
+            padding: 15px 20px;
+            background: #f8f9fa;
+            border-top: 1px solid #e0e0e0;
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            flex-wrap: wrap;
+            gap: 10px;
+        }
+
+        .issue-status {
+            display: flex;
+            align-items: center;
+            gap: 8px;
+            font-size: 13px;
+            font-weight: 600;
+        }
+
+        .issue-status.has-issue {
+            color: #e74c3c;
+        }
+
+        .issue-status.no-issue {
+            color: #27ae60;
+        }
+
+        .status-dot {
+            width: 10px;
+            height: 10px;
+            border-radius: 50%;
+        }
+
+        .status-dot.has-issue {
+            background: #e74c3c;
+            animation: pulse 2s infinite;
+        }
+
+        .status-dot.no-issue {
+            background: #27ae60;
+        }
+
+        @keyframes pulse {
+            0% { box-shadow: 0 0 0 0 rgba(231, 76, 60, 0.4); }
+            70% { box-shadow: 0 0 0 10px rgba(231, 76, 60, 0); }
+            100% { box-shadow: 0 0 0 0 rgba(231, 76, 60, 0); }
+        }
+
+        .reference-source {
+            font-size: 12px;
+            color: #888;
+            max-width: 200px;
+            overflow: hidden;
+            text-overflow: ellipsis;
+            white-space: nowrap;
+        }
+
+        .empty-state {
+            text-align: center;
+            padding: 80px 20px;
+            color: white;
+        }
+
+        .empty-state-icon {
+            font-size: 80px;
+            margin-bottom: 20px;
+            opacity: 0.8;
+        }
+
+        .empty-state h2 {
+            font-size: 24px;
+            margin-bottom: 10px;
+        }
+
+        .empty-state p {
+            opacity: 0.8;
+        }
+
+        .loading {
+            text-align: center;
+            padding: 60px;
+            color: white;
+        }
+
+        .loading-spinner {
+            width: 50px;
+            height: 50px;
+            border: 4px solid rgba(255,255,255,0.3);
+            border-top-color: white;
+            border-radius: 50%;
+            animation: spin 1s linear infinite;
+            margin: 0 auto 20px;
+        }
+
+        @keyframes spin {
+            to { transform: rotate(360deg); }
+        }
+
+        /* Toast notification pinned to the bottom-right corner. It is parked
+           off-screen to the right and slides in when the .show class is added. */
+        .toast {
+            position: fixed;
+            bottom: 30px;
+            right: 30px;
+            background: white;
+            padding: 16px 24px;
+            border-radius: 12px;
+            box-shadow: 0 10px 40px rgba(0,0,0,0.2);
+            display: flex;
+            align-items: center;
+            gap: 12px;
+            /* Shift by the toast's own width plus the 30px right offset. A fixed
+               400px shift left long messages (wider than ~370px) partly visible. */
+            transform: translateX(calc(100% + 30px));
+            transition: transform 0.3s ease;
+            z-index: 1000;
+        }
+
+        .toast.show {
+            transform: translateX(0);
+        }
+
+        .toast-icon {
+            font-size: 24px;
+        }
+
+        .toast.success .toast-icon { color: #27ae60; }
+        .toast.error .toast-icon { color: #e74c3c; }
+
+        @media (max-width: 768px) {
+            .cards-container {
+                grid-template-columns: 1fr;
+            }
+            
+            .header h1 {
+                font-size: 22px;
+            }
+            
+            .filters {
+                flex-direction: column;
+                align-items: stretch;
+            }
+            
+            .filter-group select,
+            .filter-group input {
+                width: 100%;
+            }
+        }
+
+        .json-preview {
+            background: #1e1e1e;
+            color: #d4d4d4;
+            padding: 20px;
+            border-radius: 8px;
+            font-family: 'Consolas', 'Monaco', monospace;
+            font-size: 13px;
+            overflow-x: auto;
+            max-height: 300px;
+            overflow-y: auto;
+            display: none;
+        }
+
+        .json-preview.show {
+            display: block;
+        }
+
+        .toggle-json {
+            margin-top: 10px;
+            color: #667eea;
+            cursor: pointer;
+            font-size: 13px;
+            text-decoration: underline;
+        }
+    </style>
+</head>
+<body>
+    <div class="container">
+        <div class="header">
+            <h1>施工方案可视化审查结果工具</h1>
+            <p class="subtitle">直观展示AI审查结果,快速定位问题与风险</p>
+            
+            <div class="file-selector">
+                <label class="btn btn-primary">
+                    📁 选择JSON文件
+                    <input type="file" id="fileInput" accept=".json">
+                </label>
+                <button class="btn btn-secondary" onclick="loadFromDefault()">
+                    📂 加载默认目录
+                </button>
+                <span class="file-path" id="filePath">temp\construction_review\final_result\</span>
+            </div>
+
+            <div class="stats-bar" id="statsBar" style="display: none;">
+                <div class="stat-card">
+                    <div class="stat-value" id="totalCount">0</div>
+                    <div class="stat-label">总检查项</div>
+                </div>
+                <div class="stat-card warning">
+                    <div class="stat-value" id="highRiskCount">0</div>
+                    <div class="stat-label">高风险</div>
+                </div>
+                <div class="stat-card info">
+                    <div class="stat-value" id="mediumRiskCount">0</div>
+                    <div class="stat-label">中风险</div>
+                </div>
+                <div class="stat-card success">
+                    <div class="stat-value" id="lowRiskCount">0</div>
+                    <div class="stat-label">低风险/无风险</div>
+                </div>
+            </div>
+        </div>
+
+        <div class="filters" id="filters" style="display: none;">
+            <div class="filter-group">
+                <label>风险等级</label>
+                <select id="riskFilter" onchange="applyFilters()">
+                    <option value="all">全部</option>
+                    <option value="high">高风险</option>
+                    <option value="medium">中风险</option>
+                    <option value="low">低风险</option>
+                    <option value="none">无风险</option>
+                </select>
+            </div>
+            <div class="filter-group">
+                <label>检查项类型</label>
+                <select id="chapterFilter" onchange="applyFilters()">
+                    <option value="all">全部章节</option>
+                </select>
+            </div>
+            <div class="filter-group">
+                <label>问题状态</label>
+                <select id="issueFilter" onchange="applyFilters()">
+                    <option value="all">全部</option>
+                    <option value="has-issue">存在问题</option>
+                    <option value="no-issue">无问题</option>
+                </select>
+            </div>
+            <div class="filter-group">
+                <label>搜索</label>
+                <input type="text" id="searchInput" placeholder="搜索关键词..." oninput="applyFilters()">
+            </div>
+        </div>
+
+        <div id="content">
+            <div class="empty-state">
+                <div class="empty-state-icon">📂</div>
+                <h2>请选择审查结果文件</h2>
+                <p>支持直接上传JSON文件或从默认目录加载</p>
+            </div>
+        </div>
+    </div>
+
+    <div class="toast" id="toast">
+        <span class="toast-icon">✓</span>
+        <span class="toast-message">操作成功</span>
+    </div>
+
+    <script>
+        // Every review item flattened from the loaded JSON (reassigned by processData).
+        let allReviewItems = [];
+        // Subset of allReviewItems that passes the active filters (reassigned by applyFilters).
+        let filteredItems = [];
+
+        // Read and render the chosen file whenever the file input's selection changes.
+        document.getElementById('fileInput').addEventListener('change', handleFileSelect);
+
+        function handleFileSelect(event) {
+            const file = event.target.files[0];
+            if (!file) return;
+
+            const reader = new FileReader();
+            reader.onload = function(e) {
+                try {
+                    // 移除控制字符
+                    let content = e.target.result;
+                    content = content.replace(/[\x00-\x08\x0b-\x0c\x0e-\x1f]/g, '');
+                    const data = JSON.parse(content);
+                    processData(data);
+                    showToast('文件加载成功', 'success');
+                } catch (err) {
+                    showToast('JSON解析错误: ' + err.message, 'error');
+                    console.error(err);
+                }
+            };
+            reader.readAsText(file);
+        }
+
+        function loadFromDefault() {
+            // 尝试加载默认目录中的第一个JSON文件
+            fetch('../../temp/construction_review/final_result/')
+                .then(response => {
+                    if (!response.ok) throw new Error('无法访问目录');
+                    return response.text();
+                })
+                .then(html => {
+                    // 解析目录列表
+                    const parser = new DOMParser();
+                    const doc = parser.parseFromString(html, 'text/html');
+                    const links = Array.from(doc.querySelectorAll('a'));
+                    const jsonFiles = links
+                        .map(a => a.href)
+                        .filter(href => href.endsWith('.json'))
+                        .map(href => href.split('/').pop());
+                    
+                    if (jsonFiles.length > 0) {
+                        loadJsonFile(jsonFiles[0]);
+                    } else {
+                        showToast('默认目录中没有找到JSON文件', 'error');
+                    }
+                })
+                .catch(err => {
+                    // 如果无法列出目录,尝试直接加载已知文件
+                    const defaultFiles = [
+                        'f926d2ad4428bfcbe12be8702e2c32ce-1773041592.json'
+                    ];
+                    loadJsonFile(defaultFiles[0]);
+                });
+        }
+
+        function loadJsonFile(filename) {
+            fetch(`../../temp/construction_review/final_result/${filename}`)
+                .then(response => {
+                    if (!response.ok) throw new Error('文件加载失败');
+                    return response.text();
+                })
+                .then(text => {
+                    // 移除控制字符
+                    text = text.replace(/[\x00-\x08\x0b-\x0c\x0e-\x1f]/g, '');
+                    const data = JSON.parse(text);
+                    processData(data);
+                    showToast(`已加载: ${filename}`, 'success');
+                })
+                .catch(err => {
+                    showToast('加载失败: ' + err.message, 'error');
+                });
+        }
+
+        function processData(data) {
+            allReviewItems = [];
+            
+            const aiReviewResult = data.ai_review_result || {};
+            const reviewResults = aiReviewResult.review_results || [];
+            
+            reviewResults.forEach(result => {
+                Object.values(result).forEach(unit => {
+                    if (unit.review_lists) {
+                        unit.review_lists.forEach(item => {
+                            allReviewItems.push(item);
+                        });
+                    }
+                });
+            });
+
+            // 填充章节筛选器
+            populateChapterFilter();
+            
+            // 显示统计
+            updateStats();
+            
+            // 应用筛选并渲染
+            applyFilters();
+            
+            // 显示筛选栏
+            document.getElementById('filters').style.display = 'flex';
+            document.getElementById('statsBar').style.display = 'flex';
+        }
+
+        function populateChapterFilter() {
+            const chapters = [...new Set(allReviewItems.map(item => item.chapter_code))];
+            const select = document.getElementById('chapterFilter');
+            select.innerHTML = '<option value="all">全部章节</option>';
+            chapters.forEach(chapter => {
+                if (chapter) {
+                    const option = document.createElement('option');
+                    option.value = chapter;
+                    option.textContent = chapter;
+                    select.appendChild(option);
+                }
+            });
+        }
+
+        function updateStats() {
+            let high = 0, medium = 0, low = 0, none = 0;
+            
+            allReviewItems.forEach(item => {
+                const riskLevel = getRiskLevel(item);
+                switch(riskLevel) {
+                    case 'high': high++; break;
+                    case 'medium': medium++; break;
+                    case 'low': low++; break;
+                    default: none++; break;
+                }
+            });
+
+            document.getElementById('totalCount').textContent = allReviewItems.length;
+            document.getElementById('highRiskCount').textContent = high;
+            document.getElementById('mediumRiskCount').textContent = medium;
+            document.getElementById('lowRiskCount').textContent = low + none;
+        }
+
+        function getRiskLevel(item) {
+            const riskInfo = item.risk_info || {};
+            const riskLevel = (riskInfo.risk_level || '').toLowerCase();
+            
+            if (riskLevel.includes('高') || riskLevel.includes('high')) return 'high';
+            if (riskLevel.includes('中') || riskLevel.includes('medium')) return 'medium';
+            if (riskLevel.includes('低') || riskLevel.includes('low')) return 'low';
+            return 'none';
+        }
+
+        function getRiskClass(riskLevel) {
+            switch(riskLevel) {
+                case 'high': return 'high-risk';
+                case 'medium': return 'medium-risk';
+                case 'low': return 'low-risk';
+                default: return 'no-risk';
+            }
+        }
+
+        function getRiskBadgeClass(riskLevel) {
+            switch(riskLevel) {
+                case 'high': return 'high';
+                case 'medium': return 'medium';
+                case 'low': return 'low';
+                default: return 'none';
+            }
+        }
+
+        function getRiskLabel(riskLevel) {
+            switch(riskLevel) {
+                case 'high': return '高风险';
+                case 'medium': return '中风险';
+                case 'low': return '低风险';
+                default: return '无风险';
+            }
+        }
+
+        function applyFilters() {
+            const riskFilter = document.getElementById('riskFilter').value;
+            const chapterFilter = document.getElementById('chapterFilter').value;
+            const issueFilter = document.getElementById('issueFilter').value;
+            const searchInput = document.getElementById('searchInput').value.toLowerCase();
+
+            filteredItems = allReviewItems.filter(item => {
+                // 风险等级筛选
+                if (riskFilter !== 'all') {
+                    const itemRisk = getRiskLevel(item);
+                    if (itemRisk !== riskFilter) return false;
+                }
+
+                // 章节筛选
+                if (chapterFilter !== 'all' && item.chapter_code !== chapterFilter) {
+                    return false;
+                }
+
+                // 问题状态筛选
+                if (issueFilter !== 'all') {
+                    const hasIssue = item.exist_issue === true;
+                    if (issueFilter === 'has-issue' && !hasIssue) return false;
+                    if (issueFilter === 'no-issue' && hasIssue) return false;
+                }
+
+                // 搜索筛选
+                if (searchInput) {
+                    const searchText = [
+                        item.check_item,
+                        item.check_item_code,
+                        item.chapter_code,
+                        item.check_result,
+                        item.location,
+                        item.suggestion,
+                        item.reason
+                    ].join(' ').toLowerCase();
+                    if (!searchText.includes(searchInput)) return false;
+                }
+
+                return true;
+            });
+
+            renderCards();
+        }
+
+        /**
+         * Render filteredItems into the #content element as a grid of cards.
+         *
+         * Shows an empty-state message when nothing matches. Every item field is
+         * passed through escapeHtml before being interpolated; the optional
+         * sections (suggestion, reason, review_references, reference_source) are
+         * emitted only when the field is truthy.
+         */
+        function renderCards() {
+            const content = document.getElementById('content');
+            
+            // Nothing to show: replace the content with the empty-state block.
+            if (filteredItems.length === 0) {
+                content.innerHTML = `
+                    <div class="empty-state">
+                        <div class="empty-state-icon">🔍</div>
+                        <h2>没有找到匹配的结果</h2>
+                        <p>请尝试调整筛选条件</p>
+                    </div>
+                `;
+                return;
+            }
+
+            // Build one card's markup per item, then join them into one string.
+            const cardsHtml = filteredItems.map(item => {
+                const riskLevel = getRiskLevel(item);
+                const riskClass = getRiskClass(riskLevel);
+                const badgeClass = getRiskBadgeClass(riskLevel);
+                const riskLabel = getRiskLabel(riskLevel);
+                // Only a strict boolean true marks the item as having an issue.
+                const hasIssue = item.exist_issue === true;
+                
+                return `
+                    <div class="review-card ${riskClass}">
+                        <div class="card-header">
+                            <div class="card-header-top">
+                                <span class="chapter-code">${escapeHtml(item.chapter_code || '未知章节')}</span>
+                                <span class="risk-badge ${badgeClass}">${riskLabel}</span>
+                            </div>
+                            <div class="check-item">${escapeHtml(item.check_item || '未命名检查项')}</div>
+                            <div class="check-item-code">${escapeHtml(item.check_item_code || '')}</div>
+                        </div>
+                        <div class="card-body">
+                            <div class="info-row">
+                                <div class="info-label">📍 问题位置</div>
+                                <div class="info-content location">${escapeHtml(item.location || item.check_result || '未指定')}</div>
+                            </div>
+                            
+                            ${item.suggestion ? `
+                            <div class="info-row">
+                                <div class="info-label">💡 修改建议</div>
+                                <div class="info-content suggestion">${escapeHtml(item.suggestion)}</div>
+                            </div>
+                            ` : ''}
+                            
+                            ${item.reason ? `
+                            <div class="info-row">
+                                <div class="info-label">📝 审查依据</div>
+                                <div class="info-content reason">${escapeHtml(item.reason)}</div>
+                            </div>
+                            ` : ''}
+                            
+                            ${item.review_references ? `
+                            <div class="info-row">
+                                <div class="info-label">📚 参考标准</div>
+                                <div class="info-content">${escapeHtml(item.review_references)}</div>
+                            </div>
+                            ` : ''}
+                        </div>
+                        <div class="card-footer">
+                            <div class="issue-status ${hasIssue ? 'has-issue' : 'no-issue'}">
+                                <span class="status-dot ${hasIssue ? 'has-issue' : 'no-issue'}"></span>
+                                <span>${hasIssue ? '存在问题' : '无问题'}</span>
+                            </div>
+                            ${item.reference_source ? `
+                            <div class="reference-source" title="${escapeHtml(item.reference_source)}">
+                                来源: ${escapeHtml(item.reference_source)}
+                            </div>
+                            ` : ''}
+                        </div>
+                    </div>
+                `;
+            }).join('');
+
+            // Swap in all cards with a single innerHTML assignment.
+            content.innerHTML = `<div class="cards-container">${cardsHtml}</div>`;
+        }
+
+        function escapeHtml(text) {
+            if (!text) return '';
+            const div = document.createElement('div');
+            div.textContent = text;
+            return div.innerHTML;
+        }
+
+        function showToast(message, type) {
+            const toast = document.getElementById('toast');
+            toast.className = `toast ${type} show`;
+            toast.querySelector('.toast-icon').textContent = type === 'success' ? '✓' : '✗';
+            toast.querySelector('.toast-message').textContent = message;
+            
+            setTimeout(() => {
+                toast.classList.remove('show');
+            }, 3000);
+        }
+
+        // Hook that runs when the page has finished loading; auto-loading is currently disabled.
+        window.addEventListener('load', () => {
+            // Optional: uncomment to load the default file automatically.
+            // loadFromDefault();
+        });
+    </script>
+</body>
+</html>