před 1 měsícem · 3b1d8b5196
--- a/.design/施工方案审查应用/基础架构设计/施工审查应用基础架构设计.md
+++ b/.design/施工方案审查应用/基础架构设计/施工审查应用基础架构设计.md
@@ -0,0 +1,1326 @@
 
				+# LQAgentPlatform 最终架构设计
			
 
				+
			
 
				+> **版本**: v3.0
			
 
				+> **更新日期**: 2026-04-09
			
 
				+> **项目状态**: 核心功能已实现，生产环境运行中
			
 
				+> **v2.0→v3.0 变更**: 新增施工方案编写模块、脱敏模块、简化管道、标准匹配、LLM分类器v2、LLM链式客户端框架
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 最终架构设计图
			
 
				+
			
 
				+```
			
 
				+┌─────────────────────────────────────────────────────────────────────────────────────┐
			
 
				+│                         LQAgentPlatform - 实际架构实现 (v3.0)                       │
			
 
				+├─────────────────────────────────────────────────────────────────────────────────────┤
			
 
				+│                                                                                     │
			
 
				+│  ┌───────────────────────────────────────────────────────────────────────────────┐  │
			
 
				+│  │                           Server Layer (服务器层)                              │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  FastAPI Application (server/app.py)                                    │  │  │
			
 
				+│  │  │  • RouteManager - 路由管理                                               │  │  │
			
 
				+│  │  │  • CeleryWorkerManager - 异步任务管理                                     │  │  │
			
 
				+│  │  │  • ApplicationFactory - 应用工厂                                          │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  └───────────────────────────────────────────────────────────────────────────────┘  │
			
 
				+│                                        ▲                                            │
			
 
				+│                                        │ HTTP Request                                │
			
 
				+│                                        ▼                                            │
			
 
				+│  ┌───────────────────────────────────────────────────────────────────────────────┐  │
			
 
				+│  │                            Views Layer (视图层)                               │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  construction_review/ - 施工方案审查API ✅ (已实现)                        │  │  │
			
 
				+│  │  │  • file_upload.py - 文档上传接口                                          │  │  │
			
 
				+│  │  │  • launch_review.py - 启动审查接口                                        │  │  │
			
 
				+│  │  │  • review_results.py - 审查结果接口                                       │  │  │
			
 
				+│  │  │  • task_control.py - 任务控制接口                                         │  │  │
			
 
				+│  │  │  • desensitize_api.py - 脱敏API接口 🆕                                   │  │  │
			
 
				+│  │  │  • schemas/error_schemas.py - 错误模式定义                                │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  construction_write/ - 施工方案编写API ✅ (已实现) 🆕                     │  │  │
			
 
				+│  │  │  • outline_views.py - 大纲生成接口                                       │  │  │
			
 
				+│  │  │  • content_completion.py - 内容补全接口                                   │  │  │
			
 
				+│  │  │  • regenerate_views.py - 重新生成接口                                    │  │  │
			
 
				+│  │  │  • task_cancel_views.py - 任务取消接口                                   │  │  │
			
 
				+│  │  │  • similar_plan_recommend.py - 相似方案推荐接口                           │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  test_views.py - 测试接口                                                │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  └───────────────────────────────────────────────────────────────────────────────┘  │
			
 
				+│                                        ▲                                            │
			
 
				+│                                        │ 调用业务逻辑                                │
			
 
				+│                                        ▼                                            │
			
 
				+│  ┌───────────────────────────────────────────────────────────────────────────────┐  │
			
 
				+│  │                           Core Layer (核心业务层)                             │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  construction_review/ - 施工方案审查模块 ✅ (已实现)                       │  │  │
			
 
				+│  │  │  ┌─────────────────────────────────────────────────────────────────┐    │  │  │
			
 
				+│  │  │  │  workflows/ - 工作流层                                          │    │  │  │
			
 
				+│  │  │  │  • ai_review_workflow.py - AI审查工作流 (54KB, 核心)             │    │  │  │
			
 
				+│  │  │  │  • document_workflow.py - 文档处理工作流                         │    │  │  │
			
 
				+│  │  │  │  • report_workflow.py - 报告生成工作流                           │    │  │  │
			
 
				+│  │  │  │  • core_functions/ - 核心业务函数 🆕                             │    │  │  │
			
 
				+│  │  │  │  • types/ - 状态类型定义 (AIReviewState, TaskChainState) 🆕      │    │  │  │
			
 
				+│  │  │  └─────────────────────────────────────────────────────────────────┘    │  │  │
			
 
				+│  │  │  ┌─────────────────────────────────────────────────────────────────┐    │  │  │
			
 
				+│  │  │  │  component/ - 组件层                                            │    │  │  │
			
 
				+│  │  │  │  • ai_review_engine.py - AI审查引擎 (核心引擎)                   │    │  │  │
			
 
				+│  │  │  │  • document_processor.py - 文档处理器                            │    │  │  │
			
 
				+│  │  │  │  • report_generator.py - 报告生成器                              │    │  │  │
			
 
				+│  │  │  │  • outline_catalogue_matcher.py - 大纲目录匹配器 🆕               │    │  │  │
			
 
				+│  │  │  │  • constants.py - 常量定义 🆕                                   │    │  │  │
			
 
				+│  │  │  │  • check_completeness/ - 完整性检查组件                          │    │  │  │
			
 
				+│  │  │  │  • desensitize/ - 数据脱敏模块 🆕                                │    │  │  │
			
 
				+│  │  │  │    - engine.py, validator.py, dict_manager.py                   │    │  │  │
			
 
				+│  │  │  │    - model_client.py, remapper.py                               │    │  │  │
			
 
				+│  │  │  │    - processors/ (pii, geo, biz, financial)                     │    │  │  │
			
 
				+│  │  │  │  • doc_worker/ - 文档处理工作器                                  │    │  │  │
			
 
				+│  │  │  │    - classification/ (chunk, hierarchy, smart_local) 🆕         │    │  │  │
			
 
				+│  │  │  │    - pdf_worker/ (html_to_markdown) 🆕                          │    │  │  │
			
 
				+│  │  │  │    - config/ (StandardCategoryTable.csv, prompt.yaml)           │    │  │  │
			
 
				+│  │  │  │  • minimal_pipeline/ - 简化处理管道 🆕                            │    │  │  │
			
 
				+│  │  │  │    - ocr_processor.py, pdf_extractor.py                        │    │  │  │
			
 
				+│  │  │  │    - toc_builder.py, toc_detector.py                           │    │  │  │
			
 
				+│  │  │  │    - simple_processor.py, chunk_assembler.py                    │    │  │  │
			
 
				+│  │  │  │  • standard_matching/ - 标准匹配模块 🆕                          │    │  │  │
			
 
				+│  │  │  │    - standard_dao.py, standard_service.py                      │    │  │  │
			
 
				+│  │  │  │  • infrastructure/ - 基础设施组件                                │    │  │  │
			
 
				+│  │  │  │  • reviewers/ - 审查器集合                                       │    │  │  │
			
 
				+│  │  │  │    - base_reviewer.py - 基础审查器                               │    │  │  │
			
 
				+│  │  │  │    - completeness_reviewer.py - 完整性审查器                     │    │  │  │
			
 
				+│  │  │  │    - reference_basis_reviewer.py - 参考依据审查器               │    │  │  │
			
 
				+│  │  │  │    - standard_timeliness_reviewer.py - 标准时效性审查器 🆕       │    │  │  │
			
 
				+│  │  │  │    - timeliness_basis_reviewer.py - 时效性审查器                │    │  │  │
			
 
				+│  │  │  │    - timeliness_content_reviewer.py - 时效性内容审查器 🆕        │    │  │  │
			
 
				+│  │  │  │    - semantic_logic.py - 语义逻辑审查器                         │    │  │  │
			
 
				+│  │  │  │    - sensitive_word_check.py - 敏感词检查 🆕                    │    │  │  │
			
 
				+│  │  │  │    - utils/llm_content_classifier_v2/ - LLM分类器v2 🆕          │    │  │  │
			
 
				+│  │  │  │    - utils/llm_chain_client/ - LLM链式客户端框架 🆕             │    │  │  │
			
 
				+│  │  │  └─────────────────────────────────────────────────────────────────┘    │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  construction_write/ - 施工方案编写模块 ✅ (已实现) 🆕                    │  │  │
			
 
				+│  │  │  ┌─────────────────────────────────────────────────────────────────┐    │  │  │
			
 
				+│  │  │  │  component/                                                     │    │  │  │
			
 
				+│  │  │  │  • outline_generator.py - 大纲生成器                             │    │  │  │
			
 
				+│  │  │  │  • state_models.py - 状态模型                                   │    │  │  │
			
 
				+│  │  │  │  • prompt/ - 提示词配置                                         │    │  │  │
			
 
				+│  │  │  └─────────────────────────────────────────────────────────────────┘    │  │  │
			
 
				+│  │  │  ┌─────────────────────────────────────────────────────────────────┐    │  │  │
			
 
				+│  │  │  │  workflows/                                                     │    │  │  │
			
 
				+│  │  │  │  • agent.py - 编写智能体                                        │    │  │  │
			
 
				+│  │  │  │  • outline_workflow.py - 大纲工作流                              │    │  │  │
			
 
				+│  │  │  └─────────────────────────────────────────────────────────────────┘    │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  base/ - 基础组件 ✅ (已实现)                                           │  │  │
			
 
				+│  │  │  • progress_manager.py - 进度管理器                                     │  │  │
			
 
				+│  │  │  • workflow_manager.py - 工作流管理器                                   │  │  │
			
 
				+│  │  │  • sse_manager.py - SSE服务器推送事件管理器                             │  │  │
			
 
				+│  │  │  • redis_duplicate_checker.py - Redis去重检查器                         │  │  │
			
 
				+│  │  │  • task_models.py - 任务模型定义                                        │  │  │
			
 
				+│  │  │  • words_detect/ - 敏感词检测模块                                       │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  └───────────────────────────────────────────────────────────────────────────────┘  │
			
 
				+│                                        ▲                                            │
			
 
				+│                                        │ 调用基础设施                                │
			
 
				+│                                        ▼                                            │
			
 
				+│  ┌───────────────────────────────────────────────────────────────────────────────┐  │
			
 
				+│  │                        Foundation Layer (基础设施层)                           │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  ai/ - AI模块 ✅ (已实现)                                               │  │  │
			
 
				+│  │  │  ┌─────────────────────────────────────────────────────────────────┐    │  │  │
			
 
				+│  │  │  │  agent/ - AI智能体                                              │    │  │  │
			
 
				+│  │  │  │  • base_agent.py - 智能体基类                                    │    │  │  │
			
 
				+│  │  │  │  • test_agent.py - 测试智能体                                    │    │  │  │
			
 
				+│  │  │  │  • generate/ - 生成模块                                          │    │  │  │
			
 
				+│  │  │  │  • workflow/ - 工作流模块                                        │    │  │  │
			
 
				+│  │  │  └─────────────────────────────────────────────────────────────────┘    │  │  │
			
 
				+│  │  │  ┌─────────────────────────────────────────────────────────────────┐    │  │  │
			
 
				+│  │  │  │  models/ - 模型管理                                              │    │  │  │
			
 
				+│  │  │  │  • model_handler.py - 多模型管理器                               │    │  │  │
			
 
				+│  │  │  │  • rerank_model.py - 重排序模型                                  │    │  │  │
			
 
				+│  │  │  └─────────────────────────────────────────────────────────────────┘    │  │  │
			
 
				+│  │  │  ┌─────────────────────────────────────────────────────────────────┐    │  │  │
			
 
				+│  │  │  │  rag/ - RAG检索增强生成                                         │    │  │  │
			
 
				+│  │  │  │  • retrieval/ - 检索模块                                         │    │  │  │
			
 
				+│  │  │  │  • query_rewrite.py - 查询重写                                  │    │  │  │
			
 
				+│  │  │  │  • entities_enhance.py - 实体增强检索                            │    │  │  │
			
 
				+│  │  │  └─────────────────────────────────────────────────────────────────┘    │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  database/ - 数据库模块 ✅ (已实现)                                    │  │  │
			
 
				+│  │  │  ┌─────────────────────────────────────────────────────────────────┐    │  │  │
			
 
				+│  │  │  │  base/ - 数据库基础层                                            │    │  │  │
			
 
				+│  │  │  │  • kg/ - 知识图谱 (Neo4j)                                       │    │  │  │
			
 
				+│  │  │  │  • sql/ - SQL数据库 (MySQL/PostgreSQL)                          │    │  │  │
			
 
				+│  │  │  │  • vector/ - 向量数据库 (Milvus/PG Vector)                       │    │  │  │
			
 
				+│  │  │  └─────────────────────────────────────────────────────────────────┘    │  │  │
			
 
				+│  │  │  • models/ - 数据模型定义                                             │  │  │
			
 
				+│  │  │  • repositories/ - 数据访问层                                        │  │  │
			
 
				+│  │  │  • migrations/ - 数据库迁移                                           │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  infrastructure/ - 基础设施 ✅ (已实现)                                │  │  │
			
 
				+│  │  │  • cache/ - 缓存管理 (Redis)                                          │  │  │
			
 
				+│  │  │  • config/ - 配置管理 (config_handler)                                │  │  │
			
 
				+│  │  │  • messaging/ - 消息队列 (Celery)                                     │  │  │
			
 
				+│  │  │  • mysql/ - MySQL连接池                                              │  │  │
			
 
				+│  │  │  • tracing/ - 链路追踪                                                │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  observability/ - 可观测性 ✅ (已实现)                                │  │  │
			
 
				+│  │  │  • logger/ - 日志管理                                                 │  │  │
			
 
				+│  │  │  • metrics/ - 指标收集                                                │  │  │
			
 
				+│  │  │  • monitoring/ - 监控 (ai_trace_monitor.py, time_statistics.py)       │  │  │
			
 
				+│  │  │  • monitoring/rag/ - RAG监控 🆕                                       │  │  │
			
 
				+│  │  │  • cachefiles/ - 缓存文件管理 🆕                                      │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  schemas/ - 数据模式 ✅ (已实现)                                       │  │  │
			
 
				+│  │  │  utils/ - 工具函数 ✅ (已实现)                                         │  │  │
			
 
				+│  │  │  • common.py, redis_utils.py, yaml_utils.py, tool_utils.py            │  │  │
			
 
				+│  │  │  • md5.py - MD5工具 🆕                                                │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  └───────────────────────────────────────────────────────────────────────────────┘  │
			
 
				+│                                        ▲                                            │
			
 
				+│                                        │ 数据处理                                    │
			
 
				+│                                        ▼                                            │
			
 
				+│  ┌───────────────────────────────────────────────────────────────────────────────┐  │
			
 
				+│  │                      Data Pipeline Layer (数据管道层)                          │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  document/ - 文档处理 ✅ (部分实现)                                      │  │  │
			
 
				+│  │  │  • parsers/ - 文档解析器 (PDF, DOCX, OCR)                               │  │  │
			
 
				+│  │  │  • processors/ - 数据处理器                                             │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  RAG_recall/ - RAG检索 ✅ (已实现)                                      │  │  │
			
 
				+│  │  │  • rag_miluvs/ - Milvus RAG实现                                        │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  │  ┌─────────────────────────────────────────────────────────────────────────┐  │  │
			
 
				+│  │  │  milvus_inbound_script/ - Milvus入库脚本                                │  │  │
			
 
				+│  │  │  training_data/ - 训练数据处理                                          │  │  │
			
 
				+│  │  └─────────────────────────────────────────────────────────────────────────┘  │  │
			
 
				+│  └───────────────────────────────────────────────────────────────────────────────┘  │
			
 
				+│                                        ▲                                            │
			
 
				+│                                        │ 数据存储                                    │
			
 
				+│                                        ▼                                            │
			
 
				+│  ┌───────────────────────────────────────────────────────────────────────────────┐  │
			
 
				+│  │                    External Services (外部服务)                                │  │
			
 
				+│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐           │  │
			
 
				+│  │  │   MySQL     │  │   Milvus    │  │   Redis     │  │   Neo4j     │           │  │
			
 
				+│  │  │  关系数据库   │  │  向量数据库   │  │   缓存      │  │  知识图谱    │           │  │
			
 
				+│  │  └─────────────┘  └─────────────┘  └─────────────┘  └─────────────┘           │  │
			
 
				+│  │  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐                          │  │
			
 
				+│  │  │   AI Models │  │   MinerU    │  │   Langfuse  │                          │  │
			
 
				+│  │  │  本地/云端   │  │   OCR服务   │  │   监控系统   │                          │  │
			
 
				+│  │  └─────────────┘  └─────────────┘  └─────────────┘                          │  │
			
 
				+│  └───────────────────────────────────────────────────────────────────────────────┘  │
			
 
				+│                                                                                     │
			
 
				+└─────────────────────────────────────────────────────────────────────────────────────┘
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 实际目录结构
			
 
				+
			
 
				+```
			
 
				+LQAgentPlatform/
			
 
				+├── server/                          # 服务器层 ✅
			
 
				+│   └── app.py                      # FastAPI应用入口
			
 
				+│
			
 
				+├── views/                           # 视图层 (API接口) ✅
			
 
				+│   ├── __init__.py                 # Lifespan管理（敏感词初始化、DB连接池）
			
 
				+│   ├── construction_review/         # 施工方案审查API ✅
			
 
				+│   │   ├── file_upload.py          # 文档上传接口
			
 
				+│   │   ├── launch_review.py        # 启动审查接口
			
 
				+│   │   ├── review_results.py       # 审查结果接口
			
 
				+│   │   ├── task_control.py         # 任务控制接口
			
 
				+│   │   ├── desensitize_api.py      # 脱敏API接口 🆕
			
 
				+│   │   └── schemas/
			
 
				+│   │       └── error_schemas.py    # 错误模式定义
			
 
				+│   ├── construction_write/          # 施工方案编写API ✅ 🆕
			
 
				+│   │   ├── outline_views.py        # 大纲生成接口
			
 
				+│   │   ├── content_completion.py   # 内容补全接口
			
 
				+│   │   ├── regenerate_views.py     # 重新生成接口
			
 
				+│   │   ├── task_cancel_views.py    # 任务取消接口
			
 
				+│   │   └── similar_plan_recommend.py # 相似方案推荐接口
			
 
				+│   └── test_views.py               # 测试接口
			
 
				+│
			
 
				+├── core/                            # 核心业务层 ✅
			
 
				+│   ├── base/                       # 基础组件 ✅
			
 
				+│   │   ├── progress_manager.py     # 进度管理器
			
 
				+│   │   ├── workflow_manager.py     # 工作流管理器
			
 
				+│   │   ├── sse_manager.py          # SSE推送管理
			
 
				+│   │   ├── redis_duplicate_checker.py  # Redis去重
			
 
				+│   │   ├── task_models.py          # 任务模型
			
 
				+│   │   └── words_detect/           # 敏感词检测
			
 
				+│   │
			
 
				+│   ├── construction_review/        # 施工方案审查模块 ✅
			
 
				+│   │   ├── workflows/              # 工作流层 ✅
			
 
				+│   │   │   ├── ai_review_workflow.py       # AI审查工作流 (核心)
			
 
				+│   │   │   ├── document_workflow.py        # 文档工作流
			
 
				+│   │   │   ├── report_workflow.py          # 报告工作流
			
 
				+│   │   │   ├── core_functions/             # 核心业务函数 🆕
			
 
				+│   │   │   │   └── ai_review_core_fun.py   # AI审查核心逻辑
			
 
				+│   │   │   └── types/                      # 状态类型 🆕
			
 
				+│   │   │       └── __init__.py             # AIReviewState, TaskChainState
			
 
				+│   │   │
			
 
				+│   │   └── component/              # 组件层 ✅
			
 
				+│   │       ├── ai_review_engine.py          # AI审查引擎 (核心)
			
 
				+│   │       ├── document_processor.py        # 文档处理器
			
 
				+│   │       ├── report_generator.py          # 报告生成器
			
 
				+│   │       ├── outline_catalogue_matcher.py # 大纲目录匹配器 🆕
			
 
				+│   │       ├── constants.py                 # 常量定义 🆕
			
 
				+│   │       │
			
 
				+│   │       ├── desensitize/                 # 数据脱敏模块 🆕
			
 
				+│   │       │   ├── engine.py                # 脱敏引擎核心
			
 
				+│   │       │   ├── validator.py             # 黑白名单校验
			
 
				+│   │       │   ├── dict_manager.py          # 脱敏词典管理
			
 
				+│   │       │   ├── model_client.py          # 本地LLM脱敏客户端
			
 
				+│   │       │   ├── remapper.py              # 审查结果反向映射
			
 
				+│   │       │   └── processors/              # 四维度处理器
			
 
				+│   │       │       ├── base_processor.py    # 基础处理器
			
 
				+│   │       │       ├── pii_processor.py     # 个人信息 (姓名/电话/身份证)
			
 
				+│   │       │       ├── geo_processor.py     # 地理位置 (桩号/位置/高程)
			
 
				+│   │       │       ├── biz_processor.py     # 业务信息 (公司/项目名称)
			
 
				+│   │       │       └── financial_processor.py # 财务信息 (金额/价格)
			
 
				+│   │       │
			
 
				+│   │       ├── doc_worker/                  # 文档处理工作器
			
 
				+│   │       │   ├── interfaces.py            # 数据接口/契约
			
 
				+│   │       │   ├── classification/          # 内容分类 🆕
			
 
				+│   │       │   │   ├── chunk_classifier.py  # 分块分类器
			
 
				+│   │       │   │   ├── hierarchy_classifier.py # 层级分类器
			
 
				+│   │       │   │   └── smart_local_classifier.py # 智能本地分类
			
 
				+│   │       │   ├── pdf_worker/              # PDF处理 🆕
			
 
				+│   │       │   │   └── html_to_markdown.py  # HTML转Markdown
			
 
				+│   │       │   ├── config/                  # 配置
			
 
				+│   │       │   │   ├── StandardCategoryTable.csv # 分类标准表
			
 
				+│   │       │   │   ├── prompt.yaml          # LLM提示词
			
 
				+│   │       │   │   └── config.yaml
			
 
				+│   │       │   ├── models/                  # 数据模型
			
 
				+│   │       │   │   ├── document_structure.py # 文档结构模型
			
 
				+│   │       │   │   └── converters.py        # 模型转换器
			
 
				+│   │       │   └── utils/
			
 
				+│   │       │       ├── text_split_support.py # 文本切分支持
			
 
				+│   │       │       └── prompt_loader.py     # 提示词加载
			
 
				+│   │       │
			
 
				+│   │       ├── minimal_pipeline/            # 简化处理管道 🆕
			
 
				+│   │       │   ├── simple_processor.py      # 简化处理器
			
 
				+│   │       │   ├── ocr_processor.py         # OCR处理（解耦模块）
			
 
				+│   │       │   ├── pdf_extractor.py         # PDF提取
			
 
				+│   │       │   ├── chunk_assembler.py       # 分块组装器
			
 
				+│   │       │   ├── catalog_reviewer.py      # 目录审查
			
 
				+│   │       │   ├── toc_builder.py           # 目录构建器
			
 
				+│   │       │   └── toc_detector.py          # 目录检测器
			
 
				+│   │       │
			
 
				+│   │       ├── standard_matching/           # 标准匹配模块 🆕
			
 
				+│   │       │   ├── standard_dao.py          # 标准数据访问
			
 
				+│   │       │   └── standard_service.py      # 标准匹配服务
			
 
				+│   │       │
			
 
				+│   │       ├── infrastructure/              # 基础设施组件
			
 
				+│   │       │   ├── milvus.py                # Milvus向量库客户端
			
 
				+│   │       │   ├── parent_tool.py           # 父块工具
			
 
				+│   │       │   └── relevance.py             # 相关性评分
			
 
				+│   │       │
			
 
				+│   │       ├── check_completeness/          # 完整性检查
			
 
				+│   │       │   └── components/
			
 
				+│   │       │       └── result_analyzer.py   # 结果分析器
			
 
				+│   │       │
			
 
				+│   │       ├── report/                      # 报告生成
			
 
				+│   │       │   └── prompt/
			
 
				+│   │       │       └── report_reviewers.yaml
			
 
				+│   │       │
			
 
				+│   │       └── reviewers/                   # 审查器集合 ✅
			
 
				+│   │           ├── base_reviewer.py         # 基础审查器
			
 
				+│   │           ├── completeness_reviewer.py # 完整性审查器
			
 
				+│   │           ├── reference_basis_reviewer.py # 参考依据审查
			
 
				+│   │           ├── standard_timeliness_reviewer.py # 标准时效性审查 🆕
			
 
				+│   │           ├── timeliness_basis_reviewer.py  # 时效性审查
			
 
				+│   │           ├── timeliness_content_reviewer.py # 时效性内容审查 🆕
			
 
				+│   │           ├── semantic_logic.py        # 语义逻辑审查
			
 
				+│   │           ├── sensitive_word_check.py  # 敏感词检查 🆕
			
 
				+│   │           ├── check_completeness/
			
 
				+│   │           │   └── components/
			
 
				+│   │           │       └── result_analyzer.py
			
 
				+│   │           ├── prompt/                  # 审查器提示词
			
 
				+│   │           │   ├── ai_suggestion.yaml
			
 
				+│   │           │   ├── basic_reviewers.yaml
			
 
				+│   │           │   ├── outline_reviewers.yaml
			
 
				+│   │           │   ├── query_extract.yaml
			
 
				+│   │           │   ├── rag_reviewers.yaml
			
 
				+│   │           │   ├── reference_basis_reviewer.yaml
			
 
				+│   │           │   ├── technical_reviewers.yaml
			
 
				+│   │           │   └── timeliness_basis_reviewer.yaml
			
 
				+│   │           ├── sensitive_words/         # 敏感词词典
			
 
				+│   │           └── utils/                   # 审查工具函数
			
 
				+│   │               ├── ac_automaton.py      # AC自动机
			
 
				+│   │               ├── directory_extraction.py
			
 
				+│   │               ├── llm_content_classifier_v2/ # LLM分类器v2 🆕
			
 
				+│   │               │   ├── main_classifier.py    # 主入口
			
 
				+│   │               │   ├── content_classifier.py # 核心分类逻辑
			
 
				+│   │               │   ├── category_loaders.py   # 分类加载器
			
 
				+│   │               │   ├── chunks_converter.py   # 分块转换
			
 
				+│   │               │   ├── models.py             # 数据模型
			
 
				+│   │               │   ├── prompt.py             # 分类提示词
			
 
				+│   │               │   └── embedding_client.py   # Embedding客户端
			
 
				+│   │               └── llm_chain_client/          # LLM链式客户端框架 🆕
			
 
				+│   │                   ├── bootstrap.py           # 客户端工厂
			
 
				+│   │                   ├── interfaces/            # 接口定义
			
 
				+│   │                   │   ├── chain_executor.py
			
 
				+│   │                   │   ├── llm_client.py
			
 
				+│   │                   │   └── prompt_loader.py
			
 
				+│   │                   ├── implementations/       # 实现
			
 
				+│   │                   │   ├── chains/async_chain_executor.py
			
 
				+│   │                   │   ├── clients/ (base, deepseek, doubao, gemini, qwen)
			
 
				+│   │                   │   └── loaders/yaml_prompt_loader.py
			
 
				+│   │                   └── orchestration/prompt_chain_processor.py
			
 
				+│   │
			
 
				+│   └── construction_write/          # 施工方案编写模块 ✅ 🆕
			
 
				+│       ├── component/
			
 
				+│       │   ├── outline_generator.py # 大纲生成器
			
 
				+│       │   ├── state_models.py      # 状态模型
			
 
				+│       │   └── prompt/
			
 
				+│       │       └── keyword_rules_3.json # 关键字规则
			
 
				+│       └── workflows/
			
 
				+│           ├── agent.py             # 编写智能体
			
 
				+│           └── outline_workflow.py  # 大纲工作流
			
 
				+│
			
 
				+├── foundation/                      # 基础设施层 ✅
			
 
				+│   ├── ai/                         # AI模块 ✅
			
 
				+│   │   ├── agent/                  # AI智能体
			
 
				+│   │   │   └── generate/           # 生成模块
			
 
				+│   │   │       └── model_generate.py
			
 
				+│   │   ├── models/                 # 模型管理 ✅
			
 
				+│   │   │   ├── model_handler.py    # 多模型管理器
			
 
				+│   │   │   ├── model_config_loader.py # 模型配置加载器 🆕
			
 
				+│   │   │   └── rerank_model.py     # 重排序模型
			
 
				+│   │   └── rag/                    # RAG检索增强 ✅
			
 
				+│   │       └── retrieval/          # 检索模块
			
 
				+│   │           ├── query_rewrite.py         # 查询重写
			
 
				+│   │           ├── retrieval.py             # 检索管理器
			
 
				+│   │           └── entities_enhance.py      # 实体增强检索
			
 
				+│   │
			
 
				+│   ├── database/                   # 数据库模块 ✅
			
 
				+│   │   ├── base/                   # 数据库基础层
			
 
				+│   │   │   ├── kg/                 # 知识图谱 (Neo4j)
			
 
				+│   │   │   ├── sql/                # SQL数据库 (MySQL/PostgreSQL)
			
 
				+│   │   │   └── vector/             # 向量数据库
			
 
				+│   │   ├── models/                 # 数据模型定义 ✅
			
 
				+│   │   ├── repositories/           # 数据访问层 ✅
			
 
				+│   │   └── migrations/             # 数据库迁移
			
 
				+│   │
			
 
				+│   ├── infrastructure/             # 基础设施 ✅
			
 
				+│   │   ├── cache/                  # 缓存管理 (Redis)
			
 
				+│   │   ├── config/                 # 配置管理 ✅
			
 
				+│   │   │   └── config.py           # config_handler
			
 
				+│   │   ├── messaging/              # 消息队列 (Celery) ✅
			
 
				+│   │   ├── mysql/                  # MySQL连接池 ✅
			
 
				+│   │   └── tracing/                # 链路追踪 ✅
			
 
				+│   │
			
 
				+│   ├── observability/              # 可观测性 ✅
			
 
				+│   │   ├── logger/                 # 日志管理
			
 
				+│   │   ├── metrics/                # 指标收集
			
 
				+│   │   ├── monitoring/             # 监控 ✅
			
 
				+│   │   │   ├── ai_trace_monitor.py # AI追踪监控
			
 
				+│   │   │   ├── time_statistics.py  # 时间统计
			
 
				+│   │   │   └── rag/                # RAG监控 🆕
			
 
				+│   │   └── cachefiles/             # 缓存文件管理 🆕
			
 
				+│   │
			
 
				+│   ├── schemas/                    # 数据模式 ✅
			
 
				+│   └── utils/                      # 工具函数 ✅
			
 
				+│       ├── common.py
			
 
				+│       ├── redis_utils.py
			
 
				+│       ├── yaml_utils.py
			
 
				+│       ├── tool_utils.py
			
 
				+│       └── md5.py                  # MD5工具 🆕
			
 
				+│
			
 
				+├── data_pipeline/                  # 数据管道层 ✅
			
 
				+│   ├── document/                   # 文档处理
			
 
				+│   │   ├── parsers/               # 文档解析器
			
 
				+│   │   └── processors/            # 数据处理器
			
 
				+│   ├── RAG_recall/                # RAG检索 ✅
			
 
				+│   │   └── rag_miluvs/            # Milvus RAG实现
			
 
				+│   ├── milvus_inbound_script/     # Milvus入库脚本
			
 
				+│   └── training_data/             # 训练数据处理
			
 
				+│
			
 
				+├── config/                         # 配置文件 ✅
			
 
				+│   ├── config.ini                 # 主配置文件 (模型/数据库/Redis)
			
 
				+│   ├── config.ini.template        # 配置模板
			
 
				+│   ├── model_setting.yaml         # 模型设置 🆕
			
 
				+│   ├── prompt/                    # 提示词配置 ✅
			
 
				+│   │   ├── system_prompt.yaml
			
 
				+│   │   └── intent_prompt.yaml
			
 
				+│   ├── sql/                       # SQL脚本
			
 
				+│   └── yolo/                      # YOLO模型 🆕
			
 
				+│       └── best.pt
			
 
				+│
			
 
				+├── utils_test/                    # 测试工具集
			
 
				+│   ├── AI_Review_Test/           # AI审查测试
			
 
				+│   ├── API_key/                  # API密钥生成 🆕
			
 
				+│   ├── Check_Item/               # 审查项测试 🆕
			
 
				+│   ├── Chunk_Split_Test/         # 分块切分测试 🆕
			
 
				+│   ├── Completeness_Enhanced_Test/ # 增强完整性测试 🆕
			
 
				+│   ├── Completeness_Test/         # 完整性测试 🆕
			
 
				+│   ├── Integration_Test/         # 集成测试
			
 
				+│   ├── Milvus_Test/              # Milvus测试
			
 
				+│   ├── MinerU_Test/              # MinerU测试
			
 
				+│   ├── Model_Test/               # 模型测试
			
 
				+│   ├── Other_Test/               # 其他测试 🆕
			
 
				+│   ├── Prompt_Test/              # 提示词测试 🆕
			
 
				+│   ├── RAG_Test/                 # RAG测试 🆕
			
 
				+│   ├── RE_Rrank_Test/            # 重排序测试 🆕
			
 
				+│   ├── Redis/                    # Redis哨兵测试 🆕
			
 
				+│   ├── Redis_Test/               # Redis测试
			
 
				+│   ├── Result_Visual_Observation_Tools/ # 结果可视化 🆕
			
 
				+│   ├── Semantic_Logic_Test/      # 语义逻辑测试
			
 
				+│   ├── Sensitive_Test/           # 敏感词测试
			
 
				+│   ├── standard_new_Test/        # 新标准测试 🆕
			
 
				+│   └── Sync_Funcation_Test/      # 同步函数测试
			
 
				+│
			
 
				+├── docker/                        # Docker配置
			
 
				+├── .design/                       # 设计文档
			
 
				+│   ├── 施工方案编写应用/           # 编写应用设计 🆕
			
 
				+│   └── 施工方案审查应用/           # 审查应用设计
			
 
				+│       ├── base_stage/           # 基础阶段设计
			
 
				+│       ├── Iterative_stage/      # 迭代阶段设计
			
 
				+│       ├── technical_rehearsal/  # 技术预演
			
 
				+│       ├── 基础架构设计/          # 架构设计
			
 
				+│       ├── 施工方案审查API架构设计/ # API架构设计
			
 
				+│       ├── 完整性审查模块/        # 完整性审查设计
			
 
				+│       └── 文档处理模块/          # 文档处理设计
			
 
				+│
			
 
				+├── .RaD/                          # 研发文档 🆕
			
 
				+│
			
 
				+├── logs/                          # 日志目录
			
 
				+├── README.md                      # 项目说明
			
 
				+├── README_deploy.md               # 部署说明 🆕
			
 
				+├── README_test.md                 # 测试说明 🆕
			
 
				+├── requirements.txt               # 依赖清单
			
 
				+├── gunicorn_config.py            # Gunicorn配置
			
 
				+├── run.sh                        # 启动脚本
			
 
				+└── Dockerfile                    # Docker镜像
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 技术栈清单
			
 
				+
			
 
				+### Web框架
			
 
				+- **FastAPI** - 现代化异步Web框架
			
 
				+- **Uvicorn** - ASGI服务器
			
 
				+
			
 
				+### AI框架
			
 
				+- **LangChain** - LLM应用开发框架
			
 
				+- **LangGraph** - 工作流编排引擎
			
 
				+
			
 
				+### 数据库
			
 
				+- **MySQL** - 关系型数据库 (主存储) （保留基础组件，暂未使用）
			
 
				+- **Milvus** - 向量数据库 (RAG检索)
			
 
				+- **PostgreSQL** - 备选关系型数据库（保留基础组件，暂未使用）
			
 
				+- **Neo4j** - 知识图谱数据库（保留基础组件，暂未使用）
			
 
				+
			
 
				+### 缓存/消息队列
			
 
				+- **Redis** - 缓存 + 分布式锁
			
 
				+- **Celery** - 异步任务队列
			
 
				+
			
 
				+### 模型支持
			
 
				+#### 本地模型
			
 
				+- **lq_qwen3_8b** - Qwen3-8B (默认模型)
			
 
				+- **lq_qwen3_4b** - Qwen3-4B
			
 
				+- **lq_qwen3_8b_emd** - Qwen3-8B Embedding
			
 
				+- **lq_rerank_model** - Qwen3-Reranker-8B
			
 
				+
			
 
				+#### 云端模型
			
 
				+- **豆包 (Doubao)** - 字节跳动模型
			
 
				+- **通义千问 (Qwen)** - 阿里云模型
			
 
				+- **DeepSeek** - DeepSeek模型
			
 
				+- **Gemini** - Google模型
			
 
				+- **SiliconFlow** - 硅基流动模型
			
 
				+
			
 
				+### OCR服务
			
 
				+- **MinerU** - 文档OCR识别 (集成中)
			
 
				+
			
 
				+
			
 
				+---
			
 
				+
			
 
				+
			
 
				+
			
 
				+## API接口清单
			
 
				+
			
 
				+### 施工方案审查API
			
 
				+
			
 
				+#### 文档上传
			
 
				+- `POST /construction/upload` - 上传待审查文档
			
 
				+
			
 
				+#### 审查控制
			
 
				+- `POST /sgsc/sse/launch_review` - 启动审查任务（SSE流式）
			
 
				+- `GET /construction/review/results` - 查询审查结果
			
 
				+
			
 
				+#### 任务控制
			
 
				+- `POST /construction/task/terminate` - 终止审查任务
			
 
				+
			
 
				+#### 数据脱敏
			
 
				+- 脱敏API接口 (desensitize_api.py)
			
 
				+
			
 
				+### 施工方案编写API 🆕
			
 
				+
			
 
				+#### 大纲生成
			
 
				+- 大纲生成与工作流接口 (outline_views.py)
			
 
				+
			
 
				+#### 内容编写
			
 
				+- 内容补全接口 (content_completion.py)
			
 
				+- 重新生成接口 (regenerate_views.py)
			
 
				+- 任务取消接口 (task_cancel_views.py)
			
 
				+
			
 
				+#### 方案推荐
			
 
				+- 相似方案推荐接口 (similar_plan_recommend.py)
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 数据流向图
			
 
				+
			
 
				+```
			
 
				+┌──────────────┐
			
 
				+│  用户上传文档  │
			
 
				+└──────┬───────┘
			
 
				+       ▼
			
 
				+┌──────────────────────────────────────────────────────────────┐
			
 
				+│  Views Layer (file_upload.py)                                 │
			
 
				+│  • 接收文件                                                    │
			
 
				+│  • 文件验证                                                    │
			
 
				+│  • 创建任务                                                    │
			
 
				+└──────┬───────────────────────────────────────────────────────┘
			
 
				+       ▼
			
 
				+┌──────────────────────────────────────────────────────────────┐
			
 
				+│  Core Layer - Document Workflow                              │
			
 
				+│  • 文档解析 (PDF/DOCX)                                        │
			
 
				+│  • 结构提取                                                   │
			
 
				+│  • 分块处理                                                   │
			
 
				+└──────┬───────────────────────────────────────────────────────┘
			
 
				+       ▼
			
 
				+┌──────────────────────────────────────────────────────────────┐
			
 
				+│  Database Layer                                              │
			
 
				+│  • Redis - 存储文档元数据                                      │
			
 
				+│  • Milvus - 向量存储                                          │
			
 
				+└──────┬───────────────────────────────────────────────────────┘
			
 
				+       ▼
			
 
				+┌──────────────────────────────────────────────────────────────┐
			
 
				+│  Core Layer - AI Review Workflow                             │
			
 
				+│  • AI审查引擎 (construction_review/ai_review_engine.py)                          │
			
 
				+│  • 多种审查器并行执行                                          │
			
 
				+│  • RAG检索增强                                                │
			
 
				+└──────┬───────────────────────────────────────────────────────┘
			
 
				+       ▼
			
 
				+┌──────────────────────────────────────────────────────────────┐
			
 
				+│  Foundation Layer - AI模块                                    │
			
 
				+│  • 模型调用 (model_handler.py)                               │
			
 
				+│  • RAG检索 (retrieval/)                                      │
			
 
				+│  • 查询重写                                                   │
			
 
				+└──────┬───────────────────────────────────────────────────────┘
			
 
				+       ▼
			
 
				+┌──────────────────────────────────────────────────────────────┐
			
 
				+│  External Services                                           │
			
 
				+│  • AI模型 (本地/云端)                                         │
			
 
				+│  • Milvus向量检索                                            │
			
 
				+│  • Redis缓存                                                 │
			
 
				+└──────┬───────────────────────────────────────────────────────┘
			
 
				+       ▼
			
 
				+┌──────────────────────────────────────────────────────────────┐
			
 
				+│  结果处理                                                     │
			
 
				+│  • 报告生成 (report_generator.py)                            │
			
 
				+│  • SSE推送进度                                                │
			
 
				+│  • 结果存储                                                   │
			
 
				+└──────┬───────────────────────────────────────────────────────┘
			
 
				+       ▼
			
 
				+┌──────────────────┐
			
 
				+│  返回审查结果     │
			
 
				+└──────────────────┘
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 各层职责说明
			
 
				+
			
 
				+### 1. Server Layer (服务器层)
			
 
				+**职责**: FastAPI应用管理、路由配置、Celery任务管理
			
 
				+**实现**:
			
 
				+- `RouteManager` - 路由配置和中间件
			
 
				+- `CeleryWorkerManager` - Celery Worker生命周期管理
			
 
				+- `ApplicationFactory` - 应用工厂模式
			
 
				+
			
 
				+### 2. Views Layer (视图层)
			
 
				+**职责**: HTTP API接口、请求处理、响应格式化
			
 
				+**实现**:
			
 
				+- **construction_review/** - 审查API (上传、审查启动、结果查询、任务控制、脱敏)
			
 
				+- **construction_write/** 🆕 - 编写API (大纲生成、内容补全、重新生成、方案推荐)
			
 
				+
			
 
				+### 3. Core Layer (核心业务层)
			
 
				+**职责**: 业务逻辑编排、应用状态管理、工作流协调
			
 
				+**实现**:
			
 
				+- **construction_review/** - 施工方案审查模块
			
 
				+  - **workflows/** - 基于LangGraph的工作流编排
			
 
				+  - **workflows/core_functions/** 🆕 - AI审查核心业务函数
			
 
				+  - **workflows/types/** 🆕 - 状态类型定义 (AIReviewState, TaskChainState)
			
 
				+  - **component/** - 可复用的业务组件
			
 
				+  - **component/desensitize/** 🆕 - 四维度数据脱敏 (PII/地理/业务/财务)
			
 
				+  - **component/minimal_pipeline/** 🆕 - 简化处理管道 (PDF提取/OCR/目录)
			
 
				+  - **component/standard_matching/** 🆕 - 标准规范匹配服务
			
 
				+  - **component/doc_worker/classification/** 🆕 - 智能内容分类 (分块/层级/本地)
			
 
				+  - **component/reviewers/** - 专门化的审查器集合
			
 
				+  - **component/reviewers/utils/llm_content_classifier_v2/** 🆕 - LLM驱动的内容分类器
			
 
				+  - **component/reviewers/utils/llm_chain_client/** 🆕 - 多厂商LLM链式调用框架
			
 
				+- **construction_write/** 🆕 - 施工方案编写模块
			
 
				+  - **component/** - 大纲生成器、状态模型
			
 
				+  - **workflows/** - 编写智能体、大纲工作流
			
 
				+
			
 
				+### 4. Foundation Layer (基础设施层-办公应用智能体可复用的基础组件)
			
 
				+**职责**: 基础设施、通用组件、外部服务集成
			
 
				+**实现**:
			
 
				+- **ai/** - AI模型管理、RAG检索
			
 
				+- **database/** - 多数据库支持
			
 
				+- **infrastructure/** - 缓存、配置、消息队列
			
 
				+- **observability/** - 日志、监控、指标
			
 
				+
			
 
				+### 5. Data Pipeline Layer (离线数据管道层，不与在线流程集成)
			
 
				+**职责**: 数据处理、格式转换、内容解析
			
 
				+**实现**: 文档解析器、数据处理器、向量入库
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 配置管理结构
			
 
				+
			
 
				+### config.ini 主要配置项
			
 
				+
			
 
				+```ini
			
 
				+见源代码中的config/config.ini
			
 
				+```
			
 
				+---
			
 
				+
			
 
				+## 系统启动流程详解
			
 
				+
			
 
				+### 1. 应用启动入口 (server/app.py)
			
 
				+
			
 
				+#### 启动类架构
			
 
				+```
			
 
				+ApplicationFactory (应用工厂)
			
 
				+├── create_app()              # 创建FastAPI应用实例
			
 
				+├── create_server_config()    # 创建服务器配置
			
 
				+└── celery_manager           # CeleryWorkerManager实例
			
 
				+
			
 
				+RouteManager (路由管理器)
			
 
				+├── _setup_cors()            # 配置CORS中间件
			
 
				+├── _setup_routes()          # 配置所有路由
			
 
				+├── _setup_exception_handlers() # 全局异常处理
			
 
				+├── _setup_health_checks()   # 健康检查接口
			
 
				+└── _setup_api_docs()        # Swagger API文档
			
 
				+
			
 
				+CeleryWorkerManager (Celery Worker管理器)
			
 
				+├── start_worker()           # 启动Celery Worker（后台线程）
			
 
				+├── stop_worker()            # 优雅停止Worker
			
 
				+├── stop_worker_immediately() # 立即停止Worker
			
 
				+└── _cleanup_redis_tasks()   # 清理Redis任务
			
 
				+
			
 
				+ServerRunner (服务器运行器)
			
 
				+└── run_server()             # 运行Uvicorn服务器
			
 
				+```
			
 
				+
			
 
				+#### 启动流程
			
 
				+```
			
 
				+1. python server/app.py (主入口)
			
 
				+   ↓
			
 
				+2. 创建ApplicationFactory实例
			
 
				+   ↓
			
 
				+3. 初始化RouteManager配置路由
			
 
				+   - 添加CORS中间件
			
 
				+   - 注册所有API路由
			
 
				+   - 配置全局异常处理
			
 
				+   - 设置健康检查接口
			
 
				+   ↓
			
 
				+4. 启动CeleryWorkerManager（后台线程）
			
 
				+   - 清理Redis残留任务
			
 
				+   - 在独立线程中运行celery_app.worker_main(['worker'])
			
 
				+   - 等待2秒确保启动成功
			
 
				+   ↓
			
 
				+5. 配置信号处理器
			
 
				+   - SIGINT (Ctrl+C)
			
 
				+   - SIGTERM (终止信号)
			
 
				+   - Windows控制台事件（CTRL_CLOSE_EVENT等）
			
 
				+   ↓
			
 
				+6. 启动Uvicorn服务器
			
 
				+   - host: 0.0.0.0 (可配置)
			
 
				+   - port: 8002 (可配置)
			
 
				+   - 加载FastAPI应用
			
 
				+   ↓
			
 
				+7. 服务运行中...
			
 
				+   ↓
			
 
				+8. 收到停止信号时
			
 
				+   - 停止Celery Worker
			
 
				+   - 清理Redis任务
			
 
				+   - 关闭事件循环
			
 
				+```
			
 
				+
			
 
				+#### 关键配置项
			
 
				+```ini
			
 
				+[launch]
			
 
				+HOST = 0.0.0.0              # 监听地址
			
 
				+LAUNCH_PORT = 8002          # 监听端口
			
 
				+
			
 
				+[redis]
			
 
				+REDIS_HOST=127.0.0.1        # Redis主机
			
 
				+REDIS_PORT=6379             # Redis端口
			
 
				+REDIS_DB=0                  # Redis数据库
			
 
				+REDIS_PASSWORD=123456       # Redis密码
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 任务提交流程详解
			
 
				+
			
 
				+### 1. 完整任务流程架构
			
 
				+
			
 
				+```
			
 
				+┌─────────────────────────────────────────────────────────────────────────────────┐
			
 
				+│                           任务提交与执行完整流程                                   │
			
 
				+└─────────────────────────────────────────────────────────────────────────────────┘
			
 
				+
			
 
				+┌─────────────────────────────────────────────────────────────────────────────────┐
			
 
				+│ 步骤1: 文件上传 (views/construction_review/file_upload.py)                      │
			
 
				+│  POST /construction/upload                                                      │
			
 
				+│  • 接收PDF文件上传                                                               │
			
 
				+│  • 生成file_id和callback_task_id                                                │
			
 
				+│  • 保存文件内容到Redis                                                           │
			
 
				+│  • 返回callback_task_id给前端                                                    │
			
 
				+└─────────────────────────────────────────────────────────────────────────────────┘
			
 
				+                                    ↓
			
 
				+┌─────────────────────────────────────────────────────────────────────────────────┐
			
 
				+│ 步骤2: 启动审查 (views/construction_review/launch_review.py)                    │
			
 
				+│  POST /sgsc/sse/launch_review                                                   │
			
 
				+│  • 验证用户权限和参数                                                            │
			
 
				+│  • 建立SSE连接                                                                   │
			
 
				+│  • 调用WorkflowManager.submit_task_processing()                                  │
			
 
				+└─────────────────────────────────────────────────────────────────────────────────┘
			
 
				+                                    ↓
			
 
				+┌─────────────────────────────────────────────────────────────────────────────────┐
			
 
				+│ 步骤3: 提交到Celery (core/base/workflow_manager.py)                             │
			
 
				+│  WorkflowManager.submit_task_processing()                                       │
			
 
				+│  • 使用CeleryTraceManager提交任务                                               │
			
 
				+│  • 自动传递trace_id用于链路追踪                                                  │
			
 
				+│  • 任务进入Redis队列                                                             │
			
 
				+└─────────────────────────────────────────────────────────────────────────────────┘
			
 
				+                                    ↓
			
 
				+┌─────────────────────────────────────────────────────────────────────────────────┐
			
 
				+│ 步骤4: Celery Worker执行 (foundation/infrastructure/messaging/tasks.py)        │
			
 
				+│  submit_task_processing_task()                                                  │
			
 
				+│  • 从队列获取任务                                                                │
			
 
				+│  • 恢复trace_id上下文                                                            │
			
 
				+│  • 调用WorkflowManager.submit_task_processing_sync()                             │
			
 
				+└─────────────────────────────────────────────────────────────────────────────────┘
			
 
				+                                    ↓
			
 
				+┌─────────────────────────────────────────────────────────────────────────────────┐
			
 
				+│ 步骤5: LangGraph任务链执行 (core/base/workflow_manager.py)                      │
			
 
				+│  submit_task_processing_sync()                                                  │
			
 
				+│  • 创建TaskFileInfo对象                                                          │
			
 
				+│  • 创建TaskChainState初始状态                                                    │
			
 
				+│  • 构建LangGraph任务链工作流图                                                   │
			
 
				+│  • 执行ainvoke()运行工作流                                                       │
			
 
				+└─────────────────────────────────────────────────────────────────────────────────┘
			
 
				+                                    ↓
			
 
				+┌─────────────────────────────────────────────────────────────────────────────────┐
			
 
				+│                    LangGraph任务链工作流（方案D）                                │
			
 
				+│  ┌─────────────────────────────────────────────────────────────────────────┐   │
			
 
				+│  │ start → document_processing → ai_review_subgraph → report_generation →  │   │
			
 
				+│  │        complete                                                          │   │
			
 
				+│  └─────────────────────────────────────────────────────────────────────────┘   │
			
 
				+│           每个阶段后检查终止信号：                                              │
			
 
				+│           - terminate: 进入终止节点                                            │
			
 
				+│           - error: 进入错误处理节点                                            │
			
 
				+│           - continue: 继续下一阶段                                             │
			
 
				+└─────────────────────────────────────────────────────────────────────────────────┘
			
 
				+```
			
 
				+
			
 
				+### 2. LangGraph任务链节点详解
			
 
				+
			
 
				+#### 节点1: start (任务链开始)
			
 
				+```python
			
 
				+async def _start_chain_node(state: TaskChainState) -> TaskChainState:
			
 
				+    """
			
 
				+    初始化任务链状态
			
 
				+    - current_stage: "start"
			
 
				+    - overall_task_status: "processing"
			
 
				+    - stage_status: 所有阶段初始化为"pending"
			
 
				+    """
			
 
				+```
			
 
				+
			
 
				+#### 节点2: document_processing (文档处理)
			
 
				+```python
			
 
				+async def _document_processing_node(state: TaskChainState) -> TaskChainState:
			
 
				+    """
			
 
				+    执行文档处理工作流
			
 
				+    - 检查终止信号
			
 
				+    - 创建DocumentWorkflow实例
			
 
				+    - 调用document_workflow.execute()
			
 
				+    - 返回结构化内容
			
 
				+    - stage_status["document"]: "completed"/"terminated"/"failed"
			
 
				+    """
			
 
				+```
			
 
				+
			
 
				+#### 节点3: ai_review_subgraph (AI审查)
			
 
				+```python
			
 
				+async def _ai_review_subgraph_node(state: TaskChainState) -> TaskChainState:
			
 
				+    """
			
 
				+    执行AI审查工作流（嵌套子图）
			
 
				+    - 检查终止信号
			
 
				+    - 获取文档处理结果中的structured_content
			
 
				+    - 创建AIReviewWorkflow实例
			
 
				+    - 调用ai_workflow.execute()
			
 
				+    - 返回审查结果
			
 
				+    - stage_status["ai_review"]: "completed"/"terminated"/"failed"
			
 
				+    """
			
 
				+```
			
 
				+
			
 
				+#### 节点4: report_generation (报告生成)
			
 
				+```python
			
 
				+async def _report_generation_node(state: TaskChainState) -> TaskChainState:
			
 
				+    """
			
 
				+    生成审查报告
			
 
				+    - 检查终止信号
			
 
				+    - 获取AI审查结果
			
 
				+    - 创建ReportWorkflow实例
			
 
				+    - 调用report_workflow.execute()
			
 
				+    - 保存完整结果到文件
			
 
				+    - stage_status["report"]: "completed"/"terminated"/"failed"
			
 
				+    """
			
 
				+```
			
 
				+
			
 
				+#### 节点5: complete (任务完成)
			
 
				+```python
			
 
				+async def _complete_chain_node(state: TaskChainState) -> TaskChainState:
			
 
				+    """
			
 
				+    标记整体任务完成
			
 
				+    - overall_task_status: "completed" ⚠️ 只有到这里才标记完成
			
 
				+    - 清理Redis文件缓存
			
 
				+    - 通知SSE连接
			
 
				+    """
			
 
				+```
			
 
				+
			
 
				+#### 节点6: error_handler (错误处理)
			
 
				+```python
			
 
				+async def _error_handler_chain_node(state: TaskChainState) -> TaskChainState:
			
 
				+    """
			
 
				+    处理任务链错误
			
 
				+    - overall_task_status: "failed"
			
 
				+    - 清理Redis文件缓存
			
 
				+    - 通知SSE连接失败状态
			
 
				+    """
			
 
				+```
			
 
				+
			
 
				+#### 节点7: terminate (任务终止)
			
 
				+```python
			
 
				+async def _terminate_chain_node(state: TaskChainState) -> TaskChainState:
			
 
				+    """
			
 
				+    处理任务终止
			
 
				+    - overall_task_status: "terminated"
			
 
				+    - 清理Redis终止信号
			
 
				+    - 清理Redis文件缓存
			
 
				+    - 通知SSE连接终止状态
			
 
				+    """
			
 
				+```
			
 
				+
			
 
				+### 3. 条件边判断逻辑
			
 
				+
			
 
				+```python
			
 
				+def _should_terminate_or_error_chain(state: TaskChainState) -> str:
			
 
				+    """
			
 
				+    决定工作流下一步走向
			
 
				+    优先级：terminate > error > continue
			
 
				+    """
			
 
				+    # 1. 优先检查终止信号
			
 
				+    if state.get("overall_task_status") == "terminated":
			
 
				+        return "terminate"
			
 
				+
			
 
				+    # 2. 检查错误状态
			
 
				+    if state.get("overall_task_status") == "failed" or state.get("error_message"):
			
 
				+        return "error"
			
 
				+
			
 
				+    # 3. 默认继续执行
			
 
				+    return "continue"
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 审查逻辑详解
			
 
				+
			
 
				+### 1. AI审查引擎架构 (core/construction_review/component/ai_review_engine.py)
			
 
				+
			
 
				+```
			
 
				+AIReviewEngine (审查引擎核心)
			
 
				+├── 基础合规性检查
			
 
				+│   ├── check_grammar()              # 词句语法检查
			
 
				+│   ├── check_semantic_logic()       # 语义逻辑检查
			
 
				+│   ├── check_completeness()         # 完整性检查
			
 
				+│   ├── check_timeliness()           # 时效性检查
			
 
				+│   └── check_reference()            # 规范性检查
			
 
				+│
			
 
				+├── 技术性合规检查
			
 
				+│   ├── check_non_parameter_compliance()  # 非参数合规性检查
			
 
				+│   └── check_parameter_compliance()      # 参数合规性检查
			
 
				+│
			
 
				+├── RAG增强审查
			
 
				+│   ├── vector_search()              # 向量检索
			
 
				+│   ├── hybrid_search()              # 混合检索
			
 
				+│   ├── rerank_results()             # 重排序结果
			
 
				+│   └── generate_enhanced_suggestions() # 生成增强建议
			
 
				+│
			
 
				+└── 专业性审查
			
 
				+    ├── prep_basis_review()          # 编制依据审查
			
 
				+    ├── outline_review_results_df()  # 目录审查
			
 
				+    └── catalogues_check()           # 章节审查
			
 
				+```
			
 
				+
			
 
				+### 2. 审查配置与模式
			
 
				+
			
 
				+#### 审查配置参数
			
 
				+```python
			
 
				+# 方式1: review_config (审查维度枚举值)
			
 
				+review_config = [
			
 
				+    'sensitive_word_check',       # 词句语法检查
			
 
				+    'semantic_logic_check',       # 语义逻辑审查
			
 
				+    'completeness_check',         # 条文完整性审查
			
 
				+    'timeliness_check',           # 时效性审查
			
 
				+    'reference_check',            # 规范性审查
			
 
				+    'sensitive_check',            # 敏感词审查
			
 
				+    'non_parameter_compliance_check',  # 非参数合规性检查
			
 
				+    'parameter_compliance_check', # 参数合规性检查
			
 
				+]
			
 
				+
			
 
				+# 方式2: review_item_config (章节_审查维度格式)
			
 
				+review_item_config = [
			
 
				+    'basis_sensitive_word_check',     # 编制依据章节-词句语法检查
			
 
				+    'plan_semantic_logic_check',      # 施工计划章节-语义逻辑审查
			
 
				+    'catalogue_completeness_check',   # 目录章节-完整性检查（特殊规则）
			
 
				+]
			
 
				+```
			
 
				+
			
 
				+#### 审查模式配置
			
 
				+```ini
			
 
				+[ai_review]
			
 
				+MAX_REVIEW_UNITS=5          # 最大审查单元数量（0=全部审查）
			
 
				+REVIEW_MODE=all             # 审查模式: all/random/first
			
 
				+```
			
 
				+
			
 
				+#### 工程方案类型
			
 
				+```python
			
 
				+supported_types = {
			
 
				+    '01_pf_Found_Rotary_Drill',  # 旋挖钻机、冲击钻机成孔桩
			
 
				+    '02_pf_Dig_Manual_Pile',     # 人工挖孔桩
			
 
				+    '03_bd_Sub_Cyl_Pier',        # 圆柱墩、系梁、盖梁
			
 
				+    # ... 共13种工程方案类型
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+### 3. AI审查工作流 (core/construction_review/workflows/ai_review_workflow.py)
			
 
				+
			
 
				+```
			
 
				+AIReviewWorkflow (基于LangGraph的审查工作流)
			
 
				+├── 构建审查子图
			
 
				+│   ├── 添加审查节点（每种审查类型一个节点）
			
 
				+│   ├── 设置节点间转换关系
			
 
				+│   └── 编译为可执行图
			
 
				+│
			
 
				+├── 审查节点类型
			
 
				+│   ├── 单元级审查节点
			
 
				+│   │   ├── 文档分块
			
 
				+│   │   ├── 并发执行各类审查器
			
 
				+│   │   ├── 汇总审查结果
			
 
				+│   │   └── 计算风险等级
			
 
				+│   │
			
 
				+│   └── 章节级审查节点
			
 
				+│       ├── 提取章节内容
			
 
				+│       ├── 执行章节特定审查
			
 
				+│       └── 生成章节审查结果
			
 
				+│
			
 
				+└── 审查器集合 (reviewers/)
			
 
				+    ├── base_reviewer.py         # 基础审查器
			
 
				+    ├── check_completeness/      # 完整性检查组件
			
 
				+    ├── catalogues_check/        # 目录审查组件
			
 
				+    └── utils/                   # 审查工具函数
			
 
				+```
			
 
				+
			
 
				+### 4. RAG检索增强流程
			
 
				+
			
 
				+```
			
 
				+RAG检索增强
			
 
				+│
			
 
				+├── 1. 查询重写 (query_rewrite_manager)
			
 
				+│   └── 优化原始查询以提高检索质量
			
 
				+│
			
 
				+├── 2. 实体增强 (entity_enhance)
			
 
				+│   └── 识别文档实体并增强查询
			
 
				+│
			
 
				+├── 3. 向量检索 (MilvusManager)
			
 
				+│   ├── vector_search()         # 纯向量检索
			
 
				+│   └── hybrid_search()         # 混合检索（向量+关键词）
			
 
				+│
			
 
				+├── 4. 父块召回 (enhance_with_parent_docs_grouped)
			
 
				+│   └── 返回父块上下文信息
			
 
				+│
			
 
				+└── 5. 重排序 (rerank_results)
			
 
				+    └── 使用Rerank模型优化结果排序
			
 
				+```
			
 
				+
			
 
				+### 5. 审查结果数据结构
			
 
				+
			
 
				+```python
			
 
				+{
			
 
				+    "callback_task_id": "file_id-timestamp",
			
 
				+    "file_id": "original_file_id",
			
 
				+    "file_name": "document.pdf",
			
 
				+    "user_id": "user-001",
			
 
				+    "overall_task_status": "completed",  # processing/completed/failed/terminated
			
 
				+    "stage_status": {
			
 
				+        "document": "completed",
			
 
				+        "ai_review": "completed",
			
 
				+        "report": "completed"
			
 
				+    },
			
 
				+    "document_result": {
			
 
				+        "structured_content": {...},
			
 
				+        "parsed_sections": [...]
			
 
				+    },
			
 
				+    "ai_review_result": {
			
 
				+        "review_results": [
			
 
				+            {
			
 
				+                "unit_index": 0,
			
 
				+                "unit_content": {...},
			
 
				+                "review_items": [
			
 
				+                    {
			
 
				+                        "check_type": "semantic_logic_check",
			
 
				+                        "risk_level": "high",
			
 
				+                        "issues": [...],
			
 
				+                        "suggestions": [...]
			
 
				+                    }
			
 
				+                ]
			
 
				+            }
			
 
				+        ],
			
 
				+        "summary": {...}
			
 
				+    },
			
 
				+    "report_result": {
			
 
				+        "report_path": "/path/to/report.json",
			
 
				+        "summary": "审查报告摘要"
			
 
				+    },
			
 
				+    "timestamp": "2026-02-03T12:00:00"
			
 
				+}
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## SSE实时推送机制
			
 
				+
			
 
				+### SSE连接管理 (core/base/sse_manager.py)
			
 
				+
			
 
				+```
			
 
				+unified_sse_manager (统一SSE管理器)
			
 
				+├── establish_connection()     # 建立SSE连接并注册回调
			
 
				+├── send_progress()            # 发送进度更新
			
 
				+├── close_connection()         # 关闭SSE连接
			
 
				+└── connection_registry        # 连接注册表 {callback_task_id: queue}
			
 
				+```
			
 
				+
			
 
				+### SSE事件类型
			
 
				+
			
 
				+```python
			
 
				+# 连接事件
			
 
				+"connected"        # SSE连接已建立
			
 
				+"connection_closed" # SSE连接已关闭
			
 
				+
			
 
				+# 进度事件
			
 
				+"processing"       # 处理中（通用进度更新）
			
 
				+"unit_review_update" # 单元审查更新
			
 
				+"processing_flag"  # 处理标志
			
 
				+
			
 
				+# 完成事件
			
 
				+"submitted"        # 任务已提交
			
 
				+"completed"        # 任务已完成
			
 
				+
			
 
				+# 错误事件
			
 
				+"error"            # 发生错误
			
 
				+```
			
 
				+
			
 
				+### 进度推送流程
			
 
				+
			
 
				+```
			
 
				+1. 前端建立SSE连接
			
 
				+   POST /sgsc/sse/launch_review
			
 
				+   ↓
			
 
				+2. 后端建立SSE连接并注册回调
			
 
				+   unified_sse_manager.establish_connection(callback_task_id, sse_progress_callback)
			
 
				+   ↓
			
 
				+3. 返回"connected"事件
			
 
				+   yield format_sse_event("connected", connected_data)
			
 
				+   ↓
			
 
				+4. 提交任务到Celery
			
 
				+   workflow_manager.submit_task_processing(file_info)
			
 
				+   ↓
			
 
				+5. Celery Worker执行任务
			
 
				+   ↓
			
 
				+6. 各阶段更新进度
			
 
				+   progress_manager.update_progress(callback_task_id, stage_data)
			
 
				+   ↓
			
 
				+7. SSE回调被触发
			
 
				+   sse_progress_callback(callback_task_id, current_data)
			
 
				+   ↓
			
 
				+8. 发送SSE事件到前端
			
 
				+   unified_sse_manager.send_progress(callback_task_id, current_data)
			
 
				+   ↓
			
 
				+9. 前端接收SSE事件并更新UI
			
 
				+   ↓
			
 
				+10. 任务完成，发送"completed"事件
			
 
				+    yield format_sse_event("completed", completion_data)
			
 
				+    ↓
			
 
				+11. 关闭SSE连接
			
 
				+    unified_sse_manager.close_connection(callback_task_id)
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 任务终止机制
			
 
				+
			
 
				+### 终止信号设置
			
 
				+
			
 
				+```python
			
 
				+async def set_terminate_signal(callback_task_id: str, operator: str) -> Dict:
			
 
				+    """
			
 
				+    设置任务终止信号
			
 
				+    - 写入Redis: ai_review:terminate_signal:{callback_task_id}
			
 
				+    - 存储操作人和终止时间
			
 
				+    - 设置2小时过期时间
			
 
				+    """
			
 
				+```
			
 
				+
			
 
				+### 终止信号检测
			
 
				+
			
 
				+```python
			
 
				+async def check_terminate_signal(callback_task_id: str) -> bool:
			
 
				+    """
			
 
				+    检查是否有终止信号
			
 
				+    - 从Redis读取终止信号
			
 
				+    - 每个工作流节点执行前调用
			
 
				+    - 检测到信号后进入终止流程
			
 
				+    """
			
 
				+```
			
 
				+
			
 
				+### 终止流程
			
 
				+
			
 
				+```
			
 
				+1. 用户调用终止接口
			
 
				+   POST /construction/task/terminate
			
 
				+   ↓
			
 
				+2. 设置Redis终止信号
			
 
				+   set_terminate_signal(callback_task_id, operator)
			
 
				+   ↓
			
 
				+3. 工作流节点检测到信号
			
 
				+   check_terminate_signal() returns True
			
 
				+   ↓
			
 
				+4. 条件边判断返回"terminate"
			
 
				+   _should_terminate_or_error_chain() returns "terminate"
			
 
				+   ↓
			
 
				+5. 进入terminate节点
			
 
				+   _terminate_chain_node()
			
 
				+   ↓
			
 
				+6. 清理资源
			
 
				+   - 清理Redis终止信号
			
 
				+   - 清理Redis文件缓存
			
 
				+   - 通知SSE连接
			
 
				+   ↓
			
 
				+7. 返回终止状态
			
 
				+   overall_task_status: "terminated"
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## 部署架构
			
 
				+
			
 
				+### 开发环境
			
 
				+```
			
 
				+启动命令: python server/app.py
			
 
				+
			
 
				+启动流程:
			
 
				+1. 加载配置文件 (config/config.ini)
			
 
				+2. 初始化Redis连接
			
 
				+3. 启动Celery Worker（后台线程）
			
 
				+4. 启动Uvicorn服务器（端口8002）
			
 
				+5. 注册信号处理器（优雅关闭）
			
 
				+
			
 
				+依赖服务:
			
 
				+- Redis (localhost:6379)
			
 
				+- Milvus (向量数据库)
			
 
				+- 本地AI模型服务 (192.168.91.253:9002)
			
 
				+```
			
 
				+
			
 
				+### 生产环境
			
 
				+```
			
 
				+部署方式: Docker + Docker Compose
			
 
				+
			
 
				+启动命令: docker-compose up -d
			
 
				+
			
 
				+服务组件:
			
 
				+- web: FastAPI应用 (Uvicorn)
			
 
				+- celery: Celery Worker
			
 
				+- redis: Redis缓存
			
 
				+- milvus: Milvus向量数据库
			
 
				+```
			
 
				+
			
 
				+### 容器化部署
			
 
				+```
			
 
				+docker-compose up -d
			
 
				+```
			
 
				+
			
 
				+---
			
 
				+
			
 
				+## v2.0 → v3.0 变更记录 (2026-04-09)
			
 
				+
			
 
				+### 新增模块
			
 
				+
			
 
				+| 模块 | 路径 | 说明 |
			
 
				+|------|------|------|
			
 
				+| 施工方案编写 | `core/construction_write/` | 大纲生成、内容编写工作流 |
			
 
				+| 编写API | `views/construction_write/` | 大纲/补全/重生成/取消/推荐接口 |
			
 
				+| 数据脱敏 | `component/desensitize/` | 四维度脱敏（PII/地理/业务/财务）+ 反向映射 |
			
 
				+| 简化管道 | `component/minimal_pipeline/` | PDF提取、OCR（解耦）、目录构建/检测 |
			
 
				+| 标准匹配 | `component/standard_matching/` | 标准规范DAO与服务层 |
			
 
				+| 智能分类 | `doc_worker/classification/` | 分块/层级/本地三种分类策略 |
			
 
				+| LLM分类器v2 | `reviewers/utils/llm_content_classifier_v2/` | LLM驱动的内容分类，含补充验证机制 |
			
 
				+| LLM链式客户端 | `reviewers/utils/llm_chain_client/` | 多厂商（DeepSeek/豆包/Gemini/Qwen）链式调用框架 |
			
 
				+| 核心函数 | `workflows/core_functions/` | AI审查核心业务逻辑抽离 |
			
 
				+| 状态类型 | `workflows/types/` | AIReviewState、TaskChainState 定义 |
			
 
				+
			
 
				+### 新增审查器
			
 
				+
			
 
				+| 审查器 | 说明 |
			
 
				+|--------|------|
			
 
				+| `standard_timeliness_reviewer.py` | 标准时效性审查 |
			
 
				+| `timeliness_content_reviewer.py` | 时效性内容审查 |
			
 
				+| `sensitive_word_check.py` | 敏感词检查 |
			
 
				+
			
 
				+### 新增基础设施
			
 
				+
			
 
				+| 组件 | 说明 |
			
 
				+|------|------|
			
 
				+| `foundation/observability/cachefiles/` | 缓存文件管理 |
			
 
				+| `foundation/observability/monitoring/rag/` | RAG管道监控 |
			
 
				+| `foundation/ai/models/model_config_loader.py` | 模型配置加载器 |
			
 
				+| `foundation/utils/md5.py` | MD5工具 |
			
 
				+| `config/model_setting.yaml` | 模型设置配置 |
			
 
				+| `config/yolo/` | YOLO模型权重 |
			
 
				+
			
 
				+### 架构变更要点
			
 
				+
			
 
				+1. **审查引擎解耦**: `ai_review_engine.py` 核心逻辑部分抽离至 `core_functions/ai_review_core_fun.py`
			
 
				+2. **OCR模块独立**: `minimal_pipeline/ocr_processor.py` 从PDF提取中解耦为独立模块
			
 
				+3. **分类体系升级**: 从简单分类升级为 `llm_content_classifier_v2`，支持 keywords 关键字扫描 + LLM补充验证
			
 
				+4. **多模型支持**: `llm_chain_client` 框架统一 DeepSeek/豆包/Gemini/Qwen 多厂商调用
			
 
				+5. **脱敏-审查闭环**: `desensitize/` 模块提供正向脱敏 + `remapper.py` 反向映射，保证审查结果可还原
			
 
				+
			
 
				+---
			
--- a/config/config.ini.template
+++ b/config/config.ini.template
@@ -50,11 +50,11 @@ HOST = 0.0.0.0
 
				 LAUNCH_PORT = 8002
			
 
				 
			
 
				 [redis]
			
 
				-REDIS_URL=redis://:123456@127.0.0.1:6379
			
 
				-REDIS_HOST=127.0.0.1
			
 
				+REDIS_URL=redis://:Wxcz666@@lqRedis_dev:6379
			
 
				+REDIS_HOST=lqRedis_dev
			
 
				 REDIS_PORT=6379
			
 
				 REDIS_DB=0
			
 
				-REDIS_PASSWORD=123456
			
 
				+REDIS_PASSWORD=Wxcz666@
			
 
				 REDIS_MAX_CONNECTIONS=50
			
 
				 
			
 
				 [ocr]
			
--- a/core/construction_review/component/ai_review_engine.py
+++ b/core/construction_review/component/ai_review_engine.py
@@ -756,12 +756,7 @@ class AIReviewEngine(BaseReviewer):
 
				                 risk_level, risk_level_en = _level_risk.get(level, ("中风险", "medium"))
			
 
				                 issue_point = rec.get('issue_point', '')
			
 
				                 location = rec.get('location', '')
			
 
				-                # 三级缺失：将 location 中的标准分类名替换为文档实际章节名，信息更直观
			
 
				-                if level == '三级' and chapter_name and ' > ' in location:
			
 
				-                    sec_part = location.split(' > ', 1)[1]
			
 
				-                    new_location = f"{chapter_name} > {sec_part}"
			
 
				-                    issue_point = issue_point.replace(location, new_location, 1)
			
 
				-                    location = new_location
			
 
				+                # location 已从 completeness_reviewer 获取实际章节名，无需额外处理
			
 
				                 # 按顺序构建响应字段（first_seq -> second_seq -> third_seq 相邻）
			
 
				                 response_item = {
			
 
				                     "check_item": "completeness_check",
			
@@ -901,14 +896,16 @@ class AIReviewEngine(BaseReviewer):
 
				         logger.info(f"[{name}] 开始LLM目录完整性检查")
			
 
				 
			
 
				         try:
			
 
				-            # 获取 catalog 的标准格式文本
			
 
				+            # 获取 catalog 的标准格式文本和目录页页码
			
 
				             formatted_text = ""
			
 
				+            toc_page_range = None
			
 
				 
			
 
				-            # 优先从 catalog.formatted_text 获取
			
 
				+            # 优先从 catalog 获取
			
 
				             if outline_data and isinstance(outline_data, dict):
			
 
				                 catalog_raw = outline_data.get('catalog')
			
 
				                 if catalog_raw and isinstance(catalog_raw, dict):
			
 
				                     formatted_text = catalog_raw.get('formatted_text', '')
			
 
				+                    toc_page_range = catalog_raw.get('toc_page_range')
			
 
				 
			
 
				             # 回退到从 state 获取
			
 
				             if not formatted_text and state and isinstance(state, dict):
			
@@ -916,6 +913,7 @@ class AIReviewEngine(BaseReviewer):
 
				                 catalog_raw = structured.get('catalog')
			
 
				                 if catalog_raw and isinstance(catalog_raw, dict):
			
 
				                     formatted_text = catalog_raw.get('formatted_text', '')
			
 
				+                    toc_page_range = catalog_raw.get('toc_page_range')
			
 
				 
			
 
				             # 如果没有标准格式，从 chapters 构建
			
 
				             if not formatted_text:
			
@@ -976,7 +974,7 @@ class AIReviewEngine(BaseReviewer):
 
				 
			
 
				             # 使用 CatalogReviewer 进行审查
			
 
				             reviewer = CatalogReviewer()
			
 
				-            result = await reviewer.review(formatted_text, trace_id_idx)
			
 
				+            result = await reviewer.review(formatted_text, trace_id_idx, toc_page_range)
			
 
				 
			
 
				             logger.info(f"[DEBUG][{name}] 检查完成，返回结果")
			
 
				             logger.info(f"[DEBUG][{name}] result type: {type(result)}")
			
--- a/core/construction_review/component/doc_worker/config/StandardCategoryTable.csv
+++ b/core/construction_review/component/doc_worker/config/StandardCategoryTable.csv
@@ -20,10 +20,10 @@ first_seq,first_code,first_name,second_seq,second_code,second_name,second_focus,
 
				 2,overview,工程概况,5,RequirementsTech,施工要求和技术保证条件,名称类、日期类。名称类、量化单位类、数值类。,2,QualityTarget,质量目标,质量目标、合同条款编号、业主具体要求（如绿色施工认证）。,质量目标;合格率;质量标准;鲁班奖;优质工程;质量等级,
			
 
				 2,overview,工程概况,5,RequirementsTech,施工要求和技术保证条件,名称类、日期类。名称类、量化单位类、数值类。,3,SecurityGoals,安全目标,安全目标（如零死亡事故、隐患整改率）、合同条款编号、业主具体要求（如绿色施工认证）。,安全目标;零伤亡;安全事故;安全指标;安全生产目标,
			
 
				 2,overview,工程概况,5,RequirementsTech,施工要求和技术保证条件,名称类、日期类。名称类、量化单位类、数值类。,4,EnvironmentalGoals,环境目标,环境目标（如扬尘控制、噪声限值）或业主具体要求（如绿色施工认证）。,环境目标;扬尘控制;噪声限值;绿色施工指标;文明施工目标,
			
 
				-2,overview,工程概况,6,RiskLevel,风险辨识与分级,危害隐患性词汇类、法规名称类、标准编号类。风险等级相关专业性词汇、属于、标准编号或其它编号、部门名称类、数值类、量化单位类。名称类、数值类。,1,DangerSource,危险源,"第一优先级（引用识别）： 若文本中出现如“见表XX”、“见附件XX”、“相关表格放置于第十章（或某章）”等明确指向外部表格或附件的表述，直接视为满足当前审查要求。
			
 
				-第二优先级（要素审查）： 若文本中没有指向外部的引用，请审查正文是否同时包含了以下核心要素：列出具体的危险源。",危险源;风险源;危害因素;安全隐患;事故隐患;危险因素;风险点,
			
 
				-2,overview,工程概况,6,RiskLevel,风险辨识与分级,危害隐患性词汇类、法规名称类、标准编号类。风险等级相关专业性词汇、属于、标准编号或其它编号、部门名称类、数值类、量化单位类。名称类、数值类。,2,ClassificationAndResponseMeasures,分级与应对措施,"第一优先级（引用识别）： 若文本中出现如“见表XX”、“见附件XX”、“相关表格放置于第十章（或某章）”等明确指向外部表格或附件的表述，直接视为满足当前审查要求。
			
 
				-第二优先级（要素审查）： 若文本中没有指向外部的引用，请审查正文是否同时包含了以下核心要素：① 对危险源进行分级；；② 明确对应的应对措施。",风险等级;重大风险;较大风险;一般风险;应对措施;LEC;风险分级;风险评估,
			
 
				+2,overview,工程概况,6,RiskLevel,风险辨识与分级,危害隐患性词汇类、法规名称类、标准编号类。风险等级相关专业性词汇、属于、标准编号或其它编号、部门名称类、数值类、量化单位类。名称类、数值类。,1,DangerSource,危险源,"第一优先级（引用识别）： 若文本中出现如“见表XX”、“见附件XX”、“相关表格放置于第十章（或某章）”等明确指向外部表格或附件的表述，直接视为满足当前审查要求，需同时将其分类为危险源。
			
 
				+第二优先级（要素审查）： 若文本中没有指向外部的引用，请审查正文是否同时包含了以下核心要素：列出具体的危险源。",详见;风险辨识与分级;危险源;风险源;危害因素;安全隐患;事故隐患;危险因素;风险点,
			
 
				+2,overview,工程概况,6,RiskLevel,风险辨识与分级,危害隐患性词汇类、法规名称类、标准编号类。风险等级相关专业性词汇、属于、标准编号或其它编号、部门名称类、数值类、量化单位类。名称类、数值类。,2,ClassificationAndResponseMeasures,分级与应对措施,"第一优先级（引用识别）： 若文本中出现如“见表XX”、“见附件XX”、“相关表格放置于第十章（或某章）”等明确指向外部表格或附件的表述，直接视为满足当前审查要求。需同时将其分类为分级与应对措施。
			
 
				+第二优先级（要素审查）： 若文本中没有指向外部的引用，请审查正文是否同时包含了以下核心要素：① 对危险源进行分级；② 明确对应的应对措施。",详见;风险辨识与分级;风险等级;重大风险;较大风险;一般风险;应对措施;LEC;风险分级;风险评估,
			
 
				 2,overview,工程概况,7,Stakeholders,参建各方责任主体单位,名称类、数值类。,1,UnitType,单位类型,"参建各方责任主体单位主要描述该项目的建设单位、设计单位、监理单位、施
			
 
				 工单位、监控单位、专业分包单位的名称。",建设单位;设计单位;监理单位;施工单位;参建单位;总承包;社会信用代码,
			
 
				 3,plan,施工计划,1,Schedule,施工进度计划,关键工程节点安排、施工进度计划横道图、进度控制点、里程碑事件、工序搭接关系、工期延误风险、进度调整机制、施工流水节拍、网络计划技术（如双代号网络图）,1,KeyProjectNodeArrangement,关键工程（工序）节点安排,主要工程（工序）节点的起止时间和持续时间、聚焦影响总工期的关键工序（如基础浇筑、主体封顶）、是进度控制的核心；,关键节点;里程碑;关键工序;主要节点;节点工期;关键线路,
			
--- a/core/construction_review/component/minimal_pipeline/catalog_reviewer.py
+++ b/core/construction_review/component/minimal_pipeline/catalog_reviewer.py
@@ -32,6 +32,7 @@ class CatalogReviewer:
 
				         "check_result": {
			
 
				           "issue_point": "【一级缺失】第四章 施工工艺技术",
			
 
				           "location": "目录页",
			
 
				+          "page": 3,
			
 
				           "suggestion": "建议补充'第四章 施工工艺技术'章节",
			
 
				           "reason": "目录页缺少该章节",
			
 
				           "risk_level": "高风险"
			
@@ -46,6 +47,7 @@ class CatalogReviewer:
 
				         "check_result": {
			
 
				           "issue_point": "【一级缺失】第十章 其他资料",
			
 
				           "location": "目录页",
			
 
				+          "page": 3,
			
 
				           "suggestion": "建议补充'第十章 其他资料'章节",
			
 
				           "reason": "目录页缺少该章节",
			
 
				           "risk_level": "高风险"
			
@@ -60,6 +62,7 @@ class CatalogReviewer:
 
				         "check_result": {
			
 
				           "issue_point": "【二级缺失】第一章 编制依据 - 四、编制原则",
			
 
				           "location": "第一章",
			
 
				+          "page": 3,
			
 
				           "suggestion": "建议补充'四、编制原则'",
			
 
				           "reason": "第一章缺少该二级目录",
			
 
				           "risk_level": "中风险"
			
@@ -157,13 +160,15 @@ class CatalogReviewer:
 
				 三、附图附表
			
 
				 四、编制及审核人员情况"""
			
 
				 
			
 
				-    async def review(self, actual_catalog_text: str, trace_id_idx: str = "") -> Dict[str, Any]:
			
 
				+    async def review(self, actual_catalog_text: str, trace_id_idx: str = "",
			
 
				+                      toc_page_range: Dict[str, int] = None) -> Dict[str, Any]:
			
 
				         """
			
 
				         审查目录完整性
			
 
				 
			
 
				         Args:
			
 
				             actual_catalog_text: 实际目录文本（标准格式）
			
 
				             trace_id_idx: 追踪ID索引
			
 
				+            toc_page_range: 目录页页码范围，如 {"start": 3, "end": 4}
			
 
				 
			
 
				         Returns:
			
 
				             对齐 completeness_check 格式的结果字典
			
@@ -174,7 +179,7 @@ class CatalogReviewer:
 
				         try:
			
 
				             from foundation.ai.agent.generate.model_generate import generate_model_client
			
 
				 
			
 
				-            prompt = self._build_prompt(actual_catalog_text)
			
 
				+            prompt = self._build_prompt(actual_catalog_text, toc_page_range)
			
 
				 
			
 
				             # 重试机制：最多3次
			
 
				             max_retries = 3
			
@@ -251,10 +256,21 @@ class CatalogReviewer:
 
				                 "execution_time": execution_time
			
 
				             }
			
 
				 
			
 
				-    def _build_prompt(self, actual_catalog_text: str) -> str:
			
 
				+    def _build_prompt(self, actual_catalog_text: str,
			
 
				+                       toc_page_range: Dict[str, int] = None) -> str:
			
 
				         """构建审查Prompt"""
			
 
				         json_example = self._JSON_EXAMPLE_TEMPLATE
			
 
				 
			
 
				+        # 构建页码信息说明
			
 
				+        page_info = ""
			
 
				+        if toc_page_range:
			
 
				+            start_page = toc_page_range.get('start', 3)
			
 
				+            end_page = toc_page_range.get('end', 3)
			
 
				+            if start_page == end_page:
			
 
				+                page_info = f"目录页位于第 {start_page} 页"
			
 
				+            else:
			
 
				+                page_info = f"目录页位于第 {start_page}-{end_page} 页"
			
 
				+
			
 
				         # 基础 JSON 模板（使用单引号字符串避免 f-string 转义问题）
			
 
				         base_template = '''{
			
 
				   "details": {
			
@@ -267,6 +283,7 @@ class CatalogReviewer:
 
				         "check_result": {
			
 
				           "issue_point": "【一级缺失】xxx",
			
 
				           "location": "目录页",
			
 
				+          "page": 3,
			
 
				           "suggestion": "建议补充'xxx'章节",
			
 
				           "reason": "简要说明",
			
 
				           "risk_level": "高风险"
			
@@ -281,6 +298,29 @@ class CatalogReviewer:
 
				   "success": true
			
 
				 }'''
			
 
				 
			
 
				+        page_instruction = f"""
			
 
				+## 页码信息
			
 
				+{page_info if page_info else "目录页页码未知，统一使用 page=3"}
			
 
				+
			
 
				+## 输出格式要求
			
 
				+check_result 中必须包含以下字段：
			
 
				+- issue_point: 问题描述
			
 
				+- location: 问题定位（一级缺失填"目录页"，二级缺失填对应的一级章节名）
			
 
				+- page: 页码数字（{toc_page_range.get('start', 3) if toc_page_range else 3}）
			
 
				+- suggestion: 补充建议
			
 
				+- reason: 原因说明
			
 
				+- risk_level: 风险等级（"高风险"或"中风险"）
			
 
				+""" if toc_page_range else """
			
 
				+## 输出格式要求
			
 
				+check_result 中必须包含以下字段：
			
 
				+- issue_point: 问题描述
			
 
				+- location: 问题定位（一级缺失填"目录页"，二级缺失填对应的一级章节名）
			
 
				+- page: 页码数字（统一使用 3）
			
 
				+- suggestion: 补充建议
			
 
				+- reason: 原因说明
			
 
				+- risk_level: 风险等级（"高风险"或"中风险"）
			
 
				+"""
			
 
				+
			
 
				         return f"""你是一位施工方案文档审查专家。请对比【实际目录】和【标准目录】，找出缺失项。
			
 
				 
			
 
				 ## 审查原则
			
@@ -329,6 +369,8 @@ class CatalogReviewer:
 
				 - 一级缺失：risk_level 为 "高风险", risk_info.risk_level 为 "high"
			
 
				 - 二级缺失：risk_level 为 "中风险", risk_info.risk_level 为 "medium"
			
 
				 - 如无缺失，response 中放一条 "issue_point": "【目录完整】一二级目录结构完整", "exist_issue": false
			
 
				+
			
 
				+{page_instruction}
			
 
				 """
			
 
				 
			
 
				     def _extract_json(self, content: str) -> Optional[Dict[str, Any]]:
			
--- a/core/construction_review/component/minimal_pipeline/ocr_processor.py
+++ b/core/construction_review/component/minimal_pipeline/ocr_processor.py
@@ -0,0 +1,458 @@
 
				+"""
			
 
				+OCR 处理模块 - 表格检测与识别
			
 
				+
			
 
				+提供 PDF 表格区域检测和 OCR 识别功能，支持：
			
 
				+- RapidLayout 表格区域检测
			
 
				+- GLM-OCR 并发识别
			
 
				+- 表格文本替换回填
			
 
				+"""
			
 
				+
			
 
				+import base64
			
 
				+import io
			
 
				+import time
			
 
				+from concurrent.futures import ThreadPoolExecutor, as_completed
			
 
				+from dataclasses import dataclass
			
 
				+from typing import Dict, Any, List, Optional, Tuple, Set
			
 
				+
			
 
				+import fitz
			
 
				+import numpy as np
			
 
				+import requests
			
 
				+
			
 
				+from foundation.observability.logger.loggering import review_logger as logger
			
 
				+
			
 
				+# 尝试导入 RapidLayout
			
 
				+try:
			
 
				+    from rapid_layout import RapidLayout
			
 
				+    RAPID_LAYOUT_AVAILABLE = True
			
 
				+except ImportError:
			
 
				+    RAPID_LAYOUT_AVAILABLE = False
			
 
				+    RapidLayout = None
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class TableRegion:
			
 
				+    """表格区域信息"""
			
 
				+    page_num: int
			
 
				+    page: fitz.Page
			
 
				+    bbox: Tuple[float, float, float, float]
			
 
				+    score: float
			
 
				+
			
 
				+
			
 
				+@dataclass
			
 
				+class OcrResult:
			
 
				+    """OCR 结果"""
			
 
				+    page_num: int
			
 
				+    bbox: Tuple[float, float, float, float]
			
 
				+    score: float
			
 
				+    text: str
			
 
				+    success: bool
			
 
				+
			
 
				+
			
 
				+class OcrProcessor:
			
 
				+    """OCR 处理器：表格检测与识别"""
			
 
				+
			
 
				+    # 默认配置
			
 
				+    MAX_SHORT_EDGE = 1024
			
 
				+    JPEG_QUALITY = 90
			
 
				+    OCR_DPI = 200
			
 
				+    OCR_CONFIDENCE_THRESHOLD = 0.5
			
 
				+    OCR_CONCURRENT_WORKERS = 5
			
 
				+
			
 
    def __init__(
        self,
        ocr_api_url: str = "http://183.220.37.46:25429/v1/chat/completions",
        ocr_timeout: int = 600,
        ocr_api_key: str = "",
        max_short_edge: int = 1024,
        jpeg_quality: int = 90,
        ocr_dpi: int = 200,
        confidence_threshold: float = 0.5,
        concurrent_workers: int = 5,
    ):
        """
        Record the OCR endpoint settings and the image / detection tuning values.

        Args:
            ocr_api_url: URL the OCR request is POSTed to.
            ocr_timeout: Timeout in seconds for each OCR HTTP request.
            ocr_api_key: Optional key; when non-empty it is sent as a Bearer token.
            max_short_edge: Upper bound for the short edge of the compressed image.
            jpeg_quality: JPEG quality used when re-encoding the image.
            ocr_dpi: DPI used when rendering PDF regions.
            confidence_threshold: Minimum layout score for a box to count as a table.
            concurrent_workers: Number of worker threads used for OCR requests.
        """
        # The layout engine is built lazily by _get_layout_engine().
        self._layout_engine: Optional[Any] = None

        # Remote OCR service.
        self.ocr_api_url = ocr_api_url
        self.ocr_api_key = ocr_api_key
        self.ocr_timeout = ocr_timeout

        # Rendering and compression.
        self.ocr_dpi = ocr_dpi
        self.jpeg_quality = jpeg_quality
        self.max_short_edge = max_short_edge

        # Detection and concurrency.
        self.confidence_threshold = confidence_threshold
        self.concurrent_workers = concurrent_workers

        if RAPID_LAYOUT_AVAILABLE:
            return
        logger.warning("RapidLayout 未安装，表格检测功能不可用")
			
 
				+
			
 
    def is_available(self) -> bool:
        """Report whether table detection can run, i.e. whether RapidLayout was importable."""
        available = RAPID_LAYOUT_AVAILABLE
        return available
			
 
				+
			
 
    def _get_layout_engine(self) -> Optional[Any]:
        """Return the cached RapidLayout engine, creating it on first use.

        Returns:
            The engine instance, or None when RapidLayout is not installed.
        """
        engine = self._layout_engine
        if engine is None and RAPID_LAYOUT_AVAILABLE:
            # First call with the library present: build once and cache on the instance.
            engine = RapidLayout()
            self._layout_engine = engine
        return engine
			
 
				+
			
 
    def detect_table_regions(
        self,
        page: fitz.Page,
        page_num: int,
        clip_box: fitz.Rect
    ) -> List[Tuple[Tuple[float, float, float, float], float]]:
        """
        Run layout analysis on the clipped page and collect table boxes.

        Args:
            page: PDF page object to render.
            page_num: Page number, used only in the warning log.
            clip_box: Area of the page to render and analyse.

        Returns:
            A list of ((x1, y1, x2, y2), score) entries in PDF coordinates;
            empty when RapidLayout is unavailable or analysis fails.
        """
        found: List[Tuple[Tuple[float, float, float, float], float]] = []

        engine = self._get_layout_engine() if RAPID_LAYOUT_AVAILABLE else None
        if engine is None:
            return found

        # Render only the clipped area; the buffer is viewed as height x width x 3.
        pix = page.get_pixmap(dpi=self.ocr_dpi, clip=clip_box)
        img = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, 3)

        try:
            layout_output = engine(img)

            has_boxes = hasattr(layout_output, 'boxes')
            if has_boxes and hasattr(layout_output, 'class_names'):
                # Factors that map rendered-image pixels back onto the clip rectangle.
                scale_x = clip_box.width / img.shape[1]
                scale_y = clip_box.height / img.shape[0]
                origin_x, origin_y = clip_box.x0, clip_box.y0

                detections = zip(layout_output.boxes, layout_output.class_names, layout_output.scores)
                for box, label, score in detections:
                    if not (label == "table" and score > self.confidence_threshold):
                        continue
                    pdf_box = (
                        origin_x + box[0] * scale_x,
                        origin_y + box[1] * scale_y,
                        origin_x + box[2] * scale_x,
                        origin_y + box[3] * scale_y,
                    )
                    found.append((pdf_box, score))

        except Exception as e:
            # Best effort: a failed analysis is logged and whatever was collected is returned.
            logger.warning(f"  第 {page_num} 页: 版面分析失败 ({e})")

        return found
			
 
				+
			
 
    def process_ocr_concurrent(
        self,
        regions: List[TableRegion],
        progress_callback=None
    ) -> List[OcrResult]:
        """
        OCR every region on a thread pool and block until all are finished.

        Args:
            regions: Table regions to recognise.
            progress_callback: Optional callable invoked as (completed, total).

        Returns:
            One OcrResult per region, in completion order (not input order).
            Failed regions are reported with empty text and success=False.
        """
        # NOTE(review): each worker calls _ocr_table_region, which renders from the
        # fitz page inside the thread. PyMuPDF's documentation says it does not
        # support multithreaded use - confirm this is safe for the pages passed in.
        results: List[OcrResult] = []
        total = len(regions)

        with ThreadPoolExecutor(max_workers=self.concurrent_workers) as executor:
            # Submit everything up front, remembering which region each future belongs to.
            pending = {}
            for region in regions:
                future = executor.submit(self._ocr_table_region, region.page, region.bbox)
                pending[future] = region

            for completed, future in enumerate(as_completed(pending), start=1):
                region = pending[future]
                try:
                    text = future.result()
                    succeeded = True
                except Exception as e:
                    logger.error(f"  第 {region.page_num} 页表格 OCR 失败: {e}")
                    text = ""
                    succeeded = False

                results.append(OcrResult(
                    page_num=region.page_num,
                    bbox=region.bbox,
                    score=region.score,
                    text=text,
                    success=succeeded,
                ))

                # Report progress after every fifth completion and after the last one.
                if progress_callback and (completed % 5 == 0 or completed == total):
                    progress_callback(completed, total)

        return results
			
 
				+
			
 
    def _ocr_table_region(
        self,
        page: fitz.Page,
        bbox: Tuple[float, float, float, float],
        max_retries: int = 3
    ) -> str:
        """
        OCR one page region through the GLM-OCR endpoint, retrying with backoff.

        Args:
            page: PDF page object to render from.
            bbox: Region rectangle as (x1, y1, x2, y2).
            max_retries: Maximum number of attempts. Values below 1 are treated
                as 1 so the request is always tried at least once.

        Returns:
            The text extracted from the OCR response.

        Raises:
            Exception: The error from the final failed attempt.
        """
        # Render the region and shrink it before base64-encoding it into the request.
        rect = fitz.Rect(bbox)
        pix = page.get_pixmap(dpi=self.ocr_dpi, clip=rect)
        img_bytes = pix.tobytes("jpeg")

        compressed = self._compress_image(img_bytes)
        img_base64 = base64.b64encode(compressed).decode('utf-8')

        # NOTE(review): the adjacent literals below are joined with no separator,
        # so the numbered instructions run together in the prompt - confirm intended.
        payload = {
            "model": "GLM-OCR",
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "识别图片中的表格内容，按原文排版输出。"
                                    "注意："
                                    "1. 表格用 Markdown 表格格式"
                                    "2. 保持换行和列对齐"
                                    "3. 只输出表格内容，不要其他说明"
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"}
                        }
                    ]
                }
            ],
            "max_tokens": 2048,
            "temperature": 0.1
        }

        headers = {"Content-Type": "application/json"}
        if self.ocr_api_key:
            headers["Authorization"] = f"Bearer {self.ocr_api_key}"

        # A non-positive max_retries used to skip the loop entirely and end in
        # `raise None` (a TypeError); always make at least one attempt instead.
        attempts = max(1, max_retries)
        last_error = None
        for attempt in range(attempts):
            try:
                response = requests.post(
                    self.ocr_api_url,
                    headers=headers,
                    json=payload,
                    timeout=self.ocr_timeout
                )
                response.raise_for_status()

                result = response.json()
                return self._extract_ocr_content(result)

            except Exception as e:
                last_error = e
                if attempt < attempts - 1:
                    # Exponential backoff: 2s, 4s, ... (no wait after the final attempt).
                    wait_time = 2 ** (attempt + 1)
                    logger.warning(f"  第 {page.number + 1} 页表格 OCR 第 {attempt + 1} 次失败: {e}, {wait_time}秒后重试...")
                    time.sleep(wait_time)
                else:
                    logger.error(f"  第 {page.number + 1} 页表格 OCR 最终失败（已重试{attempts}次）: {e}")

        # Every attempt failed: surface the last error to the caller.
        raise last_error
			
 
				+
			
 
    def _compress_image(self, img_bytes: bytes) -> bytes:
        """
        Flatten, downscale and re-encode an image as JPEG.

        Args:
            img_bytes: Encoded source image.

        Returns:
            The re-encoded JPEG bytes, or `img_bytes` unchanged if any step fails.
        """
        try:
            from PIL import Image

            picture = Image.open(io.BytesIO(img_bytes))
            mode = picture.mode

            if mode in ('RGBA', 'LA', 'P'):
                # Composite onto white so transparency does not turn black in JPEG.
                canvas = Image.new('RGB', picture.size, (255, 255, 255))
                layer = picture.convert('RGBA') if mode == 'P' else picture
                if layer.mode in ('RGBA', 'LA'):
                    canvas.paste(layer, mask=layer.split()[-1])
                picture = canvas
            elif mode != 'RGB':
                picture = picture.convert('RGB')

            # Shrink proportionally only when the short edge exceeds the limit.
            short_edge = min(picture.size)
            if short_edge > self.max_short_edge:
                ratio = self.max_short_edge / short_edge
                target = (int(picture.width * ratio), int(picture.height * ratio))
                picture = picture.resize(target, Image.Resampling.LANCZOS)

            out = io.BytesIO()
            picture.save(out, format='JPEG', quality=self.jpeg_quality, optimize=True)
            return out.getvalue()

        except Exception as e:
            # Best effort: fall back to the original bytes rather than failing the OCR call.
            logger.warning(f"图片压缩失败，使用原图: {e}")
            return img_bytes
			
 
				+
			
 
				+    def _extract_ocr_content(self, result: Dict) -> str:
			
 
				+        """
			
 
				+        从 OCR 响应提取内容，并将 HTML 表格转换为 Markdown
			
 
				+
			
 
				+        Args:
			
 
				+            result: OCR API 响应
			
 
				+
			
 
				+        Returns:
			
 
				+            提取的文本内容
			
 
				+        """
			
 
				+        content = ""
			
 
				+        if "choices" in result and isinstance(result["choices"], list):
			
 
				+            if len(result["choices"]) > 0:
			
 
				+                message = result["choices"][0].get("message", {})
			
 
				+                content = message.get("content", "")
			
 
				+
			
 
				+        # 如果内容包含 HTML 标签，转换为 Markdown
			
 
				+        if content and "<" in content and ">" in content:
			
 
				+            try:
			
 
				+                from ..doc_worker.pdf_worker.html_to_markdown import convert_html_to_markdown
			
 
				+                content = convert_html_to_markdown(content)
			
 
				+            except Exception as e:
			
 
				+                logger.debug(f"HTML 转 Markdown 失败，保留原始内容: {e}")
			
 
				+
			
 
				+        return content
			
 
				+
			
 
    def replace_table_regions(
        self,
        page: "fitz.Page",
        original_text: str,
        ocr_results: List[Dict],
        clip_box: "fitz.Rect"
    ) -> str:
        """
        Rebuild the page text with table areas swapped for their OCR output.

        Text blocks mostly covered by an OCR region are dropped and the region's
        OCR text is emitted in their place; all other blocks are kept in
        top-to-bottom order, including those that precede each table.

        Args:
            page: PDF page object; its text blocks are read via get_text("blocks").
            original_text: Text to fall back on when nothing can be rebuilt.
            ocr_results: Dicts read here for "bbox" (x1, y1, x2, y2) and "ocr_text".
            clip_box: Only blocks vertically inside this rectangle are considered.

        Returns:
            The rebuilt text, or `original_text` when `ocr_results` is empty or
            the rebuilt text is blank.
        """
        if not ocr_results:
            return original_text

        # Collect the text blocks that lie vertically inside the clip area.
        text_blocks = []
        for block in page.get_text("blocks"):
            x0, y0, x1, y1, text, _, _ = block
            if y0 >= clip_box.y0 and y1 <= clip_box.y1:
                text_blocks.append({
                    "bbox": (x0, y0, x1, y1),
                    "text": text.strip(),
                })

        # Reading order: top to bottom, then left to right.
        text_blocks.sort(key=lambda b: (b["bbox"][1], b["bbox"][0]))

        # A block is replaced when more than half of its area falls in an OCR region.
        replaced_indices: Set[int] = set()
        for ocr_result in ocr_results:
            rx0, ry0, rx1, ry1 = ocr_result["bbox"]

            for idx, block in enumerate(text_blocks):
                if idx in replaced_indices:
                    continue
                bx0, by0, bx1, by1 = block["bbox"]

                overlap_x = max(0, min(bx1, rx1) - max(bx0, rx0))
                overlap_y = max(0, min(by1, ry1) - max(by0, ry0))
                overlap_area = overlap_x * overlap_y
                block_area = (bx1 - bx0) * (by1 - by0)

                if block_area > 0 and overlap_area / block_area > 0.5:
                    replaced_indices.add(idx)

        result_parts: List[str] = []
        cursor = 0  # first block index not yet emitted or skipped

        for ocr_result in sorted(ocr_results, key=lambda r: r["bbox"][1]):
            rx0, ry0, rx1, ry1 = ocr_result["bbox"]
            ocr_text = ocr_result["ocr_text"]

            # Replaced blocks that sit inside this region (5pt tolerance).
            members = []
            for idx in sorted(replaced_indices):
                bx0, by0, bx1, by1 = text_blocks[idx]["bbox"]
                if (bx0 >= rx0 - 5 and bx1 <= rx1 + 5 and
                        by0 >= ry0 - 5 and by1 <= ry1 + 5):
                    members.append(idx)

            if members:
                stop = members[0]
                next_cursor = members[-1] + 1
            else:
                # No text block maps to this region (e.g. no text layer under the
                # table): place the OCR text by vertical position instead of
                # discarding it.
                stop = cursor
                while stop < len(text_blocks) and text_blocks[stop]["bbox"][1] < ry0:
                    stop += 1
                next_cursor = stop

            # Emit the plain text between the previous table and this one. The
            # original range here was always empty, so this text was lost.
            for idx in range(cursor, stop):
                if idx not in replaced_indices:
                    result_parts.append(text_blocks[idx]["text"])
                    result_parts.append("\n")

            if members or ocr_text:
                result_parts.append(ocr_text)
                result_parts.append("\n")

            cursor = max(cursor, next_cursor)

        # Emit whatever plain text follows the last table.
        for idx in range(cursor, len(text_blocks)):
            if idx not in replaced_indices:
                result_parts.append(text_blocks[idx]["text"])
                result_parts.append("\n")

        return "".join(result_parts).strip() or original_text
			
--- a/core/construction_review/component/minimal_pipeline/pdf_extractor.py
+++ b/core/construction_review/component/minimal_pipeline/pdf_extractor.py
--- a/core/construction_review/component/minimal_pipeline/pdf_extractor3.py
+++ b/core/construction_review/component/minimal_pipeline/pdf_extractor3.py
@@ -0,0 +1,481 @@
 
				+"""
			
 
				+PDF 结构提取器 - 同步并发 OCR 版本
			
 
				+
			
 
				+基于 splitter_pdf 逻辑，直接提取章节结构并记录页码。
			
 
				+支持 OCR 增强：检测表格区域并使用 ThreadPoolExecutor 5并发 OCR，其他文本保持 PyMuPDF 提取。
			
 
				+输出格式兼容后续分类与组装流程。
			
 
				+"""
			
 
				+
			
 
				+import re
			
 
				+from typing import Dict, Any, List, Optional, Tuple
			
 
				+
			
 
				+import fitz
			
 
				+
			
 
				+from foundation.observability.logger.loggering import review_logger as logger
			
 
				+
			
 
				+from .ocr_processor import OcrProcessor, TableRegion, OcrResult
			
 
				+
			
 
				+# 尝试导入 RapidLayout
			
 
				+try:
			
 
				+    from rapid_layout import RapidLayout
			
 
				+    RAPID_LAYOUT_AVAILABLE = True
			
 
				+except ImportError:
			
 
				+    RAPID_LAYOUT_AVAILABLE = False
			
 
				+    RapidLayout = None
			
 
				+
			
 
				+
			
 
				+class PdfStructureExtractor:
			
 
				+    """PDF 章节结构提取器（支持 OCR 异步并发）"""
			
 
				+
			
 
				+    CHAPTER_PATTERN = re.compile(r'^第[一二三四五六七八九十百]+章\s*.*')
			
 
				+    SECTION_PATTERN = re.compile(r'^[一二三四五六七八九十百]+、\s*.*')
			
 
				+    TOC_PATTERN = re.compile(r"\.{3,}|…{2,}")
			
 
				+
			
 
				+    def __init__(
			
 
				+        self,
			
 
				+        clip_top: float = 60,
			
 
				+        clip_bottom: float = 60,
			
 
				+        use_ocr: bool = False,
			
 
				+        ocr_api_url: str = "http://183.220.37.46:25429/v1/chat/completions",
			
 
				+        ocr_timeout: int = 600,
			
 
				+        ocr_api_key: str = "",
			
 
				+        detect_toc: bool = True,
			
 
				+        toc_model_path: str = "config/yolo/best.pt",
			
 
				+    ):
			
 
				+        self.clip_top = clip_top
			
 
				+        self.clip_bottom = clip_bottom
			
 
				+        self.use_ocr = use_ocr and RAPID_LAYOUT_AVAILABLE
			
 
				+
			
 
				+        # 初始化 OCR 处理器
			
 
				+        self._ocr_processor = OcrProcessor(
			
 
				+            ocr_api_url=ocr_api_url,
			
 
				+            ocr_timeout=ocr_timeout,
			
 
				+            ocr_api_key=ocr_api_key,
			
 
				+        ) if self.use_ocr else None
			
 
				+
			
 
				+        # 目录检测配置
			
 
				+        self.detect_toc = detect_toc
			
 
				+        self.toc_model_path = toc_model_path
			
 
				+        self._toc_extractor = None
			
 
				+
			
 
				+        if use_ocr and not RAPID_LAYOUT_AVAILABLE:
			
 
				+            logger.warning("RapidLayout 未安装，OCR 功能不可用")
			
 
				+
			
 
    def extract(self, file_content: bytes, progress_callback=None) -> Dict[str, Any]:
        """Extract the chapter structure from the raw bytes of a PDF.

        Args:
            file_content: The PDF file as a byte string.
            progress_callback: Optional callable invoked as
                ``progress_callback(stage, current, message)``.

        Returns:
            A dict with three keys:

            * ``"chapters"``: ``{chapter: {section: {"content", "page_start",
              "page_end"}}}`` as produced by ``_extract_from_doc``.
            * ``"total_pages"``: number of pages in the document.
            * ``"catalog"``: the table-of-contents structure returned by
              ``_extract_catalog``, or ``None`` when TOC detection is
              disabled, finds nothing, or fails.
        """
        output = {"chapters": {}, "total_pages": 0, "catalog": None}

        # Stage 0: optional TOC extraction. A failure here is logged and
        # does not stop the body extraction below.
        if self.detect_toc:
            try:
                toc_info = self._extract_catalog(file_content, progress_callback)
                if toc_info:
                    output["catalog"] = toc_info
                    logger.info(f"[PDF提取] 目录提取完成: {toc_info.get('total_chapters', 0)} 章")
            except Exception as e:
                logger.warning(f"[PDF提取] 目录提取失败: {e}")

        # Stages 1-3: body structure. The document is closed on every exit path.
        with fitz.open(stream=file_content) as doc:
            parsed = self._extract_from_doc(doc, progress_callback)
            output["chapters"] = parsed.get("chapters", {})
            output["total_pages"] = len(doc)
            return output
			
 
				+
			
 
    def _extract_catalog(self, file_content: bytes, progress_callback=None) -> Optional[Dict[str, Any]]:
        """Run the table-of-contents extractor on the PDF bytes.

        The ``TOCCatalogExtractor`` is created on the first call and reused
        afterwards. When an OCR processor exists, its URL, key and timeout are
        passed on to the extractor; otherwise only the model path is given.

        Args:
            file_content: The PDF file as a byte string.
            progress_callback: Passed through to ``detect_and_extract``.

        Returns:
            Whatever ``detect_and_extract`` returns; per the original notes
            this is ``{"chapters": [...], "total_chapters": N}`` or ``None``.
        """
        # Imported lazily to avoid a circular import. A missing dependency
        # of that module is allowed to raise here.
        from .toc_detector import TOCCatalogExtractor

        extractor = self._toc_extractor
        if extractor is None:
            processor = self._ocr_processor
            if processor:
                ocr_kwargs = {
                    "ocr_api_url": processor.ocr_api_url,
                    "ocr_api_key": processor.ocr_api_key,
                    "ocr_timeout": processor.ocr_timeout,
                }
            else:
                ocr_kwargs = {}
            extractor = TOCCatalogExtractor(model_path=self.toc_model_path, **ocr_kwargs)
            self._toc_extractor = extractor

        return extractor.detect_and_extract(file_content, progress_callback)
			
 
				+
			
 
    def _extract_from_doc(self, doc: fitz.Document, progress_callback=None) -> Dict[str, Any]:
        """Build the chapter/section structure of an opened document.

        Steps:
        1. Read positioned text blocks page by page and file each one under
           the current ``(chapter, section)`` pair. Everything before the
           first chapter heading that is not a TOC line is skipped.
        2. When OCR is enabled, detect table regions on every page and OCR
           them through the OCR processor.
        3. Insert the OCR results into the matching sections as table blocks.
        4. Sort every section's blocks by page and top edge and join them
           into plain text.

        Args:
            doc: The opened document.
            progress_callback: Optional callable invoked as
                ``progress_callback(stage, current, message)``; exceptions it
                raises are ignored.

        Returns:
            ``{"chapters": {chapter: {section: {"content": str,
            "page_start": int, "page_end": int}}}}``.
        """

        def _emit_progress(stage: str, current: int, message: str):
            """Forward a progress update; a failing callback is ignored."""
            if not progress_callback:
                return
            try:
                progress_callback(stage, current, message)
            except Exception:
                pass

        def _page_clip(page):
            """Return the page rectangle minus the top and bottom margins."""
            bounds = page.rect
            return fitz.Rect(0, self.clip_top, bounds.width, bounds.height - self.clip_bottom)

        total_pages = len(doc)

        # ---------- Stage 1: positioned text blocks -> (chapter, section) ----------
        logger.info("[阶段1] 提取带坐标的文本块并归属章节...")

        # {(chapter_name, section_name): [blocks with position]}
        chapter_blocks: Dict[Tuple[str, str], List[Dict[str, Any]]] = {}
        current_chapter = "未分类前言"
        current_section = "默认部分"
        in_body = False

        for page_num in range(total_pages):
            page = doc.load_page(page_num)

            for block in self._extract_text_blocks_with_position(page, _page_clip(page)):
                line = block["text"]

                # Drop blank blocks and header/footer text.
                if not line.strip() or self._is_header_footer(line):
                    continue

                is_chapter = bool(self.CHAPTER_PATTERN.match(line))
                looks_like_toc = bool(self.TOC_PATTERN.search(line))

                # Until the first real chapter heading we are still in the TOC.
                if not in_body:
                    if not is_chapter or looks_like_toc:
                        continue
                    in_body = True

                # Leftover TOC-style lines inside the body are dropped too.
                if looks_like_toc:
                    continue

                if is_chapter:
                    current_chapter = self._clean_chapter_title(line)
                    current_section = "章节标题"
                elif self.SECTION_PATTERN.match(line):
                    current_section = line

                # Headings and ordinary content are both stored as blocks.
                chapter_blocks.setdefault((current_chapter, current_section), []).append(block)

        chapter_count = len({key[0] for key in chapter_blocks})
        logger.info(f"[阶段1] 章节结构提取完成，共 {chapter_count} 个章节")

        # ---------- Stage 2: table regions + OCR (only when OCR is enabled) ----------
        table_regions: List[TableRegion] = []
        ocr_results: List[OcrResult] = []

        if self.use_ocr and self._ocr_processor:
            logger.info("[阶段2] 扫描表格区域...")
            for page_num in range(total_pages):
                page = doc.load_page(page_num)
                page_no = page_num + 1
                detected = self._ocr_processor.detect_table_regions(page, page_no, _page_clip(page))
                for bbox, score in detected:
                    table_regions.append(
                        TableRegion(page_num=page_no, page=page, bbox=bbox, score=score)
                    )
                # Report every fifth page and the final page.
                if page_no % 5 == 0 or page_num == total_pages - 1:
                    _emit_progress(
                        "版面分析",
                        int(page_no / total_pages * 30),
                        f"扫描页面 {page_no}/{total_pages}",
                    )

            logger.info(f"[阶段2] 发现 {len(table_regions)} 个表格区域")

            if table_regions:
                _emit_progress("版面分析", 35, f"发现 {len(table_regions)} 个表格，开始OCR识别...")

                def _on_ocr_progress(completed, total):
                    """Map OCR completion onto the 35-50 progress range."""
                    _emit_progress(
                        "版面分析", 35 + int(completed / total * 15), f"OCR识别中 {completed}/{total}"
                    )

                ocr_results = self._ocr_processor.process_ocr_concurrent(
                    table_regions,
                    progress_callback=_on_ocr_progress,
                )
                success_count = sum(1 for r in ocr_results if r.success)
                logger.info(f"[阶段2] OCR完成 {success_count}/{len(table_regions)}")
                _emit_progress("版面分析", 50, f"OCR识别完成 {success_count}/{len(table_regions)}")

        # ---------- Stage 3: insert OCR results as new blocks ----------
        if ocr_results:
            logger.info("[阶段3] 将OCR结果回填到对应章节...")
            self._insert_ocr_blocks_into_chapters(chapter_blocks, ocr_results)

        # ---------- Stage 4: block lists -> plain text ----------
        logger.info("[阶段4] 生成最终文本输出...")
        result: Dict[str, Any] = {"chapters": {}}

        for (chap_name, sec_name), blocks in chapter_blocks.items():
            sections = result["chapters"].setdefault(chap_name, {})

            # Order by page, then by the block's top edge.
            blocks.sort(key=lambda b: (b["page"], b["bbox"][1]))

            rendered = [
                f"\n[表格OCR识别结果]:\n{b['text']}\n[/表格]\n" if b.get("type") == "table" else b["text"]
                for b in blocks
            ]

            sections[sec_name] = {
                "content": "\n".join(rendered),
                "page_start": blocks[0]["page"] if blocks else 1,
                "page_end": blocks[-1]["page"] if blocks else 1,
            }

        logger.info(f"[PdfExtractor] 提取完成，共 {len(result['chapters'])} 个章节")
        return result
			
 
				+
			
 
				+    def _extract_text_blocks_with_position(
			
 
				+        self,
			
 
				+        page: fitz.Page,
			
 
				+        clip_box: fitz.Rect
			
 
				+    ) -> List[Dict[str, Any]]:
			
 
				+        """
			
 
				+        提取带坐标的文本块列表。
			
 
				+
			
 
				+        使用 page.get_text("dict") 获取每个文本块的精确边界框和文本内容。
			
 
				+        """
			
 
				+        blocks = []
			
 
				+        page_dict = page.get_text("dict", clip=clip_box)
			
 
				+
			
 
				+        for block in page_dict.get("blocks", []):
			
 
				+            if block.get("type") == 0:  # 文本块
			
 
				+                bbox = block["bbox"]
			
 
				+                y_center = (bbox[1] + bbox[3]) / 2
			
 
				+
			
 
				+                # 拼接块内所有文本
			
 
				+                text_lines = []
			
 
				+                for line in block.get("lines", []):
			
 
				+                    line_text = ""
			
 
				+                    for span in line.get("spans", []):
			
 
				+                        line_text += span.get("text", "")
			
 
				+                    if line_text.strip():
			
 
				+                        text_lines.append(line_text)
			
 
				+
			
 
				+                if text_lines:
			
 
				+                    blocks.append({
			
 
				+                        "text": "\n".join(text_lines),
			
 
				+                        "page": page.number + 1,
			
 
				+                        "bbox": bbox,
			
 
				+                        "y_center": y_center,
			
 
				+                        "type": "text"
			
 
				+                    })
			
 
				+
			
 
				+        # 按阅读顺序排序（Y坐标为主，X坐标为辅）
			
 
				+        blocks.sort(key=lambda b: (b["page"], b["bbox"][1], b["bbox"][0]))
			
 
				+        return blocks
			
 
				+
			
 
    def _insert_ocr_blocks_into_chapters(
        self,
        chapter_blocks: Dict[Tuple[str, str], List[Dict[str, Any]]],
        ocr_results: List[OcrResult]
    ) -> None:
        """Insert successful OCR results into ``chapter_blocks`` as table blocks.

        For every page that has at least one successful OCR result, the blocks
        already filed under sections on that page act as anchors. Each table
        is placed directly after the anchor whose top edge is the closest one
        at or above the table's top edge, inside the section that owns that
        anchor. A table that starts above every anchor on the page is placed
        in front of the topmost anchor.

        The anchor is chosen across all sections of the page at once.
        Searching section by section and stopping at the first match would
        give every table to the first section on the page that has a block
        above it, even when a later section's heading lies between that block
        and the table.

        Args:
            chapter_blocks: Mapping ``(chapter, section) -> list of block
                dicts``. Every block provides ``"page"`` and ``"bbox"``
                (indices 1 and 3 are read as top and bottom). The lists are
                modified in place.
            ocr_results: OCR results; only entries whose ``success`` is truthy
                are used. ``page_num``, ``bbox`` and ``text`` are read.

        Returns:
            None.
        """
        # Group the successful results by page.
        ocr_by_page: Dict[int, List[OcrResult]] = {}
        for result in ocr_results:
            if result.success:
                ocr_by_page.setdefault(result.page_num, []).append(result)

        for page_num, ocr_list in ocr_by_page.items():
            # Snapshot the anchors before inserting anything for this page, so
            # tables added below never become anchors themselves.
            anchors = [
                (key, blocks, block)
                for key, blocks in chapter_blocks.items()
                for block in blocks
                if block["page"] == page_num
            ]

            if not anchors:
                logger.warning(f"[OCR回填] 第{page_num}页没有匹配到任何小节")
                continue

            # Stable sort: anchors sharing a top edge keep document order.
            anchors.sort(key=lambda item: item[2]["bbox"][1])

            for ocr_result in sorted(ocr_list, key=lambda r: r.bbox[1]):
                table_y_top = ocr_result.bbox[1]
                table_y_bottom = ocr_result.bbox[3]

                table_block = {
                    "text": ocr_result.text,
                    "page": page_num,
                    "bbox": ocr_result.bbox,
                    "y_center": (table_y_top + table_y_bottom) / 2,
                    "type": "table"
                }

                preceding = [a for a in anchors if a[2]["bbox"][1] <= table_y_top]
                if preceding:
                    # Nearest anchor starting at or above the table: insert after it.
                    (chapter, section), owner, anchor = preceding[-1]
                    offset = 1
                else:
                    # The table starts above everything on the page: insert
                    # in front of the topmost anchor.
                    (chapter, section), owner, anchor = anchors[0]
                    offset = 0

                # Locate the anchor by identity; list.index() compares by
                # equality and could stop at an earlier block with equal content.
                anchor_pos = next(i for i, b in enumerate(owner) if b is anchor)
                insert_index = anchor_pos + offset

                # Tables are processed top to bottom. Step over tables of this
                # page that were already placed here so their order is kept.
                while (
                    insert_index < len(owner)
                    and owner[insert_index].get("type") == "table"
                    and owner[insert_index]["page"] == page_num
                    and owner[insert_index]["bbox"][1] <= table_y_top
                ):
                    insert_index += 1

                owner.insert(insert_index, table_block)
                logger.debug(
                    f"[OCR回填] 第{page_num}页表格(Y={table_y_top:.0f}) -> "
                    f"{chapter}/{section} 位置{insert_index}"
                )
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def _is_header_footer(line: str) -> bool:
			
 
				+        return (
			
 
				+            "四川路桥建设集团股份有限公司" in line
			
 
				+            or "T梁运输及安装专项施工方案" in line
			
 
				+            or line.isdigit()
			
 
				+        )
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def _clean_chapter_title(line: str) -> str:
			
 
				+        chapter_match = re.search(r"第[一二三四五六七八九十百]+章", line)
			
 
				+        if not chapter_match:
			
 
				+            return line.strip()
			
 
				+
			
 
				+        prefix = chapter_match.group(0)
			
 
				+        remaining = line[chapter_match.end() :].strip()
			
 
				+        remaining = re.sub(r"^[\.\s]+", "", remaining)
			
 
				+        remaining = re.sub(r"\s+\d+\s*$", "", remaining)
			
 
				+        remaining = re.sub(r"[\._\-]{3,}[^\u4e00-\u9fa5a-zA-Z0-9]*", "", remaining)
			
 
				+
			
 
				+        if remaining:
			
 
				+            return f"{prefix} {remaining}"
			
 
				+        return prefix
			
--- a/core/construction_review/component/minimal_pipeline/simple_processor.py
+++ b/core/construction_review/component/minimal_pipeline/simple_processor.py
@@ -9,6 +9,7 @@
 
				 """
			
 
				 
			
 
				 import asyncio
			
 
				+import json
			
 
				 import uuid
			
 
				 from collections import defaultdict
			
 
				 from typing import Dict, Any, Optional, Tuple, List
			
@@ -16,6 +17,7 @@ from typing import Dict, Any, Optional, Tuple, List
 
				 from foundation.observability.logger.loggering import review_logger as logger
			
 
				 from foundation.observability.cachefiles import cache, CacheBaseDir
			
 
				 
			
 
				+#from .pdf_extractor2 import PdfStructureExtractor
			
 
				 from .pdf_extractor import PdfStructureExtractor
			
 
				 from .toc_builder import build_toc_items_from_structure
			
 
				 from .chunk_assembler import assemble_chunks
			
@@ -477,8 +479,9 @@ class SimpleDocumentProcessor:
 
				             l2_threshold: 二级小节提取率阈值
			
 
				         """
			
 
				         chapters = structure.get("chapters", {})
			
 
				-        if not chapters:
			
 
				-            return
			
 
				+        # 确保 chapters 存在（即使为空），以便添加 quality_check
			
 
				+        if "chapters" not in structure:
			
 
				+            structure["chapters"] = chapters
			
 
				 
			
 
				         # 统计一级章节数量
			
 
				         l1_count = len(chapters)
			
--- a/core/construction_review/component/minimal_pipeline/test.py
+++ b/core/construction_review/component/minimal_pipeline/test.py
@@ -0,0 +1,119 @@
 
				+import fitz  # PyMuPDF
			
 
				+import re
			
 
				+import json
			
 
				+import os
			
 
				+from datetime import datetime
			
 
				+
			
 
				+def extract_and_split_construction_plan(pdf_path):
			
 
				+    # 打开PDF文件
			
 
				+    doc = fitz.open(pdf_path)
			
 
				+    
			
 
				+    # 编译正则表达式
			
 
				+    chapter_pattern = re.compile(r'^第[一二三四五六七八九十百]+章\s*.*')
			
 
				+    section_pattern = re.compile(r'^[一二三四五六七八九十百]+、\s*.*')
			
 
				+    # 用于识别目录的特征：连续的三个以上小数点或省略号
			
 
				+    toc_pattern = re.compile(r'\.{3,}|…{2,}') 
			
 
				+    
			
 
				+    structured_data = {}
			
 
				+    current_chapter = "未分类前言"
			
 
				+    current_section = "默认部分"
			
 
				+    
			
 
				+    in_body = False  # 状态机：标记是否已经跳过目录，正式进入正文
			
 
				+    
			
 
				+    for page_num in range(len(doc)):
			
 
				+        page = doc.load_page(page_num)
			
 
				+        
			
 
				+        # 1. 清理页眉页脚：利用 clip 裁剪页面提取区域
			
 
				+        # 默认A4纸高度约842磅，裁剪掉顶部和底部各60磅的区域（可根据实际PDF微调）
			
 
				+        rect = page.rect
			
 
				+        clip_box = fitz.Rect(0, 60, rect.width, rect.height - 60)
			
 
				+        
			
 
				+        # 仅提取裁剪框内的纯文本
			
 
				+        text = page.get_text("text", clip=clip_box)
			
 
				+        lines = text.split('\n')
			
 
				+        
			
 
				+        for line in lines:
			
 
				+            line = line.strip()
			
 
				+            # 跳过空行
			
 
				+            if not line:
			
 
				+                continue
			
 
				+            
			
 
				+            # 双保险：过滤掉可能因排版偏移漏掉的页眉页脚特征词或孤立的页码
			
 
				+            if "四川路桥建设集团股份有限公司" in line or "T梁运输及安装专项施工方案" in line or line.isdigit():
			
 
				+                continue
			
 
				+            
			
 
				+            # 2. 删除目录逻辑：判断是否正式进入正文
			
 
				+            if not in_body:
			
 
				+                if chapter_pattern.match(line) and not toc_pattern.search(line):
			
 
				+                    in_body = True
			
 
				+                else:
			
 
				+                    continue  # 还在目录页，直接跳过
			
 
				+            
			
 
				+            # 进入正文后的防干扰处理：跳过残余目录格式
			
 
				+            if toc_pattern.search(line):
			
 
				+                continue
			
 
				+            
			
 
				+            # 匹配到一级标题
			
 
				+            if chapter_pattern.match(line):
			
 
				+                current_chapter = line
			
 
				+                current_section = "章节前言" 
			
 
				+                if current_chapter not in structured_data:
			
 
				+                    structured_data[current_chapter] = {current_section: []}
			
 
				+                continue
			
 
				+            
			
 
				+            # 匹配到二级标题
			
 
				+            if section_pattern.match(line):
			
 
				+                current_section = line
			
 
				+                if current_chapter not in structured_data:
			
 
				+                    structured_data[current_chapter] = {}
			
 
				+                if current_section not in structured_data[current_chapter]:
			
 
				+                    structured_data[current_chapter][current_section] = []
			
 
				+                continue
			
 
				+            
			
 
				+            # 容错处理：确保基础字典结构存在
			
 
				+            if current_chapter not in structured_data:
			
 
				+                structured_data[current_chapter] = {current_section: []}
			
 
				+            if current_section not in structured_data[current_chapter]:
			
 
				+                structured_data[current_chapter][current_section] = []
			
 
				+                
			
 
				+            # 3. 将正文内容累加到对应的层级下
			
 
				+            structured_data[current_chapter][current_section].append(line)
			
 
				+    
			
 
				+    # 将列表拼接成完整的文本块
			
 
				+    for chap in structured_data:
			
 
				+        for sec in structured_data[chap]:
			
 
				+            structured_data[chap][sec] = '\n'.join(structured_data[chap][sec])
			
 
				+            
			
 
				+    return structured_data
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    # 获取用户输入的路径
			
 
				+    user_input = input("请输入需要提取的PDF文件路径（支持直接拖入文件或粘贴路径）：")
			
 
				+    
			
 
				+    # 清理路径两端可能存在的引号和空格（应对“复制文件地址”或拖拽文件带来的双引号）
			
 
				+    pdf_file_path = user_input.strip('\'" ')
			
 
				+    
			
 
				+    # 检查文件是否存在
			
 
				+    if not os.path.exists(pdf_file_path):
			
 
				+        print(f"\n[错误] 找不到文件，请检查路径是否正确：{pdf_file_path}")
			
 
				+    else:
			
 
				+        print("\n开始提取施工方案，请稍候...")
			
 
				+        try:
			
 
				+            result_data = extract_and_split_construction_plan(pdf_file_path)
			
 
				+            
			
 
				+            # 4. 保存为本地JSON，名称为：文件名+当前时间（到秒）
			
 
				+            base_name = os.path.splitext(os.path.basename(pdf_file_path))[0]
			
 
				+            current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
			
 
				+            
			
 
				+            # 将输出文件保存在与原PDF相同的目录下
			
 
				+            output_dir = os.path.dirname(pdf_file_path)
			
 
				+            output_filename = os.path.join(output_dir, f"{base_name}_{current_time}.json")
			
 
				+            
			
 
				+            with open(output_filename, 'w', encoding='utf-8') as json_file:
			
 
				+                json.dump(result_data, json_file, ensure_ascii=False, indent=4)
			
 
				+                
			
 
				+            print(f"\n[成功] 提取完成！")
			
 
				+            print(f"结构化数据已保存至: {output_filename}")
			
 
				+            
			
 
				+        except Exception as e:
			
 
				+            print(f"\n[失败] 提取过程中发生错误: {e}")
			
--- a/core/construction_review/component/minimal_pipeline/toc_detector.py
+++ b/core/construction_review/component/minimal_pipeline/toc_detector.py
@@ -142,6 +142,13 @@ class TOCCatalogExtractor:
 
				 
			
 
				             catalog = self._parse_toc_text(toc_text)
			
 
				 
			
 
				+            # 添加目录页页码范围（1-based）
			
 
				+            if toc_pages:
			
 
				+                catalog["toc_page_range"] = {
			
 
				+                    "start": toc_pages[0] + 1,  # 转换为1-based页码
			
 
				+                    "end": toc_pages[-1] + 1
			
 
				+                }
			
 
				+
			
 
				             if progress_callback:
			
 
				                 progress_callback("目录识别", 100, f"目录提取完成，共{catalog['total_chapters']}章")
			
 
				 
			
--- a/core/construction_review/component/reviewers/check_completeness/components/result_analyzer.py
+++ b/core/construction_review/component/reviewers/check_completeness/components/result_analyzer.py
@@ -255,7 +255,7 @@ class ResultAnalyzer(IResultAnalyzer):
 
				         for row in summary_rows:
			
 
				             level2 = (row.get("二级目录") or "").strip()
			
 
				             requirement = (row.get("内容要求") or "").strip()
			
 
				-            reference_source = '《桥梁公司危险性较大工程管理实施细则（2025版）》'
			
 
				+            reference_source = '交通运输部《公路水运危险性较大工程专项施工方案编制审查规程》(JT/T 1495—2024)'
			
 
				             reason= f"参照：{reference_source} 中的内容要求，{row.get('section_label', '')}内容属于,专项施工方案内容要求中的 【{suorces_eum[row.get('标签', '')]}】 板块，应包含{requirement}"
			
 
				             review_references = (row.get("依据") or "").strip()
			
 
				             if level2 in row.get("content", ""):
			
@@ -295,7 +295,7 @@ class ResultAnalyzer(IResultAnalyzer):
 
				                     "issue_point": issue_point,
			
 
				                     "location": row.get("section_label", ""),
			
 
				                     "suggestion": suggestion,
			
 
				-                    "reason": f"根据《桥梁公司危险性较大工程管理实施细则（2025版）》，{section_label}的'{level2_name}'应包含：{requirement}。当前缺失：{missing_content_text}",
			
 
				+                    "reason": f"根据交通运输部《公路水运危险性较大工程专项施工方案编制审查规程》(JT/T 1495—2024)，{section_label}的'{level2_name}'应包含：{requirement}。当前缺失：{missing_content_text}",
			
 
				                     "risk_level": risk_level
			
 
				                 },
			
 
				                 "exist_issue": True,
			
--- a/core/construction_review/component/reviewers/completeness_reviewer.py
+++ b/core/construction_review/component/reviewers/completeness_reviewer.py
@@ -276,7 +276,7 @@ class LightweightCompletenessChecker:
 
				 
			
 
				             reference = f"""
			
 
				 【规范参考信息】
			
 
				-根据《桥梁公司危险性较大工程管理实施细则（2025版）》，'{first_name}'章节应包含以下内容：
			
 
				+根据交通运输部《公路水运危险性较大工程专项施工方案编制审查规程》(JT/T 1495—2024)，'{first_name}'章节应包含以下内容：
			
 
				 {chr(10).join(related_specs)}
			
 
				 """
			
 
				 
			
@@ -294,7 +294,7 @@ class LightweightCompletenessChecker:
 
				 
			
 
				             reference = f"""
			
 
				 【规范参考信息】
			
 
				-根据《桥梁公司危险性较大工程管理实施细则（2025版）》，'{second_name}'章节应包含以下三级内容要点：
			
 
				+根据交通运输部《公路水运危险性较大工程专项施工方案编制审查规程》(JT/T 1495—2024)，'{second_name}'章节应包含以下三级内容要点：
			
 
				 {chr(10).join(tertiary_info)}
			
 
				 """
			
 
				 
			
@@ -482,7 +482,8 @@ JSON输出："""
 
				         recommendations = await self._generate_recommendations(
			
 
				             tertiary_result, catalogue_result, outline_result,
			
 
				             actual_first, actual_secondary, actual_tertiary,
			
 
				-            chapter_classification
			
 
				+            chapter_classification,
			
 
				+            chunks  # 传入 chunks 用于获取实际章节名
			
 
				         )
			
 
				 
			
 
				         return LightweightCompletenessResult(
			
@@ -856,6 +857,62 @@ JSON输出："""
 
				         else:
			
 
				             return "incomplete"
			
 
				     
			
 
				+    def _build_section_label_map(self, chunks: List[Dict]) -> Dict[Tuple[str, str], str]:
			
 
				+        """
			
 
				+        Build a (first_code, second_code) -> section_label lookup from chunks.
			
 
				+
			
 
				+        For each chunk, the first-level code, the second-level code and the
			
 
				+        section label are each read from the chunk's "metadata" dict first and
			
 
				+        then from the chunk itself; the first truthy value wins. A chunk only
			
 
				+        contributes an entry when all three values are non-empty, and a later
			
 
				+        chunk with the same code pair overwrites the earlier entry.
			
 
				+
			
 
				+        Example section_label: "第一章编制依据->一、法律法规"
			
 
				+        (first-level chapter title, "->", second-level section title).
			
 
				+
			
 
				+        Args:
			
 
				+            chunks: Chunk dicts, optionally carrying a "metadata" dict.
			
 
				+
			
 
				+        Returns:
			
 
				+            Mapping from (first_code, second_code) to the section_label string.
			
 
				+        """
			
 
				+        label_map: Dict[Tuple[str, str], str] = {}
			
 
				+        for chunk in chunks:
			
 
				+            # NOTE(review): the {} default only applies when the "metadata" key is
			
 
				+            # absent; a chunk with "metadata": None would make the .get() calls
			
 
				+            # below fail -- confirm callers never produce that.
			
 
				+            metadata = chunk.get("metadata", {})
			
 
				+            cat1 = (metadata.get("chapter_classification") or
			
 
				+                    chunk.get("chapter_classification") or
			
 
				+                    chunk.get("first_code"))
			
 
				+            cat2 = (metadata.get("secondary_category_code") or
			
 
				+                    chunk.get("secondary_category_code") or
			
 
				+                    chunk.get("second_code"))
			
 
				+            section_label = (metadata.get("section_label") or
			
 
				+                             chunk.get("section_label") or
			
 
				+                             "")
			
 
				+            # Skip chunks that lack either code or the label.
			
 
				+            if cat1 and cat2 and section_label:
			
 
				+                label_map[(cat1, cat2)] = section_label
			
 
				+        return label_map
			
 
				+
			
 
				+    def _get_actual_chapter_name(self, label_map: Dict[Tuple[str, str], str],
			
 
				+                                  first_code: str, second_code: str = None) -> str:
			
 
				+        """
			
 
				+        Resolve the chapter/section name to show for a code pair.
			
 
				+
			
 
				+        - No second_code: return the standard first-level name from
			
 
				+          self.spec_loader.first_names (or first_code itself if unknown).
			
 
				+        - second_code given and a section_label is mapped: return the text
			
 
				+          after the last "->" (the second-level section title), or the whole
			
 
				+          label when it contains no "->".
			
 
				+        - second_code given but no label mapped: fall back to the standard
			
 
				+          "first_cn > second_cn" from self.secondary_specs, or to
			
 
				+          "first_code > second_code" when no spec entry is found.
			
 
				+
			
 
				+        This method never returns the first-level part of a label; that is
			
 
				+        handled by _get_actual_first_name.
			
 
				+        """
			
 
				+        if not second_code:
			
 
				+            return self.spec_loader.first_names.get(first_code, first_code)
			
 
				+
			
 
				+        section_label = label_map.get((first_code, second_code), "")
			
 
				+        if not section_label:
			
 
				+            # Fall back to the standard (spec-defined) names.
			
 
				+            sec_item = self.secondary_specs.get((first_code, second_code))
			
 
				+            if sec_item:
			
 
				+                return f"{sec_item.first_cn} > {sec_item.second_cn}"
			
 
				+            return f"{first_code} > {second_code}"
			
 
				+
			
 
				+        parts = section_label.split("->")
			
 
				+        if len(parts) >= 2:
			
 
				+            return parts[-1].strip()  # last segment: the second-level section title
			
 
				+        return section_label.strip()
			
 
				+
			
 
				+    def _get_actual_first_name(self, label_map: Dict[Tuple[str, str], str],
			
 
				+                                first_code: str) -> str:
			
 
				+        """
			
 
				+        Resolve the first-level chapter title from the section labels.
			
 
				+
			
 
				+        Scans label_map for the first entry (in dict iteration order) whose
			
 
				+        first code equals first_code and whose label contains "->", and
			
 
				+        returns the text before the first "->". If no entry matches, falls
			
 
				+        back to the standard name in self.spec_loader.first_names (or
			
 
				+        first_code itself when that has no entry either).
			
 
				+        """
			
 
				+        for (fc, sc), label in label_map.items():
			
 
				+            if fc == first_code and "->" in label:
			
 
				+                return label.split("->")[0].strip()
			
 
				+        # Fall back to the standard (spec-defined) name.
			
 
				+        return self.spec_loader.first_names.get(first_code, first_code)
			
 
				+
			
 
				     async def _generate_recommendations(
			
 
				         self,
			
 
				         tertiary_result: Dict,
			
@@ -864,7 +921,8 @@ JSON输出："""
 
				         actual_first: Set[str],
			
 
				         actual_secondary: Set[Tuple[str, str]],
			
 
				         actual_tertiary: Set[Tuple[str, str, str]],
			
 
				-        chapter_classification: Optional[str] = None
			
 
				+        chapter_classification: Optional[str] = None,
			
 
				+        chunks: List[Dict] = None
			
 
				     ) -> List[Dict[str, Any]]:
			
 
				         """
			
 
				         生成结构化分级改进建议。
			
@@ -872,12 +930,15 @@ JSON输出："""
 
				         每条建议包含：
			
 
				           level        : 缺失级别（一级 / 二级 / 三级 / 一致性）
			
 
				           issue_point  : 问题摘要（含级别标识）
			
 
				-          location     : 问题定位路径
			
 
				+          location     : 问题定位路径（使用实际章节名）
			
 
				           suggestion   : 补充建议（使用LLM生成）
			
 
				           reason       : 规范依据说明（使用LLM生成）
			
 
				         """
			
 
				         recommendations: List[Dict[str, Any]] = []
			
 
				 
			
 
				+        # 构建 section_label 映射，用于获取实际章节名
			
 
				+        label_map = self._build_section_label_map(chunks or [])
			
 
				+
			
 
				         # 确定需要检查的一级分类范围
			
 
				         if chapter_classification:
			
 
				             required_first = (
			
@@ -901,7 +962,7 @@ JSON输出："""
 
				             if first_code not in actual_first:
			
 
				                 # issue_point 和 reason 使用简单拼接
			
 
				                 issue_point = f"【一级章节缺失】'{first_name}'整个章节不存在"
			
 
				-                reason = f"依据《桥梁公司危险性较大工程管理实施细则（2025版）》规定，文档必须包含'{first_name}'一级章节，当前正文中未发现该章节任何内容"
			
 
				+                reason = f"依据交通运输部《公路水运危险性较大工程专项施工方案编制审查规程》(JT/T 1495—2024)规定，文档必须包含'{first_name}'一级章节，当前正文中未发现该章节任何内容"
			
 
				 
			
 
				                 # 尝试使用LLM生成 suggestion
			
 
				                 llm_result = await self._generate_recommendation_with_llm(
			
@@ -939,15 +1000,18 @@ JSON输出："""
 
				 
			
 
				                 # ── 二级缺失 ──────────────────────────────────────────
			
 
				                 if (cat1, cat2) not in actual_secondary:
			
 
				+                    # 获取实际一级章节名
			
 
				+                    actual_first_name = self._get_actual_first_name(label_map, cat1)
			
 
				+
			
 
				                     # issue_point 和 reason 使用简单拼接
			
 
				-                    issue_point = f"【二级章节缺失】{first_name} > '{second_name}'整个章节不存在"
			
 
				-                    reason = f"依据《桥梁公司危险性较大工程管理实施细则（2025版）》规定，'{first_name}'下应包含'{second_name}'二级章节，当前正文中未发现该章节内容"
			
 
				+                    issue_point = f"【二级章节缺失】{actual_first_name} > '{second_name}'整个章节不存在"
			
 
				+                    reason = f"依据交通运输部《公路水运危险性较大工程专项施工方案编制审查规程》(JT/T 1495—2024)规定，'{actual_first_name}'下应包含'{second_name}'二级章节，当前正文中未发现该章节内容"
			
 
				 
			
 
				                     # 尝试使用LLM生成 suggestion
			
 
				                     llm_result = await self._generate_recommendation_with_llm(
			
 
				                         level="二级",
			
 
				                         first_code=cat1,
			
 
				-                        first_name=first_name,
			
 
				+                        first_name=actual_first_name,
			
 
				                         second_code=cat2,
			
 
				                         second_name=second_name,
			
 
				                         first_seq=first_seq,
			
@@ -958,12 +1022,12 @@ JSON输出："""
 
				                         suggestion = llm_result.get("suggestion")
			
 
				                     else:
			
 
				                         # 回退到简单拼接
			
 
				-                        suggestion = f"请在'{first_name}'下添加'{second_name}'章节内容"
			
 
				+                        suggestion = f"请在'{actual_first_name}'下添加'{second_name}'章节内容"
			
 
				 
			
 
				                     recommendations.append({
			
 
				                         "level": "二级",
			
 
				                         "issue_point": issue_point,
			
 
				-                        "location": f"{first_name} > {second_name}",
			
 
				+                        "location": actual_first_name,  # 二级缺失定位到一级章节
			
 
				                         "suggestion": suggestion,
			
 
				                         "reason": reason,
			
 
				                         "first_seq": first_seq,
			
@@ -986,6 +1050,9 @@ JSON输出："""
 
				                 if not missing_t_items:
			
 
				                     continue
			
 
				 
			
 
				+                # 获取实际二级小节名
			
 
				+                actual_second_name = self._get_actual_chapter_name(label_map, cat1, cat2)
			
 
				+
			
 
				                 # issue_point 和 reason 使用简单拼接（三级缺失）
			
 
				                 # 尝试使用LLM批量生成 suggestion
			
 
				                 llm_result = await self._generate_recommendation_with_llm(
			
@@ -1012,9 +1079,9 @@ JSON输出："""
 
				                     recommendations.append({
			
 
				                         "level": "三级",
			
 
				                         "issue_point": f"【三级内容缺失】{first_name} > {second_name} > '{t_item.third_cn}'",
			
 
				-                        "location": f"{first_name} > {second_name}",
			
 
				+                        "location": actual_second_name,  # 三级缺失定位到二级小节
			
 
				                         "suggestion": suggestion,
			
 
				-                        "reason": f"依据《桥梁公司危险性较大工程管理实施细则（2025版）》规定，'{second_name}'下应包含'{t_item.third_cn}'内容要点",
			
 
				+                        "reason": f"依据交通运输部《公路水运危险性较大工程专项施工方案编制审查规程》(JT/T 1495—2024)规定，'{second_name}'下应包含'{t_item.third_cn}'内容要点",
			
 
				                         "first_seq": first_seq,
			
 
				                         "second_seq": second_seq,
			
 
				                         "third_seq": t_item.third_seq,
			
@@ -1036,7 +1103,7 @@ JSON输出："""
 
				 
			
 
				                 # issue_point 和 reason 使用简单拼接（一致性审查）
			
 
				                 issue_point = f"【目录正文不一致】'{location}'目录已列但正文无内容"
			
 
				-                reason = f"依据《桥梁公司危险性较大工程管理实施细则（2025版）》规定，目录应与正文保持一致。目录页列有'{sec_title}'章节，但正文中未发现对应内容"
			
 
				+                reason = f"依据交通运输部《公路水运危险性较大工程专项施工方案编制审查规程》(JT/T 1495—2024)规定，目录应与正文保持一致。目录页列有'{sec_title}'章节，但正文中未发现对应内容"
			
 
				 
			
 
				                 # 尝试使用LLM生成 suggestion
			
 
				                 llm_result = await self._generate_recommendation_with_llm(
			
@@ -1071,7 +1138,7 @@ JSON输出："""
 
				                 "issue_point": "文档完整性良好",
			
 
				                 "location": "",
			
 
				                 "suggestion": "无需补充",
			
 
				-                "reason": "依据《桥梁公司危险性较大工程管理实施细则（2025版）》规定，文档已覆盖所有章节与内容要点",
			
 
				+                "reason": "依据交通运输部《公路水运危险性较大工程专项施工方案编制审查规程》(JT/T 1495—2024)规定，文档已覆盖所有章节与内容要点",
			
 
				             })
			
 
				 
			
 
				         return recommendations