Просмотр исходного кода

v0.0.5-功能优化-实现目录审查功能

WangXuMing 2 месяца назад
Родитель
Коммит
38617440e0

+ 2 - 1
.gitignore

@@ -77,4 +77,5 @@ config/config.ini
 output/
 output/
 命令.txt
 命令.txt
 /core/construction_review/component/doc_worker/utils/llm_client copy.py
 /core/construction_review/component/doc_worker/utils/llm_client copy.py
-.venv/
+.venv/
+.project_optimization/

+ 571 - 120
core/base/workflow_manager.py

@@ -9,15 +9,18 @@
 
 
 import asyncio
 import asyncio
 import time
 import time
-from typing import Dict, Optional
+from typing import Dict, Optional, Any
 from datetime import datetime
 from datetime import datetime
+from langgraph.graph import StateGraph, END
+from langchain_core.messages import BaseMessage, HumanMessage, AIMessage
 from foundation.observability.logger.loggering import server_logger as logger
 from foundation.observability.logger.loggering import server_logger as logger
 from foundation.observability.monitoring.time_statistics import track_execution_time
 from foundation.observability.monitoring.time_statistics import track_execution_time
 from foundation.infrastructure.cache.redis_connection import RedisConnectionFactory
 from foundation.infrastructure.cache.redis_connection import RedisConnectionFactory
 from .progress_manager import ProgressManager
 from .progress_manager import ProgressManager
 from .redis_duplicate_checker import RedisDuplicateChecker
 from .redis_duplicate_checker import RedisDuplicateChecker
 from .task_models import TaskFileInfo, TaskChain
 from .task_models import TaskFileInfo, TaskChain
-from ..construction_review.workflows import DocumentWorkflow,AIReviewWorkflow
+from ..construction_review.workflows import DocumentWorkflow, AIReviewWorkflow, ReportWorkflow
+from ..construction_review.workflows.types import TaskChainState
 
 
 class ProgressManagerRegistry:
 class ProgressManagerRegistry:
     """ProgressManager注册表 - 为每个任务管理独立的ProgressManager实例"""
     """ProgressManager注册表 - 为每个任务管理独立的ProgressManager实例"""
@@ -64,6 +67,9 @@ class WorkflowManager:
         self._terminate_signal_prefix = "ai_review:terminate_signal:"
         self._terminate_signal_prefix = "ai_review:terminate_signal:"
         self._task_expire_time = 7200  # 2小时
         self._task_expire_time = 7200  # 2小时
 
 
+        # LangGraph 任务链工作流(方案D)
+        self.task_chain_graph = None  # 延迟初始化,避免循环导入
+
     async def submit_task_processing(self, file_info: dict) -> str:
     async def submit_task_processing(self, file_info: dict) -> str:
         """异步提交任务处理(用于file_upload层)"""
         """异步提交任务处理(用于file_upload层)"""
         from foundation.infrastructure.messaging.tasks import submit_task_processing_task
         from foundation.infrastructure.messaging.tasks import submit_task_processing_task
@@ -86,10 +92,15 @@ class WorkflowManager:
             raise
             raise
     @track_execution_time
     @track_execution_time
     def submit_task_processing_sync(self, file_info: dict) -> dict:
     def submit_task_processing_sync(self, file_info: dict) -> dict:
-        """同步提交任务处理(用于Celery worker)"""
-        try:
+        """
+        同步提交任务处理(用于Celery worker)
 
 
-            logger.info(f"提交文档处理任务: {file_info['file_id']}")
+        Note:
+            已切换到 LangGraph 任务链工作流(方案D)
+            使用统一的状态管理和嵌套子图架构
+        """
+        try:
+            logger.info(f"提交文档处理任务(LangGraph方案D): {file_info['file_id']}")
 
 
             # 1. 创建TaskFileInfo对象(封装任务文件信息)
             # 1. 创建TaskFileInfo对象(封装任务文件信息)
             task_file_info = TaskFileInfo(file_info)
             task_file_info = TaskFileInfo(file_info)
@@ -107,153 +118,95 @@ class WorkflowManager:
             # 5. 添加到活跃任务跟踪
             # 5. 添加到活跃任务跟踪
             self.active_chains[callback_task_id] = task_chain
             self.active_chains[callback_task_id] = task_chain
 
 
-            # 5. 初始化进度管理
+            # 6. 初始化进度管理
             asyncio.run(self.progress_manager.initialize_progress(
             asyncio.run(self.progress_manager.initialize_progress(
                 callback_task_id=callback_task_id,
                 callback_task_id=callback_task_id,
                 user_id=task_file_info.user_id,
                 user_id=task_file_info.user_id,
                 stages=[]
                 stages=[]
             ))
             ))
 
 
-            # 6. 启动处理流程(同步执行)
-            self._process_task_chain_sync(task_chain, task_file_info, task_file_info.file_type)
-
-            # logger.info(f"提交文档处理任务: {callback_task_id}")
-            logger.info(f"施工方案审查任务已完成! ")
-            logger.info(f"文件ID: {task_file_info.file_id}")
-            logger.info(f"文件名:{task_file_info.file_name}")
-
-        except Exception as e:
-            logger.error(f"提交文档处理任务失败: {str(e)}")
-            raise
-
-
-    def _process_task_chain_sync(self, task_chain: TaskChain, task_file_info: TaskFileInfo, file_type: str):
-        """同步处理文档任务链(用于Celery worker)"""
-        try:
-            file_content = task_file_info.file_content
-
-            # 阶段1:文档处理(串行)
-
-            document_workflow = DocumentWorkflow(
-                task_file_info=task_file_info,  
-                progress_manager=self.progress_manager,
-                redis_duplicate_checker=self.redis_duplicate_checker
-            )
-
-            # 同步执行文档处理
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-            doc_result = loop.run_until_complete(document_workflow.execute(file_content, file_type))
-            loop.close()
-
-            task_chain.results['document'] = doc_result
-
-            # 阶段2:AI审查(内部并发)
-            task_chain.update_stage("ai_review")
-
-            structured_content = doc_result['structured_content']
-
-            # 读取AI审查配置
-            import configparser
-            config = configparser.ConfigParser()
-            config.read('config/config.ini', encoding='utf-8')
-
-            max_review_units = config.getint('ai_review', 'MAX_REVIEW_UNITS', fallback=None)
-            if max_review_units == 0:  # 如果配置为0,表示审查所有
-                max_review_units = None
-            review_mode = config.get('ai_review', 'REVIEW_MODE', fallback='all')
-
-            logger.info(f"AI审查配置: 最大审查数量={max_review_units}, 审查模式={review_mode}")
+            # 7. 构建 LangGraph 任务链工作流(延迟初始化)
+            if self.task_chain_graph is None:
+                self.task_chain_graph = self._build_task_chain_workflow()
 
 
-            ai_workflow = AIReviewWorkflow(
-                task_file_info=task_file_info,  
-                structured_content=structured_content,
+            # 8. 构建初始状态
+            initial_state = TaskChainState(
+                file_id=task_file_info.file_id,
+                callback_task_id=callback_task_id,
+                user_id=task_file_info.user_id,
+                file_name=task_file_info.file_name,
+                file_type=task_file_info.file_type,
+                file_content=task_file_info.file_content,
+                current_stage="start",
+                overall_task_status="processing",
+                stage_status={
+                    "document": "pending",
+                    "ai_review": "pending",
+                    "report": "pending"
+                },
+                document_result=None,
+                ai_review_result=None,
+                report_result=None,
+                error_message=None,
                 progress_manager=self.progress_manager,
                 progress_manager=self.progress_manager,
-                max_review_units=max_review_units,
-                review_mode=review_mode
+                task_file_info=task_file_info,
+                messages=[HumanMessage(content=f"开始任务链: {task_file_info.file_id}")]
             )
             )
 
 
-            # 同步执行AI审查
+            # 9. 执行 LangGraph 任务链工作流
             loop = asyncio.new_event_loop()
             loop = asyncio.new_event_loop()
             asyncio.set_event_loop(loop)
             asyncio.set_event_loop(loop)
-            ai_result = loop.run_until_complete(ai_workflow.execute())
+            result = loop.run_until_complete(self.task_chain_graph.ainvoke(initial_state))
             loop.close()
             loop.close()
 
 
-            task_chain.results['ai_review'] = ai_result
-
-            # # 阶段3:报告生成(串行)
-            # task_chain.current_stage = "report_generation"
-
-            # report_workflow = ReportWorkflow(
-            #     file_id=task_chain.file_id,
-            #     callback_task_id=task_chain.callback_task_id,
-            #     user_id=task_chain.user_id,
-            #     ai_review_results=ai_result,
-            #     progress_manager=self.progress_manager
-            # )
-
-            # # 同步执行报告生成
-            # loop = asyncio.new_event_loop()
-            # asyncio.set_event_loop(loop)
-            # report_result = loop.run_until_complete(report_workflow.execute())
-            # loop.close()
-
-            # task_chain.results['report'] = report_result
-
-            # 完成任务链
-            task_chain.complete_processing()
-
-            # 清理任务注册
+            # 10. 清理任务注册
             asyncio.run(self.redis_duplicate_checker.unregister_task(task_chain.file_id))
             asyncio.run(self.redis_duplicate_checker.unregister_task(task_chain.file_id))
-            # 通知SSE连接任务完成
-            asyncio.run(self.progress_manager.complete_task(task_chain.callback_task_id, task_chain.user_id))
 
 
-            # 清理Redis文件缓存
-            try:
-                from foundation.utils.redis_utils import delete_file_info
-                asyncio.run(delete_file_info(task_chain.file_id))
-                logger.info(f"已清理Redis文件缓存: {task_chain.file_id}")
-            except Exception as e:
-                logger.warning(f"清理Redis文件缓存失败: {str(e)}")
+            logger.info(f"施工方案审查任务已完成(LangGraph方案D)!")
+            logger.info(f"文件ID: {task_file_info.file_id}")
+            logger.info(f"文件名: {task_file_info.file_name}")
+            logger.info(f"整体状态: {result.get('overall_task_status', 'unknown')}")
+
+            # 构建可序列化的返回结果(移除不可序列化的对象)
+            serializable_result = {
+                "file_id": result.get("file_id"),
+                "callback_task_id": result.get("callback_task_id"),
+                "user_id": result.get("user_id"),
+                "file_name": result.get("file_name"),
+                "current_stage": result.get("current_stage"),
+                "overall_task_status": result.get("overall_task_status"),
+                "stage_status": result.get("stage_status"),
+                "error_message": result.get("error_message"),
+                # 注意:不包含 progress_manager, task_file_info, messages 等不可序列化对象
+            }
 
 
-            logger.info(f"文档处理任务链完成: {task_chain.callback_task_id}")
-            return task_chain.results
+            return serializable_result
 
 
         except Exception as e:
         except Exception as e:
+            logger.error(f"提交文档处理任务失败: {str(e)}", exc_info=True)
+
             # 标记任务失败
             # 标记任务失败
-            task_chain.fail_processing(str(e))
-            logger.error(f"文档处理任务链失败: {task_chain.callback_task_id}, 错误: {str(e)}")
+            if callback_task_id in self.active_chains:
+                self.active_chains[callback_task_id].fail_processing(str(e))
 
 
             # 清理任务注册
             # 清理任务注册
-            asyncio.run(self.redis_duplicate_checker.unregister_task(task_chain.file_id))
-
-            # 清理Redis文件缓存(即使失败也清理)
-            try:
-                from foundation.utils.redis_utils import delete_file_info
-                asyncio.run(delete_file_info(task_chain.file_id))
-                logger.info(f"已清理Redis文件缓存: {task_chain.file_id}")
-            except Exception as cleanup_error:
-                logger.warning(f"清理Redis文件缓存失败: {str(cleanup_error)}")
+            asyncio.run(self.redis_duplicate_checker.unregister_task(task_file_info.file_id))
 
 
             # 通知SSE连接任务失败
             # 通知SSE连接任务失败
-            error_result = {
+            error_data = {
                 "error": str(e),
                 "error": str(e),
                 "status": "failed",
                 "status": "failed",
+                "overall_task_status": "failed",
                 "timestamp": datetime.now().isoformat()
                 "timestamp": datetime.now().isoformat()
             }
             }
-            current_data = {
-                "status": "failed",
-                "result": error_result
-            }
-            asyncio.run(self.progress_manager.complete_task(task_chain.callback_task_id, task_chain.user_id, current_data))
+            asyncio.run(self.progress_manager.complete_task(callback_task_id, task_file_info.user_id, error_data))
 
 
             raise
             raise
         finally:
         finally:
             # 清理活跃任务
             # 清理活跃任务
-            if task_chain.callback_task_id in self.active_chains:
-                del self.active_chains[task_chain.callback_task_id]
+            if callback_task_id in self.active_chains:
+                del self.active_chains[callback_task_id]
 
 
-    # ==================== 任务终止管理方法 ====================
 
 
     async def set_terminate_signal(self, callback_task_id: str, operator: str = "unknown") -> Dict[str, any]:
     async def set_terminate_signal(self, callback_task_id: str, operator: str = "unknown") -> Dict[str, any]:
         """
         """
@@ -456,4 +409,502 @@ class WorkflowManager:
 
 
         except Exception as e:
         except Exception as e:
             logger.error(f"获取任务信息失败: {str(e)}", exc_info=True)
             logger.error(f"获取任务信息失败: {str(e)}", exc_info=True)
-            return None
+            return None
+
+    # ==================== LangGraph 任务链工作流(方案D)====================
+
+    def _build_task_chain_workflow(self) -> StateGraph:
+        """
+        构建 LangGraph 任务链工作流图(方案D)
+
+        Returns:
+            StateGraph: 配置完成的 LangGraph 任务链图实例
+
+        Note:
+            创建包含文档处理、AI审查(嵌套子图)、报告生成的完整任务链
+            设置节点间的转换关系和条件边,支持终止检查和错误处理
+            工作流路径: start → document_processing → ai_review_subgraph → report_generation → complete → END
+        """
+        logger.info("开始构建 LangGraph 任务链工作流图")
+
+        workflow = StateGraph(TaskChainState)
+
+        # 添加节点
+        workflow.add_node("start", self._start_chain_node)
+        workflow.add_node("document_processing", self._document_processing_node)
+        workflow.add_node("ai_review_subgraph", self._ai_review_subgraph_node)
+        workflow.add_node("report_generation", self._report_generation_node)
+        workflow.add_node("complete", self._complete_chain_node)
+        workflow.add_node("error_handler", self._error_handler_chain_node)
+        workflow.add_node("terminate", self._terminate_chain_node)
+
+        # 设置入口点
+        workflow.set_entry_point("start")
+
+        # 添加边和条件边
+        workflow.add_edge("start", "document_processing")
+
+        # 文档处理后检查终止信号
+        workflow.add_conditional_edges(
+            "document_processing",
+            self._should_terminate_or_error_chain,
+            {
+                "terminate": "terminate",
+                "error": "error_handler",
+                "continue": "ai_review_subgraph"
+            }
+        )
+
+        # AI审查后检查终止信号
+        workflow.add_conditional_edges(
+            "ai_review_subgraph",
+            self._should_terminate_or_error_chain,
+            {
+                "terminate": "terminate",
+                "error": "error_handler",
+                "continue": "report_generation"
+            }
+        )
+
+        # 报告生成后检查终止信号
+        workflow.add_conditional_edges(
+            "report_generation",
+            self._should_terminate_or_error_chain,
+            {
+                "terminate": "terminate",
+                "error": "error_handler",
+                "continue": "complete"
+            }
+        )
+
+        # 完成节点直接结束
+        workflow.add_edge("complete", END)
+        workflow.add_edge("error_handler", END)
+        workflow.add_edge("terminate", END)
+
+        # 编译工作流图
+        compiled_graph = workflow.compile()
+
+        logger.info("LangGraph 任务链工作流图构建完成")
+        return compiled_graph
+
+    async def _start_chain_node(self, state: TaskChainState) -> TaskChainState:
+        """
+        任务链开始节点
+
+        Args:
+            state: 任务链状态
+
+        Returns:
+            TaskChainState: 更新后的状态
+        """
+        logger.info(f"任务链工作流启动: {state['callback_task_id']}")
+
+        return {
+            "current_stage": "start",
+            "overall_task_status": "processing",
+            "stage_status": {
+                "document": "pending",
+                "ai_review": "pending",
+                "report": "pending"
+            },
+            "messages": [AIMessage(content="任务链工作流启动")]
+        }
+
+    async def _document_processing_node(self, state: TaskChainState) -> TaskChainState:
+        """
+        文档处理节点
+
+        Args:
+            state: 任务链状态
+
+        Returns:
+            TaskChainState: 更新后的状态,包含文档处理结果
+        """
+        try:
+            logger.info(f"开始文档处理阶段: {state['callback_task_id']}")
+
+            # 检查终止信号
+            if await self.check_terminate_signal(state["callback_task_id"]):
+                logger.warning(f"文档处理阶段检测到终止信号: {state['callback_task_id']}")
+                return {
+                    "current_stage": "document_processing",
+                    "overall_task_status": "terminated",
+                    "stage_status": {**state["stage_status"], "document": "terminated"},
+                    "messages": [AIMessage(content="文档处理阶段检测到终止信号")]
+                }
+
+            # 获取 TaskFileInfo 实例
+            task_file_info = state["task_file_info"]
+
+            # 创建文档工作流实例
+            document_workflow = DocumentWorkflow(
+                task_file_info=task_file_info,
+                progress_manager=state["progress_manager"],
+                redis_duplicate_checker=self.redis_duplicate_checker
+            )
+
+            # 执行文档处理
+            doc_result = await document_workflow.execute(
+                state["file_content"],
+                state["file_type"]
+            )
+
+            logger.info(f"文档处理完成: {state['callback_task_id']}")
+
+            return {
+                "current_stage": "document_processing",
+                "overall_task_status": "processing",
+                "stage_status": {**state["stage_status"], "document": "completed"},
+                "document_result": doc_result,
+                "messages": [AIMessage(content="文档处理完成")]
+            }
+
+        except Exception as e:
+            logger.error(f"文档处理失败: {str(e)}", exc_info=True)
+            return {
+                "current_stage": "document_processing",
+                "overall_task_status": "failed",
+                "stage_status": {**state["stage_status"], "document": "failed"},
+                "error_message": f"文档处理失败: {str(e)}",
+                "messages": [AIMessage(content=f"文档处理失败: {str(e)}")]
+            }
+
+    async def _ai_review_subgraph_node(self, state: TaskChainState) -> TaskChainState:
+        """
+        AI审查子图节点(嵌套现有的 AIReviewWorkflow)
+
+        Args:
+            state: 任务链状态
+
+        Returns:
+            TaskChainState: 更新后的状态,包含AI审查结果
+
+        Note:
+            这是方案D的核心实现:将现有的 AIReviewWorkflow 作为子图嵌套
+            无需修改 AIReviewWorkflow 的代码,保持其独立性
+        """
+        try:
+            logger.info(f"开始AI审查阶段: {state['callback_task_id']}")
+
+            # 检查终止信号
+            if await self.check_terminate_signal(state["callback_task_id"]):
+                logger.warning(f"AI审查阶段检测到终止信号: {state['callback_task_id']}")
+                return {
+                    "current_stage": "ai_review",
+                    "overall_task_status": "terminated",
+                    "stage_status": {**state["stage_status"], "ai_review": "terminated"},
+                    "messages": [AIMessage(content="AI审查阶段检测到终止信号")]
+                }
+
+            # 获取文档处理结果中的结构化内容
+            structured_content = state["document_result"].get("structured_content")
+            if not structured_content:
+                raise ValueError("文档处理结果中缺少结构化内容")
+
+            # 获取 TaskFileInfo 实例
+            task_file_info = state["task_file_info"]
+
+            # 读取AI审查配置
+            import configparser
+            config = configparser.ConfigParser()
+            config.read('config/config.ini', encoding='utf-8')
+
+            max_review_units = config.getint('ai_review', 'MAX_REVIEW_UNITS', fallback=None)
+            if max_review_units == 0:
+                max_review_units = None
+            review_mode = config.get('ai_review', 'REVIEW_MODE', fallback='all')
+
+            logger.info(f"AI审查配置: 最大审查数量={max_review_units}, 审查模式={review_mode}")
+
+            # 创建AI审查工作流实例(作为嵌套子图)
+            ai_workflow = AIReviewWorkflow(
+                task_file_info=task_file_info,
+                structured_content=structured_content,
+                progress_manager=state["progress_manager"],
+                max_review_units=max_review_units,
+                review_mode=review_mode
+            )
+
+            # 执行AI审查(内部使用 LangGraph)
+            ai_result = await ai_workflow.execute()
+
+            logger.info(f"AI审查完成: {state['callback_task_id']}")
+
+            return {
+                "current_stage": "ai_review",
+                "overall_task_status": "processing",
+                "stage_status": {**state["stage_status"], "ai_review": "completed"},
+                "ai_review_result": ai_result,
+                "messages": [AIMessage(content="AI审查完成")]
+            }
+
+        except Exception as e:
+            logger.error(f"AI审查失败: {str(e)}", exc_info=True)
+            return {
+                "current_stage": "ai_review",
+                "overall_task_status": "failed",
+                "stage_status": {**state["stage_status"], "ai_review": "failed"},
+                "error_message": f"AI审查失败: {str(e)}",
+                "messages": [AIMessage(content=f"AI审查失败: {str(e)}")]
+            }
+
+    async def _report_generation_node(self, state: TaskChainState) -> TaskChainState:
+        """
+        报告生成节点
+
+        Args:
+            state: 任务链状态
+
+        Returns:
+            TaskChainState: 更新后的状态,包含报告生成结果
+
+        Note:
+            调用ReportWorkflow生成审查报告摘要(基于高中风险问题,使用LLM)
+            根据决策2(方案A-方式1),在此阶段生成完整报告后一次性保存
+        """
+        try:
+            logger.info(f"开始报告生成阶段: {state['callback_task_id']}")
+
+            # 检查终止信号
+            if await self.check_terminate_signal(state["callback_task_id"]):
+                logger.warning(f"报告生成阶段检测到终止信号: {state['callback_task_id']}")
+                return {
+                    "current_stage": "report_generation",
+                    "overall_task_status": "terminated",
+                    "stage_status": {**state["stage_status"], "report": "terminated"},
+                    "messages": [AIMessage(content="报告生成阶段检测到终止信号")]
+                }
+
+            # 获取AI审查结果
+            ai_review_result = state.get("ai_review_result")
+            if not ai_review_result:
+                raise ValueError("AI审查结果缺失,无法生成报告")
+
+            # 获取 TaskFileInfo 实例
+            task_file_info = state["task_file_info"]
+
+            # 创建报告生成工作流实例
+            report_workflow = ReportWorkflow(
+                file_id=state["file_id"],
+                file_name=state["file_name"],
+                callback_task_id=state["callback_task_id"],
+                user_id=state["user_id"],
+                ai_review_results=ai_review_result,
+                progress_manager=state["progress_manager"]
+            )
+
+            # 执行报告生成
+            report_result = await report_workflow.execute()
+
+            logger.info(f"报告生成完成: {state['callback_task_id']}")
+
+            # 保存完整结果(包含文档处理、AI审查、报告生成)
+            await self._save_complete_results(state, report_result)
+
+            return {
+                "current_stage": "report_generation",
+                "overall_task_status": "processing",
+                "stage_status": {**state["stage_status"], "report": "completed"},
+                "report_result": report_result,
+                "messages": [AIMessage(content="报告生成完成")]
+            }
+
+        except Exception as e:
+            logger.error(f"报告生成失败: {str(e)}", exc_info=True)
+            return {
+                "current_stage": "report_generation",
+                "overall_task_status": "failed",
+                "stage_status": {**state["stage_status"], "report": "failed"},
+                "error_message": f"报告生成失败: {str(e)}",
+                "messages": [AIMessage(content=f"报告生成失败: {str(e)}")]
+            }
+
+    async def _complete_chain_node(self, state: TaskChainState) -> TaskChainState:
+        """
+        任务链完成节点
+
+        Args:
+            state: 任务链状态
+
+        Returns:
+            TaskChainState: 更新后的状态,标记整体任务已完成
+
+        Note:
+            只有在所有阶段(文档处理、AI审查、报告生成)都完成后才标记 overall_task_status="completed"
+            这解决了原有的状态语义混乱问题(P0-1)
+        """
+        logger.info(f"任务链工作流完成: {state['callback_task_id']}")
+
+        # 标记整体任务完成
+        if state["progress_manager"]:
+            await state["progress_manager"].complete_task(
+                state["callback_task_id"],
+                state["user_id"],
+                {"overall_task_status": "completed", "message": "所有阶段已完成"}
+            )
+
+        # 清理 Redis 缓存
+        try:
+            from foundation.utils.redis_utils import delete_file_info
+            await delete_file_info(state["file_id"])
+            logger.info(f"已清理 Redis 文件缓存: {state['file_id']}")
+        except Exception as e:
+            logger.warning(f"清理 Redis 文件缓存失败: {str(e)}")
+
+        return {
+            "current_stage": "complete",
+            "overall_task_status": "completed",  # ⚠️ 关键:只有到这里才标记整体完成
+            "messages": [AIMessage(content="任务链工作流完成")]
+        }
+
+    async def _error_handler_chain_node(self, state: TaskChainState) -> TaskChainState:
+        """
+        任务链错误处理节点
+
+        Args:
+            state: 任务链状态
+
+        Returns:
+            TaskChainState: 更新后的状态,标记为失败
+        """
+        logger.error(f"任务链工作流错误: {state['callback_task_id']}, 错误: {state.get('error_message', '未知错误')}")
+
+        # 通知失败
+        if state["progress_manager"]:
+            error_data = {
+                "overall_task_status": "failed",
+                "error": state.get("error_message", "未知错误"),
+                "status": "failed",
+                "timestamp": datetime.now().isoformat()
+            }
+            await state["progress_manager"].complete_task(
+                state["callback_task_id"],
+                state["user_id"],
+                error_data
+            )
+
+        # 清理 Redis 缓存(即使失败也清理)
+        try:
+            from foundation.utils.redis_utils import delete_file_info
+            await delete_file_info(state["file_id"])
+            logger.info(f"已清理 Redis 文件缓存: {state['file_id']}")
+        except Exception as e:
+            logger.warning(f"清理 Redis 文件缓存失败: {str(e)}")
+
+        return {
+            "current_stage": "error_handler",
+            "overall_task_status": "failed",
+            "messages": [AIMessage(content=f"任务链错误: {state.get('error_message', '未知错误')}")]
+        }
+
+    async def _terminate_chain_node(self, state: TaskChainState) -> TaskChainState:
+        """
+        任务链终止节点
+
+        Args:
+            state: 任务链状态
+
+        Returns:
+            TaskChainState: 更新后的状态,标记为已终止
+        """
+        logger.warning(f"任务链工作流已终止: {state['callback_task_id']}")
+
+        # 通知终止
+        if state["progress_manager"]:
+            await state["progress_manager"].complete_task(
+                state["callback_task_id"],
+                state["user_id"],
+                {"overall_task_status": "terminated", "message": "任务已被用户终止"}
+            )
+
+        # 清理 Redis 终止信号
+        await self.clear_terminate_signal(state["callback_task_id"])
+
+        # 清理 Redis 文件缓存
+        try:
+            from foundation.utils.redis_utils import delete_file_info
+            await delete_file_info(state["file_id"])
+            logger.info(f"已清理 Redis 文件缓存: {state['file_id']}")
+        except Exception as e:
+            logger.warning(f"清理 Redis 文件缓存失败: {str(e)}")
+
+        return {
+            "current_stage": "terminated",
+            "overall_task_status": "terminated",
+            "messages": [AIMessage(content="任务链已被终止")]
+        }
+
+    def _should_terminate_or_error_chain(self, state: TaskChainState) -> str:
+        """
+        检查任务链是否应该终止或发生错误
+
+        Args:
+            state: 任务链状态
+
+        Returns:
+            str: "terminate", "error", 或 "continue"
+
+        Note:
+            这是条件边判断方法,用于决定工作流的下一步走向
+            1. 优先检查终止信号
+            2. 检查是否有错误
+            3. 都没有则继续执行
+        """
+        # 检查终止状态
+        if state.get("overall_task_status") == "terminated":
+            return "terminate"
+
+        # 检查错误状态
+        if state.get("overall_task_status") == "failed" or state.get("error_message"):
+            return "error"
+
+        # 默认继续执行
+        return "continue"
+
+    async def _save_complete_results(self, state: TaskChainState, report_result: Dict[str, Any]):
+        """
+        保存完整结果(方案A-方式1:一次性保存)
+
+        Args:
+            state: 任务链状态
+            report_result: 报告生成结果
+
+        Note:
+            根据决策2(方案A-方式1),在报告工作流完成后一次性保存完整结果
+            包含:文档处理结果 + AI审查结果 + 报告生成结果
+        """
+        try:
+            import json
+            import os
+
+            logger.info(f"开始保存完整结果: {state['callback_task_id']}")
+
+            # 创建 temp 目录
+            temp_dir = "temp"
+            os.makedirs(temp_dir, exist_ok=True)
+
+            # 构建完整结果
+            complete_results = {
+                "callback_task_id": state["callback_task_id"],
+                "file_id": state["file_id"],
+                "file_name": state["file_name"],
+                "user_id": state["user_id"],
+                "overall_task_status": "processing",  # 此时还在处理中,complete节点才标记completed
+                "stage_status": state["stage_status"],
+                "document_result": state.get("document_result"),
+                "ai_review_result": state.get("ai_review_result"),
+                "report_result": report_result,
+                "timestamp": datetime.now().isoformat()
+            }
+
+            # 保存到文件
+            file_path = os.path.join(temp_dir, f"{state['callback_task_id']}.json")
+            with open(file_path, 'w', encoding='utf-8') as f:
+                json.dump(complete_results, f, ensure_ascii=False, indent=2)
+
+            logger.info(f"完整结果已保存到: {file_path}")
+
+        except Exception as e:
+            logger.error(f"保存完整结果失败: {str(e)}", exc_info=True)
+            raise

+ 35 - 28
core/construction_review/component/ai_review_engine.py

@@ -868,7 +868,7 @@ class AIReviewEngine(BaseReviewer):
             # 调用大模型得到敏感词审查结果
             # 调用大模型得到敏感词审查结果
             return await self.review("sensitive_check", trace_id, "basic", "sensitive_word_check",
             return await self.review("sensitive_check", trace_id, "basic", "sensitive_word_check",
                                    review_content, formatted_sensitive_words,
                                    review_content, formatted_sensitive_words,
-                                   None, None, state, stage_name, timeout = 60, model_name="qwen3_30b")
+                                   None, state, stage_name, timeout=60, model_name="qwen3_30b")
         else:
         else:
             # 没有检测到敏感词,构造返回体
             # 没有检测到敏感词,构造返回体
             logger.info("没有检测到敏感词,未进入二审")
             logger.info("没有检测到敏感词,未进入二审")
@@ -1019,33 +1019,40 @@ class AIReviewEngine(BaseReviewer):
             # 从 Redis 查询目录审查结果,使用 chapter_label 作为 title 查询
             # 从 Redis 查询目录审查结果,使用 chapter_label 作为 title 查询
             redis_manager = get_redis_manager()
             redis_manager = get_redis_manager()
             chapter_labels = miss_outline_df['chapter_label'].unique().tolist()
             chapter_labels = miss_outline_df['chapter_label'].unique().tolist()
-            redis_data = redis_manager.read_catalogues_data_by_chapters(state['callback_task_id'], chapter_labels)
-            path_redis = 'temp/document_temp/redis_data.csv'
-
-            # 去除两个DataFrame中相同chapter_label行的miss_outline列与missing_items列的公共元素
-            miss_outline_df, redis_data, common_elements_list = remove_common_elements_between_dataframes(miss_outline_df, redis_data)
-            logger.info(f"[大纲审查] 公共元素列表: {common_elements_list}")
-            
-            # 将更新后的数据写回Redis
-            for index, row in redis_data.iterrows():
-                chapter_label = row['chapter_label']
-                # 准备要更新的数据
-                update_data = {
-                    'title': chapter_label,
-                    'chapter_label': chapter_label,
-                    'chapter_classification': row.get('chapter_classification', ''),
-                    'missing_items': row.get('missing_items', []),
-                    'common_elements_list': common_elements_list,
-                    'miss_outline': miss_outline_df.loc[miss_outline_df['chapter_label'] == chapter_label, 'miss_outline'].values[0]
-                                   if len(miss_outline_df.loc[miss_outline_df['chapter_label'] == chapter_label]) > 0 else [],
-                    'specification_items': row.get('specification_items', []),
-                }
-                # 使用 update_row_by_title 方法更新Redis中的数据
-                update_success = redis_manager.update_row_by_title(state['callback_task_id'], chapter_label, update_data)
-                if update_success:
-                    logger.info(f"[大纲审查] 成功将章节 '{chapter_label}' 的更新数据写回Redis")
-                else:
-                    logger.warning(f"[大纲审查] 未能将章节 '{chapter_label}' 的数据写回Redis")
+            # 过滤掉不需要查询Redis的特殊章节(如"目录")
+            chapter_labels = [label for label in chapter_labels if label not in ['目录']]
+
+            # 只有当存在需要查询的章节时,才进行Redis操作
+            if chapter_labels:
+                redis_data = redis_manager.read_catalogues_data_by_chapters(state['callback_task_id'], chapter_labels)
+                path_redis = 'temp/document_temp/redis_data.csv'
+
+                # 去除两个DataFrame中相同chapter_label行的miss_outline列与missing_items列的公共元素
+                miss_outline_df, redis_data, common_elements_list = remove_common_elements_between_dataframes(miss_outline_df, redis_data)
+                logger.info(f"[大纲审查] 公共元素列表: {common_elements_list}")
+
+                # 将更新后的数据写回Redis
+                for index, row in redis_data.iterrows():
+                    chapter_label = row['chapter_label']
+                    # 准备要更新的数据
+                    update_data = {
+                        'title': chapter_label,
+                        'chapter_label': chapter_label,
+                        'chapter_classification': row.get('chapter_classification', ''),
+                        'missing_items': row.get('missing_items', []),
+                        'common_elements_list': common_elements_list,
+                        'miss_outline': miss_outline_df.loc[miss_outline_df['chapter_label'] == chapter_label, 'miss_outline'].values[0]
+                                       if len(miss_outline_df.loc[miss_outline_df['chapter_label'] == chapter_label]) > 0 else [],
+                        'specification_items': row.get('specification_items', []),
+                    }
+                    # 使用 update_row_by_title 方法更新Redis中的数据
+                    update_success = redis_manager.update_row_by_title(state['callback_task_id'], chapter_label, update_data)
+                    if update_success:
+                        logger.info(f"[大纲审查] 成功将章节 '{chapter_label}' 的更新数据写回Redis")
+                    else:
+                        logger.warning(f"[大纲审查] 未能将章节 '{chapter_label}' 的数据写回Redis")
+            else:
+                logger.info(f"[大纲审查] 过滤后没有需要查询Redis的章节,跳过Redis操作")
             
             
             # if os.path.exists(path_redis):
             # if os.path.exists(path_redis):
             #     # 文件已存在,追加时不写表头
             #     # 文件已存在,追加时不写表头

+ 3 - 0
core/construction_review/component/report/__init__.py

@@ -0,0 +1,3 @@
+"""
+报告模块
+"""

+ 46 - 0
core/construction_review/component/report/prompt/report_reviewers.yaml

@@ -0,0 +1,46 @@
+# 报告摘要生成提示词配置
+
+# 报告摘要生成(基于高中风险问题)
+report_summary:
+  system_prompt: |
+    你是一名专业的施工方案审查专家,负责分析审查结果并生成专业的摘要报告。
+    你的任务是基于AI审查发现的高风险和中风险问题,生成清晰、专业的审查报告摘要。
+
+    输出要求:
+    - 总体评价需准确反映施工方案整体质量(100-150字)
+    - 高风险问题汇总需抓住核心要点(每个问题20-30字)
+    - 中风险问题汇总需简洁明了(每个问题20-30字)
+    - 改进建议需具体可执行(3-5条,每条30-50字)
+    - 风险提示需突出重点关注点(2-3条)
+    - 必须以JSON格式输出,结构完整规范
+
+  user_prompt_template: |
+    请基于以下施工方案审查结果生成专业的审查报告摘要:
+
+    ## 审查统计
+    - 文件名称: {file_name}
+    - 高风险问题数量: {high_risk_count}
+    - 中风险问题数量: {medium_risk_count}
+    - 审查总问题数: {total_issues}
+
+    ## 高风险问题清单
+    {high_risk_issues_text}
+
+    ## 中风险问题清单
+    {medium_risk_issues_text}
+
+    请生成包含以下内容的审查报告摘要:
+    1. **总体评价**: 对施工方案整体质量的评价(100-150字)
+    2. **高风险问题汇总**: 高风险问题的核心要点汇总(每个问题20-30字)
+    3. **中风险问题汇总**: 中风险问题的核心要点汇总(每个问题20-30字)
+    4. **改进建议**: 针对发现问题的具体改进建议(3-5条,每条30-50字)
+    5. **风险提示**: 需要重点关注的风险点(2-3条)
+
+    请以JSON格式输出,结构如下:
+    {{
+      "overall_assessment": "总体评价文本",
+      "high_risk_summary": ["高风险问题1汇总", "高风险问题2汇总", ...],
+      "medium_risk_summary": ["中风险问题1汇总", "中风险问题2汇总", ...],
+      "improvement_recommendations": ["建议1", "建议2", ...],
+      "risk_alerts": ["风险提示1", "风险提示2", ...]
+    }}

+ 341 - 233
core/construction_review/component/report_generator.py

@@ -1,292 +1,400 @@
 """
 """
 报告生成器
 报告生成器
-负责生成审查报告和多维评分
+负责基于AI审查结果生成审查报告摘要
 """
 """
 
 
 import asyncio
 import asyncio
+import json
+import time
+import os
 from typing import Dict, List, Any, Optional, Callable
 from typing import Dict, List, Any, Optional, Callable
 from dataclasses import dataclass
 from dataclasses import dataclass
 from datetime import datetime
 from datetime import datetime
-import json
 
 
 from foundation.observability.logger.loggering import server_logger as logger
 from foundation.observability.logger.loggering import server_logger as logger
-
-@dataclass
-class DimensionScores:
-    """四维评分"""
-    safety: int      # 安全维度评分
-    quality: int     # 质量维度评分
-    schedule: int    # 进度维度评分
-    cost: int        # 成本维度评分
+from foundation.ai.agent.generate.model_generate import generate_model_client
+from core.construction_review.component.reviewers.utils.prompt_loader import PromptLoader
 
 
 @dataclass
 @dataclass
 class FinalReport:
 class FinalReport:
     """最终报告"""
     """最终报告"""
     file_id: str
     file_id: str
-    document_name: str
-    risk_stats: Dict[str, int]
-    dimension_scores: DimensionScores
-    summary_report: str
-    multidimensional_report: str
-    recommendations: List[str]
+    file_name: str
+    total_issues: int
+    high_risk_count: int
+    medium_risk_count: int
+    low_risk_count: int
+    overall_assessment: str
+    high_risk_summary: List[str]
+    medium_risk_summary: List[str]
+    improvement_recommendations: List[str]
+    risk_alerts: List[str]
     generated_at: datetime
     generated_at: datetime
 
 
 class ReportGenerator:
 class ReportGenerator:
-    """报告生成器"""
+    """报告生成器 - 基于高中风险问题使用LLM生成摘要"""
 
 
     def __init__(self):
     def __init__(self):
-        self.risk_weight_mapping = {
-            "high": 3,
-            "medium": 2,
-            "low": 1
-        }
+        """初始化报告生成器"""
+        self.model_client = generate_model_client
+
+        # 获取提示词加载器实例(使用全局单例)
+        self.prompt_loader = PromptLoader()
+
+        # 临时修改 prompt_config_dir 以加载 report 类型的 prompt
+        current_dir = os.path.dirname(os.path.abspath(__file__))
+        prompt_dir = os.path.join(current_dir, 'report', 'prompt')
+        original_prompt_dir = self.prompt_loader.prompt_config_dir
+
+        try:
+            # 临时切换到 report prompt 目录
+            self.prompt_loader.prompt_config_dir = prompt_dir
+
+            # 手动加载 report 类型的 prompt
+            self.prompt_loader._load_prompt('report', 'report_summary')
+            logger.info(f"成功加载 report_summary prompt,目录: {prompt_dir}")
+
+        except Exception as e:
+            logger.error(f"加载 report_summary prompt 失败: {str(e)}", exc_info=True)
+        finally:
+            # 恢复原始目录
+            self.prompt_loader.prompt_config_dir = original_prompt_dir
 
 
-    async def generate_report(self, file_id: str, review_results: Dict[str, Any],
+        logger.info(f"报告生成器初始化完成")
+
+    async def generate_report(self, file_id: str, file_name: str, review_results: Dict[str, Any],
                             progress_callback: Optional[Callable[[int, str], None]] = None) -> FinalReport:
                             progress_callback: Optional[Callable[[int, str], None]] = None) -> FinalReport:
         """
         """
-        生成审查报告
+        生成审查报告(基于高中风险问题)
 
 
         Args:
         Args:
             file_id: 文件ID
             file_id: 文件ID
-            review_results: AI审查结果
+            file_name: 文件名称
+            review_results: AI审查结果(包含issues列表)
             progress_callback: 进度回调函数
             progress_callback: 进度回调函数
 
 
         Returns:
         Returns:
             FinalReport: 最终报告
             FinalReport: 最终报告
         """
         """
+        start_time = time.time()
+
         try:
         try:
-            logger.info(f"开始生成报告,文件ID: {file_id}")
+            logger.info(f"开始生成报告,文件ID: {file_id}, 文件名: {file_name}")
 
 
             if progress_callback:
             if progress_callback:
-                progress_callback(20, "汇总审查结果")
-
-            # 1. 数据汇总
-            aggregated_data = await self._aggregate_results(review_results)
+                progress_callback(20, "提取高中风险问题")
 
 
-            if progress_callback:
-                progress_callback(40, "计算多维评分")
+            # 1. 提取高中风险问题
+            high_risk_issues, medium_risk_issues, low_risk_count = await self._extract_high_medium_risk_issues(review_results)
+            total_issues = len(high_risk_issues) + len(medium_risk_issues) + low_risk_count
 
 
-            # 2. 计算四维评分
-            dimension_scores = await self._calculate_dimension_scores(aggregated_data)
+            logger.info(f"问题统计 - 高风险: {len(high_risk_issues)}, 中风险: {len(medium_risk_issues)}, 低风险: {low_risk_count}, 总计: {total_issues}")
 
 
             if progress_callback:
             if progress_callback:
-                progress_callback(60, "生成总结报告")
-
-            # 3. 生成总结报告
-            summary_report = await self._generate_summary(aggregated_data, dimension_scores)
+                progress_callback(50, "调用LLM生成报告摘要")
+
+            # 2. 调用LLM生成摘要
+            llm_summary = await self._call_llm_for_summary(
+                file_name=file_name,
+                high_risk_issues=high_risk_issues,
+                medium_risk_issues=medium_risk_issues,
+                total_issues=total_issues,
+                trace_id=file_id
+            )
 
 
             if progress_callback:
             if progress_callback:
-                progress_callback(80, "生成多维报告")
+                progress_callback(80, "解析LLM响应")
 
 
-            # 4. 生成多维报告
-            multidimensional_report = await self._generate_multidimensional_report(
-                aggregated_data, dimension_scores
-            )
+            # 3. 解析LLM响应
+            parsed_summary = self._parse_llm_response(llm_summary)
 
 
             if progress_callback:
             if progress_callback:
                 progress_callback(100, "报告生成完成")
                 progress_callback(100, "报告生成完成")
 
 
-            # 5. 生成建议
-            recommendations = await self._generate_recommendations(aggregated_data, dimension_scores)
-
+            # 4. 构建最终报告
             final_report = FinalReport(
             final_report = FinalReport(
                 file_id=file_id,
                 file_id=file_id,
-                document_name=f"施工方案审查报告_{file_id}",
-                risk_stats=aggregated_data.get('risk_distribution', {}),
-                dimension_scores=dimension_scores,
-                summary_report=summary_report,
-                multidimensional_report=multidimensional_report,
-                recommendations=recommendations,
+                file_name=file_name,
+                total_issues=total_issues,
+                high_risk_count=len(high_risk_issues),
+                medium_risk_count=len(medium_risk_issues),
+                low_risk_count=low_risk_count,
+                overall_assessment=parsed_summary.get("overall_assessment", ""),
+                high_risk_summary=parsed_summary.get("high_risk_summary", []),
+                medium_risk_summary=parsed_summary.get("medium_risk_summary", []),
+                improvement_recommendations=parsed_summary.get("improvement_recommendations", []),
+                risk_alerts=parsed_summary.get("risk_alerts", []),
                 generated_at=datetime.now()
                 generated_at=datetime.now()
             )
             )
 
 
-            logger.info(f"报告生成完成,文件ID: {file_id}")
+            execution_time = time.time() - start_time
+            logger.info(f"报告生成完成,文件ID: {file_id}, 耗时: {execution_time:.2f}秒")
+
             return final_report
             return final_report
 
 
         except Exception as e:
         except Exception as e:
-            logger.error(f"报告生成失败: {str(e)}")
+            logger.error(f"报告生成失败: {str(e)}", exc_info=True)
             raise
             raise
 
 
-    async def _aggregate_results(self, review_results: Dict[str, Any]) -> Dict[str, Any]:
-        """汇总审查结果"""
-        summary = review_results.get('summary', {})
-        risk_stats = summary.get('risk_distribution', {})
+    async def _extract_high_medium_risk_issues(self, review_results: Dict[str, Any]) -> tuple:
+        """
+        从审查结果中提取高中风险问题
 
 
-        # 按风险等级分类问题
-        issues_by_risk = {
-            "high": [],
-            "medium": [],
-            "low": []
-        }
+        Args:
+            review_results: AI审查结果
+
+        Returns:
+            tuple: (high_risk_issues, medium_risk_issues, low_risk_count)
+        """
+        high_risk_issues = []
+        medium_risk_issues = []
+        low_risk_count = 0
+
+        # 获取issues列表(键名为 'review_results')
+        issues = review_results.get('review_results', [])
+
+        logger.info(f"开始提取风险问题,issues数量: {len(issues)}")
+
+        for issue_wrapper in issues:
+            # issue_wrapper是一个字典,key是问题ID,value是问题详情
+            for issue_id, issue_detail in issue_wrapper.items():
+                # 获取review_lists
+                review_lists = issue_detail.get('review_lists', [])
+
+                # 获取元数据
+                metadata = issue_detail.get('metadata', {})
+                location = metadata.get('review_location_label', '未知位置')
+
+                for review_item in review_lists:
+                    # 只处理exist_issue=True的项
+                    if not review_item.get('exist_issue', False):
+                        continue
+
+                    # 获取检查结果(check_result 可能是字典或字符串)
+                    check_result = review_item.get('check_result', {})
+
+                    # 如果 check_result 是字符串,转换为字典格式
+                    if isinstance(check_result, str):
+                        check_result = {'issue_point': check_result}
+                    elif not isinstance(check_result, dict):
+                        check_result = {}
+
+                    # 获取风险级别
+                    risk_level_raw = check_result.get('risk_level', '').lower() if isinstance(check_result, dict) else ''
+
+                    # 统一风险级别格式(支持"高风险"、"high"等多种格式)
+                    if '高' in risk_level_raw or 'high' in risk_level_raw:
+                        risk_level = 'high'
+                    elif '中' in risk_level_raw or 'medium' in risk_level_raw:
+                        risk_level = 'medium'
+                    elif '低' in risk_level_raw or 'low' in risk_level_raw:
+                        risk_level = 'low'
+                    else:
+                        # 默认按risk_info判断
+                        risk_info = review_item.get('risk_info', {})
+                        if isinstance(risk_info, dict):
+                            risk_level = risk_info.get('risk_level', 'low')
+                        else:
+                            risk_level = 'low'
+
+                    # 获取检查项类型
+                    check_item = review_item.get('check_item', 'unknown')
+                    check_item_code = review_item.get('check_item_code', '')
+
+                    # 构建问题对象(包含检查项信息)
+                    issue_obj = {
+                        'check_item': check_item,
+                        'check_item_code': check_item_code,
+                        'location': location,
+                        'issue_point': check_result.get('issue_point', '') if isinstance(check_result, dict) else str(check_result),
+                        'suggestion': check_result.get('suggestion', '') if isinstance(check_result, dict) else '',
+                        'reason': check_result.get('reason', '') if isinstance(check_result, dict) else '',
+                        'reference_source': check_result.get('reference_source', '') if isinstance(check_result, dict) else ''
+                    }
+
+                    # 按风险级别分类
+                    if risk_level == 'high':
+                        high_risk_issues.append(issue_obj)
+                    elif risk_level == 'medium':
+                        medium_risk_issues.append(issue_obj)
+                    else:
+                        low_risk_count += 1
+
+        logger.info(f"风险问题提取完成 - 高风险: {len(high_risk_issues)}, 中风险: {len(medium_risk_issues)}, 低风险: {low_risk_count}")
+
+        return high_risk_issues, medium_risk_issues, low_risk_count
+
+    async def _call_llm_for_summary(self, file_name: str, high_risk_issues: List[Dict],
+                                   medium_risk_issues: List[Dict], total_issues: int, trace_id: str) -> str:
+        """
+        调用LLM生成报告摘要
+
+        Args:
+            file_name: 文件名称
+            high_risk_issues: 高风险问题列表
+            medium_risk_issues: 中风险问题列表
+            total_issues: 总问题数
+            trace_id: 追踪ID
 
 
-        # 分析每个审查单元的问题
-        for result in review_results.get('review_results', []):
-            risk_level = result.overall_risk
-            issues_by_risk[risk_level].append({
-                'unit_index': result.unit_index,
-                'chapter': result.unit_content.get('chapter', ''),
-                'title': result.unit_content.get('title', ''),
-                'content': result.unit_content.get('original_content', ''),
-                'basic_issues': self._extract_issues(result.basic_compliance),
-                'technical_issues': self._extract_issues(result.technical_compliance),
-                'rag_suggestions': result.rag_enhanced.get('enhanced_suggestions', [])
-            })
-
-        return {
-            'risk_distribution': risk_stats,
-            'issues_by_risk': issues_by_risk,
-            'total_units': review_results.get('total_units', 0),
-            'successful_units': review_results.get('successful_units', 0),
-            'failed_units': review_results.get('failed_units', 0)
+        Returns:
+            str: LLM生成的摘要文本(JSON格式)
+        """
+        try:
+            # 构建问题清单文本
+            high_risk_text = self._format_issues_text(high_risk_issues)
+            medium_risk_text = self._format_issues_text(medium_risk_issues)
+
+            # 准备提示词参数
+            prompt_kwargs = {
+                "file_name": file_name,
+                "high_risk_count": len(high_risk_issues),
+                "medium_risk_count": len(medium_risk_issues),
+                "total_issues": total_issues,
+                "high_risk_issues_text": high_risk_text,
+                "medium_risk_issues_text": medium_risk_text
+            }
+
+            # 获取提示词模板
+            task_prompt_info = {
+                "task_prompt": self.prompt_loader.get_prompt_template("report", "report_summary", **prompt_kwargs),
+                "task_name": "报告摘要生成"
+            }
+
+            logger.debug(f"开始调用LLM生成报告摘要")
+
+            # 调用模型(参考base_reviewer.py的实现)
+            model_response = await self.model_client.get_model_generate_invoke(
+                trace_id=trace_id,
+                task_prompt_info=task_prompt_info,
+                timeout=60,
+                model_name="qwen3_30b"
+            )
+
+            logger.info(f"LLM摘要生成成功,响应长度: {len(model_response)} 字符")
+
+            return model_response
+
+        except Exception as e:
+            logger.error(f"调用LLM生成摘要失败: {str(e)}", exc_info=True)
+            raise
+
+    def _format_issues_text(self, issues: List[Dict]) -> str:
+        """
+        格式化问题列表为文本(按 check_item 分组)
+
+        Args:
+            issues: 问题列表
+
+        Returns:
+            str: 格式化后的问题文本
+        """
+        if not issues:
+            return "(无)"
+
+        # 按 check_item 分组
+        from collections import defaultdict
+        grouped_issues = defaultdict(list)
+        for issue in issues:
+            check_item = issue.get('check_item', 'unknown')
+            grouped_issues[check_item].append(issue)
+
+        # 检查项名称映射(中文)
+        check_item_names = {
+            'timeliness_check': '时效性审查',
+            'completeness_check': '完整性审查',
+            'semantic_logic_check': '语义逻辑审查',
+            'reference_check': '参考文献审查',
+            'sensitive_word_check': '敏感词审查',
+            'mandatory_standards_check': '强制性标准审查',
+            'technical_parameters_check': '技术参数审查',
+            'design_values_check': '设计值审查',
+            'rag_enhanced_review': 'RAG增强审查',
+            'professional_suggestion': '专业建议',
+            'unknown': '其他审查'
         }
         }
 
 
-    async def _calculate_dimension_scores(self, aggregated_data: Dict[str, Any]) -> DimensionScores:
-        """计算四维评分"""
-        risk_stats = aggregated_data.get('risk_distribution', {})
-        total_issues = risk_stats.get('high', 0) + risk_stats.get('medium', 0) + risk_stats.get('low', 0)
-        total_units = aggregated_data.get('total_units', 1)
-
-        # 基础评分计算
-        base_score = max(0, 100 - (risk_stats.get('high', 0) * 10) - (risk_stats.get('medium', 0) * 5) - (risk_stats.get('low', 0) * 2))
-
-        # 安全维度评分 (安全风险权重更高)
-        safety_score = max(0, base_score - (risk_stats.get('high', 0) * 15) - (risk_stats.get('medium', 0) * 8))
-
-        # 质量维度评分
-        quality_score = max(0, base_score - (risk_stats.get('high', 0) * 12) - (risk_stats.get('medium', 0) * 6))
-
-        # 进度维度评分
-        schedule_score = max(0, base_score - (risk_stats.get('medium', 0) * 8) - (risk_stats.get('low', 0) * 3))
-
-        # 成本维度评分
-        cost_score = max(0, base_score - (risk_stats.get('high', 0) * 10) - (risk_stats.get('medium', 0) * 5))
-
-        return DimensionScores(
-            safety=min(100, safety_score),
-            quality=min(100, quality_score),
-            schedule=min(100, schedule_score),
-            cost=min(100, cost_score)
-        )
-
-    async def _generate_summary(self, aggregated_data: Dict[str, Any], dimension_scores: DimensionScores) -> str:
-        """生成总结报告"""
-        risk_stats = aggregated_data.get('risk_distribution', {})
-        high_risk = risk_stats.get('high', 0)
-        medium_risk = risk_stats.get('medium', 0)
-        low_risk = risk_stats.get('low', 0)
-        total_issues = high_risk + medium_risk + low_risk
-
-        # 生成总结报告
-        summary_parts = []
-
-        # 整体评价
-        if high_risk == 0 and medium_risk <= 2:
-            summary_parts.append("该施工方案整体符合规范要求,质量良好。")
-        elif high_risk <= 2:
-            summary_parts.append("该施工方案基本符合规范要求,存在少量问题需要整改。")
-        else:
-            summary_parts.append("该施工方案存在较多高风险问题,需要重点整改。")
-
-        # 风险统计
-        summary_parts.append(f"发现风险问题 {total_issues} 个,其中高风险 {high_risk} 个,中风险 {medium_risk} 个,低风险 {low_risk} 个。")
-
-        # 评分说明
-        avg_score = (dimension_scores.safety + dimension_scores.quality +
-                     dimension_scores.schedule + dimension_scores.cost) / 4
-
-        if avg_score >= 85:
-            summary_parts.append(f"综合评分 {avg_score:.1f} 分,表现优秀。")
-        elif avg_score >= 70:
-            summary_parts.append(f"综合评分 {avg_score:.1f} 分,表现良好。")
-        else:
-            summary_parts.append(f"综合评分 {avg_score:.1f} 分,需要改进。")
-
-        return " ".join(summary_parts)
-
-    async def _generate_multidimensional_report(self, aggregated_data: Dict[str, Any],
-                                              dimension_scores: DimensionScores) -> str:
-        """生成多维报告"""
-        report_parts = []
-
-        # 安全维度分析
-        safety_desc = self._get_score_description(dimension_scores.safety, "安全")
-        report_parts.append(f"安全维度评分 {dimension_scores.safety} 分:{safety_desc}")
-
-        # 质量维度分析
-        quality_desc = self._get_score_description(dimension_scores.quality, "质量")
-        report_parts.append(f"质量维度评分 {dimension_scores.quality} 分:{quality_desc}")
-
-        # 进度维度分析
-        schedule_desc = self._get_score_description(dimension_scores.schedule, "进度")
-        report_parts.append(f"进度维度评分 {dimension_scores.schedule} 分:{schedule_desc}")
-
-        # 成本维度分析
-        cost_desc = self._get_score_description(dimension_scores.cost, "成本")
-        report_parts.append(f"成本维度评分 {dimension_scores.cost} 分:{cost_desc}")
-
-        # 综合建议
-        report_parts.append("\n综合建议:")
-        if dimension_scores.safety < 70:
-            report_parts.append("• 重点关注安全管理,完善安全技术措施。")
-        if dimension_scores.quality < 70:
-            report_parts.append("• 加强质量控制,完善施工工艺标准。")
-        if dimension_scores.schedule < 70:
-            report_parts.append("• 优化进度管理,确保工期可控。")
-        if dimension_scores.cost < 70:
-            report_parts.append("• 加强成本控制,避免预算超支。")
-
-        return "\n".join(report_parts)
-
-    async def _generate_recommendations(self, aggregated_data: Dict[str, Any],
-                                       dimension_scores: DimensionScores) -> List[str]:
-        """生成改进建议"""
-        recommendations = []
-
-        # 基于风险分布的建议
-        risk_stats = aggregated_data.get('risk_distribution', {})
-        high_risk_issues = aggregated_data.get('issues_by_risk', {}).get('high', [])
-
-        if high_risk_issues:
-            recommendations.append("立即处理高风险问题:")
-            for issue in high_risk_issues[:3]:  # 取前3个高风险问题
-                recommendations.append(f"• {issue.get('title', '未知章节')}:{issue.get('content', '')[:50]}...")
-
-        # 基于评分的建议
-        if dimension_scores.safety < 80:
-            recommendations.append("完善安全管理体系:")
-            recommendations.append("• 补充安全技术交底记录")
-            recommendations.append("• 加强现场安全防护措施")
-
-        if dimension_scores.quality < 80:
-            recommendations.append("提升施工质量:")
-            recommendations.append("• 完善施工工艺标准")
-            recommendations.append("• 加强质量检查频次")
-
-        if dimension_scores.schedule < 80:
-            recommendations.append("优化进度管理:")
-            recommendations.append("• 细化施工进度计划")
-            recommendations.append("• 建立进度预警机制")
-
-        return recommendations
-
-    def _extract_issues(self, compliance_result: Dict[str, Any]) -> List[str]:
-        """提取问题描述"""
-        if isinstance(compliance_result, dict) and 'error' in compliance_result:
-            return [f"检查失败: {compliance_result['error']}"]
-
-        # 根据实际审查结果结构提取问题
-        # 这里需要根据实际的审查结果结构来实现
-        return []
-
-    def _get_score_description(self, score: int, dimension: str) -> str:
-        """获取评分描述"""
-        if score >= 90:
-            return f"{dimension}管理优秀,符合规范要求,无明显风险。"
-        elif score >= 80:
-            return f"{dimension}管理良好,基本符合规范,存在少量细节问题。"
-        elif score >= 70:
-            return f"{dimension}管理一般,需要改进部分环节。"
-        elif score >= 60:
-            return f"{dimension}管理较差,存在较多问题需要整改。"
-        else:
-            return f"{dimension}管理差,存在严重问题,需要全面整改。"
+        result = []
+        for check_item, item_issues in sorted(grouped_issues.items()):
+            # 添加分组标题
+            check_item_name = check_item_names.get(check_item, check_item)
+            result.append(f"## 【{check_item_name}】({len(item_issues)}个问题)")
+            result.append("")
+
+            # 添加该组的所有问题
+            for idx, issue in enumerate(item_issues, 1):
+                result.append(f"### 问题 {idx}")
+                result.append(f"- 位置: {issue.get('location', '未知')}")
+                result.append(f"- 问题点: {issue.get('issue_point', '未知')}")
+                result.append(f"- 建议: {issue.get('suggestion', '无')}")
+                result.append(f"- 依据: {issue.get('reason', '无')}")
+
+                reference_source = issue.get('reference_source', '无')
+                if reference_source and reference_source != '无':
+                    result.append(f"- 参考来源: {reference_source}")
+
+                result.append("")  # 空行分隔
+
+            result.append("")  # 分组之间额外空行
+
+        return "\n".join(result)
+
+    def _parse_llm_response(self, llm_response: str) -> Dict[str, Any]:
+        """
+        Parse the raw LLM reply into a summary dict containing every required key.
+
+        The reply is expected to be a JSON object, optionally wrapped in a
+        markdown code fence. If direct parsing fails, the outermost ``{...}``
+        span is tried as well, because models often surround the JSON with
+        extra prose. Missing required keys are filled with defaults.
+
+        Args:
+            llm_response: Raw text returned by the LLM.
+
+        Returns:
+            Dict: The parsed summary. When the reply is not valid JSON, or is
+            valid JSON but not an object, a default "generation failed"
+            structure with empty lists is returned instead.
+
+        Raises:
+            Exception: Any unexpected non-JSON error (for example a non-string
+            ``llm_response``) is logged and re-raised.
+        """
+        # Degraded result shared by every "could not get a JSON object" path.
+        fallback_summary = {
+            "overall_assessment": "报告生成失败,请重试",
+            "high_risk_summary": [],
+            "medium_risk_summary": [],
+            "improvement_recommendations": [],
+            "risk_alerts": []
+        }
+
+        try:
+            # Strip an optional markdown code fence around the payload.
+            cleaned_response = llm_response.strip()
+            if cleaned_response.startswith('```json'):
+                cleaned_response = cleaned_response[7:]
+            if cleaned_response.startswith('```'):
+                cleaned_response = cleaned_response[3:]
+            if cleaned_response.endswith('```'):
+                cleaned_response = cleaned_response[:-3]
+
+            cleaned_response = cleaned_response.strip()
+
+            try:
+                parsed = json.loads(cleaned_response)
+            except json.JSONDecodeError:
+                # The model may have added text before/after the JSON object;
+                # retry on the outermost brace-delimited span before giving up.
+                start = cleaned_response.find('{')
+                end = cleaned_response.rfind('}')
+                if start == -1 or end <= start:
+                    raise
+                parsed = json.loads(cleaned_response[start:end + 1])
+
+            # A list/string/number is valid JSON but cannot carry the summary
+            # fields; treat it like a malformed reply instead of crashing below.
+            if not isinstance(parsed, dict):
+                logger.error(f"LLM响应不是JSON对象: {type(parsed).__name__}")
+                return fallback_summary
+
+            # Fill in any required field the model omitted.
+            required_fields = [
+                "overall_assessment",
+                "high_risk_summary",
+                "medium_risk_summary",
+                "improvement_recommendations",
+                "risk_alerts"
+            ]
+
+            for field in required_fields:
+                if field not in parsed:
+                    logger.warning(f"LLM响应缺少必需字段: {field},使用默认值")
+                    if field == "overall_assessment":
+                        parsed[field] = "无法生成总体评价"
+                    else:
+                        parsed[field] = []
+
+            logger.info("LLM响应解析成功")
+            return parsed
+
+        except json.JSONDecodeError as e:
+            logger.error(f"解析LLM响应失败: {str(e)}")
+            logger.error(f"原始响应: {llm_response[:500]}...")  # first 500 chars, for debugging
+
+            # Return the default structure so report generation can continue.
+            return fallback_summary
+        except Exception as e:
+            logger.error(f"解析LLM响应时发生未知错误: {str(e)}", exc_info=True)
+            raise

+ 4 - 1
core/construction_review/component/reviewers/catalogues_check/utils/redis_utils.py

@@ -444,7 +444,10 @@ class CataloguesRedisManager:
                 all_data = self.read_all(task_id)
                 all_data = self.read_all(task_id)
                 if not all_data.empty:
                 if not all_data.empty:
                     # 查找 chapter_label 字段匹配的行
                     # 查找 chapter_label 字段匹配的行
-                    matched_rows = all_data[all_data.get('chapter_label', '') == chapter_label]
+                    if 'chapter_label' in all_data.columns:
+                        matched_rows = all_data[all_data['chapter_label'] == chapter_label]
+                    else:
+                        matched_rows = pd.DataFrame()
                     if not matched_rows.empty:
                     if not matched_rows.empty:
                         redis_row = matched_rows.iloc[0].to_dict()
                         redis_row = matched_rows.iloc[0].to_dict()
                         logger.info(f"[Redis] 通过 chapter_label 字段匹配到数据: {chapter_label}")
                         logger.info(f"[Redis] 通过 chapter_label 字段匹配到数据: {chapter_label}")

+ 52 - 58
core/construction_review/workflows/ai_review_workflow.py

@@ -393,15 +393,16 @@ class AIReviewWorkflow:
                     "chapter": "目录",
                     "chapter": "目录",
                     "title": "目录",
                     "title": "目录",
                     "original_content": "full_content",
                     "original_content": "full_content",
-                    "is_complete_field": False
+                    "is_complete_field": True
                 }
                 }
             ]
             ]
             chapter_chunks_map, chapter_names = self.core_fun._group_chunks_by_chapter(filtered_chunks)
             chapter_chunks_map, chapter_names = self.core_fun._group_chunks_by_chapter(filtered_chunks)
-            # with open("temp/filtered_chunks/chapter_chunks_map.json", "w", encoding="utf-8") as f:
-            #      json.dump(chapter_chunks_map, f, ensure_ascii=False, indent=4)
+
             chapter_chunks_map["catalogue"] = catalogue
             chapter_chunks_map["catalogue"] = catalogue
             review_item_dict_sorted["catalogue"] = ["check_completeness"]
             review_item_dict_sorted["catalogue"] = ["check_completeness"]
             logger.info(f"内容分组完成,共 {len(chapter_chunks_map)} 个章节")
             logger.info(f"内容分组完成,共 {len(chapter_chunks_map)} 个章节")
+            with open("temp/filtered_chunks/chapter_chunks_map.json", "w", encoding="utf-8") as f:
+                json.dump(chapter_chunks_map, f, ensure_ascii=False, indent=4)
             await self.core_fun._send_start_review_progress(state,total_chunks, chapter_names)
             await self.core_fun._send_start_review_progress(state,total_chunks, chapter_names)
             # 6️ 按章节处理
             # 6️ 按章节处理
             for chapter_idx, (chapter_code, func_names) in enumerate(review_item_dict_sorted.items()):
             for chapter_idx, (chapter_code, func_names) in enumerate(review_item_dict_sorted.items()):
@@ -432,9 +433,17 @@ class AIReviewWorkflow:
 
 
             # 7️ 汇总结果
             # 7️ 汇总结果
             summary = self.inter_tool._aggregate_results(all_issues)
             summary = self.inter_tool._aggregate_results(all_issues)
-            
-            # 8️ 构建完整的响应结构
+
+            # 8️ 构建完整的响应结构(兼容 execute() 方法的期望格式)
             review_results = {
             review_results = {
+                # 兼容旧版格式的字段
+                "total_units": total_chunks,
+                "successful_units": completed_chunks,
+                "failed_units": max(0, total_chunks - completed_chunks),
+                "review_results": all_issues,  # 审查结果列表
+                "summary": summary,
+
+                # 额外的元信息
                 "callback_task_id": state["callback_task_id"],
                 "callback_task_id": state["callback_task_id"],
                 "file_name": state.get("file_name", ""),
                 "file_name": state.get("file_name", ""),
                 "user_id": state["user_id"],
                 "user_id": state["user_id"],
@@ -442,12 +451,10 @@ class AIReviewWorkflow:
                 "stage_name": "审查项检查结果",
                 "stage_name": "审查项检查结果",
                 "status": "full_review_result",
                 "status": "full_review_result",
                 "message": f"审查项检查完成,共发现{summary.get('total_issues', 0)}个问题",
                 "message": f"审查项检查完成,共发现{summary.get('total_issues', 0)}个问题",
-                "overall_task_status": "completed",
-                "updated_at": int(time.time()),
-                "issues": all_issues
+                "updated_at": int(time.time())
             }
             }
 
 
-            logger.info(f"AI审查项检查执行成功,任务ID: {state['callback_task_id']}, 共发现{summary.get('total_issues', 0)}个问题")
+            logger.info(f"AI审查项检查执行成功,任务ID: {state['callback_task_id']}, 总单元数: {total_chunks}, 已完成: {completed_chunks}, 共发现{summary.get('total_issues', 0)}个问题")
 
 
             # 返回新的状态
             # 返回新的状态
             return {
             return {
@@ -665,8 +672,16 @@ class AIReviewWorkflow:
                 if unit_issues and isinstance(unit_issues, list):
                 if unit_issues and isinstance(unit_issues, list):
                     all_issues.extend(unit_issues)
                     all_issues.extend(unit_issues)
 
 
-            # 构建符合格式的review_results
+            # 构建符合格式的review_results(兼容 execute() 方法的期望格式)
             review_results = {
             review_results = {
+                # 兼容旧版格式的字段
+                "total_units": total_units,
+                "successful_units": len(successful_results),
+                "failed_units": max(0, total_units - len(successful_results)),
+                "review_results": all_issues,  # 审查结果列表
+                "summary": summary,
+
+                # 额外的元信息
                 "callback_task_id": state["callback_task_id"],
                 "callback_task_id": state["callback_task_id"],
                 "file_name": state.get("file_name", ""),
                 "file_name": state.get("file_name", ""),
                 "user_id": state["user_id"],
                 "user_id": state["user_id"],
@@ -674,12 +689,10 @@ class AIReviewWorkflow:
                 "stage_name": "完整审查结果",
                 "stage_name": "完整审查结果",
                 "status": "full_review_result",
                 "status": "full_review_result",
                 "message": f"审查完成,共发现{summary.get('total_issues', 0)}个问题",
                 "message": f"审查完成,共发现{summary.get('total_issues', 0)}个问题",
-                "overall_task_status": "completed",
-                "updated_at": int(time.time()),
-                "issues": all_issues
+                "updated_at": int(time.time())
             }
             }
 
 
-            logger.info(f"AI审查节点执行成功,任务ID: {state['callback_task_id']}")
+            logger.info(f"AI审查节点执行成功,任务ID: {state['callback_task_id']}, 总单元数: {total_units}, 成功: {len(successful_results)}")
 
 
             # 返回新的状态,避免原地修改导致的LangGraph冲突
             # 返回新的状态,避免原地修改导致的LangGraph冲突
             return {
             return {
@@ -711,53 +724,30 @@ class AIReviewWorkflow:
             AIReviewState: 更新后的工作流状态
             AIReviewState: 更新后的工作流状态
 
 
         Note:
         Note:
-            当前实现:将审查结果以JSON格式保存到temp目录
-            文件名:callback_task_id.json
-            未来规划:使用SQL语句存储到数据库
+            ⚠️ 根据方案D + 决策2(方案A-方式1)的优化:
+            - AI审查工作流作为子图嵌套在外层任务链中时,不在此处保存结果
+            - 外层任务链会在报告生成完成后一次性保存完整结果(文档 + AI审查 + 报告)
+            - 这解决了原有的"AI审查完成就保存,导致结果不完整"的问题
+            - 当前实现:跳过保存,仅返回状态更新
         """
         """
         try:
         try:
-            logger.info(f"开始保存审查结果,任务ID: {state['callback_task_id']}")
-
-            # 创建temp目录(如果不存在)
-            temp_dir = "temp"
-            os.makedirs(temp_dir, exist_ok=True)
+            logger.info(f"AI审查子图完成,跳过保存(由外层任务链统一保存),任务ID: {state['callback_task_id']}")
 
 
-            # 构建文件路径
-            file_path = os.path.join(temp_dir, f"{state['callback_task_id']}.json")
-
-            # 直接获取并保存review_results数据
-            review_results = state.get("review_results", {})
-
-            # 保存review_results到本地JSON文件
-            with open(file_path, 'w', encoding='utf-8') as f:
-                json.dump(review_results, f, ensure_ascii=False, indent=2)
-
-            logger.info(f"审查结果已保存到: {file_path}")
-
-            # # 更新进度状态
-            # if state["progress_manager"]:
-            #     await state["progress_manager"].update_stage_progress(
-            #         callback_task_id=state["callback_task_id"],
-            #         stage_name="结果保存",
-            #         current=95,
-            #         status="processing",
-            #         message=f"审查结果已保存到 {file_path}",
-            #         overall_task_status="processing",
-            #         event_type="processing"
-            #     )
+            # ⚠️ 注意:根据方案A-方式1,此处不保存结果
+            # 保存工作由外层任务链的 _report_generation_node 调用 _save_complete_results 完成
 
 
             return {
             return {
                 "current_stage": "save_results",
                 "current_stage": "save_results",
-                "messages": [AIMessage(content=f"审查结果已保存到: {file_path}")]
+                "messages": [AIMessage(content="AI审查结果将由外层任务链统一保存")]
             }
             }
 
 
         except Exception as e:
         except Exception as e:
-            logger.error(f"保存审查结果失败: {str(e)}", exc_info=True)
+            logger.error(f"保存审查结果节点执行失败: {str(e)}", exc_info=True)
             return {
             return {
                 "current_stage": "save_results_failed",
                 "current_stage": "save_results_failed",
-                "error_message": f"保存结果失败: {str(e)}",
+                "error_message": f"保存结果节点失败: {str(e)}",
                 "status": "failed",
                 "status": "failed",
-                "messages": [AIMessage(content=f"保存结果失败: {str(e)}")]
+                "messages": [AIMessage(content=f"保存结果节点失败: {str(e)}")]
             }
             }
 
 
     async def _complete_node(self, state: AIReviewState) -> AIReviewState:
     async def _complete_node(self, state: AIReviewState) -> AIReviewState:
@@ -771,27 +761,31 @@ class AIReviewWorkflow:
             AIReviewState: 更新后的工作流状态,标记为已完成
             AIReviewState: 更新后的工作流状态,标记为已完成
 
 
         Note:
         Note:
-            设置最终状态为completed,更新进度管理器的完成状态
-            发送工作流完成的消息日志
+            ⚠️ 重要修改(方案D优化):
+            - AI审查工作流作为子图嵌套时,不应标记整体任务完成
+            - 仅标记 AI审查阶段完成(stage_status),不设置 overall_task_status
+            - overall_task_status 只由外层任务链的 _complete_chain_node 统一设置
+            - 这解决了前端误判"AI审查完成 = 整体任务完成"的问题
         """
         """
-        logger.info(f"AI审查完成: {state['file_id']}")
+        logger.info(f"AI审查阶段完成(子图): {state['file_id']}")
 
 
-        # 更新完成状态
+        # 更新AI审查阶段完成状态(不标记整体任务完成)
         if state["progress_manager"]:
         if state["progress_manager"]:
             await state["progress_manager"].update_stage_progress(
             await state["progress_manager"].update_stage_progress(
                 callback_task_id=state["callback_task_id"],
                 callback_task_id=state["callback_task_id"],
                 stage_name="AI审查",
                 stage_name="AI审查",
                 current=90,
                 current=90,
-                status="processing",
-                message="AI审查完成",
-                overall_task_status="processing",
+                status="processing",  # 阶段状态:AI审查本身已完成
+                message="AI审查阶段成,等待报告生成",
+                overall_task_status="processing",  # ⚠️ 关键:整体任务仍在处理中
                 event_type="processing"
                 event_type="processing"
             )
             )
 
 
         return {
         return {
             "current_stage": "complete",
             "current_stage": "complete",
-            "status": "completed",
-            "messages": [AIMessage(content="AI审查工作流完成")]
+            "status": "completed",  # 子图内部状态:AI审查完成
+            # ⚠️ 注意:不设置 overall_task_status,由外层任务链管理
+            "messages": [AIMessage(content="AI审查阶段完成")]
         }
         }
 
 
     async def _error_handler_node(self, state: AIReviewState) -> AIReviewState:
     async def _error_handler_node(self, state: AIReviewState) -> AIReviewState:

+ 13 - 13
core/construction_review/workflows/core_functions/ai_review_core_fun.py

@@ -434,22 +434,21 @@ class AIReviewCoreFun:
                 state=state,
                 state=state,
                 stage_name=f"{stage_name}_大纲审查"
                 stage_name=f"{stage_name}_大纲审查"
             )
             )
-            logger.info(f"[outline_check完成] 共发现 {len(outline_result.get('details', {}).get('response', []))} 个缺失项")
 
 
-            redis_manager = get_redis_manager()
-            catogues_df = redis_manager.read_all(task_id=state['callback_task_id'])
-            
-            # 使用封装的函数处理目录审查列表
-            catogues_reciew_result = process_catalog_review_list(catogues_df)
-            
-            # 保存结果到CSV文件
-            # catogues_df.to_csv('temp/document_temp/catogues_df.csv', encoding='utf-8-sig', index=False)
-            # pd.DataFrame(catogues_reciew_list).to_csv('temp/document_temp/catogues_df_with_review.csv', encoding='utf-8-sig', index=False)
-            with open('temp/document_temp/catogues_result.json', 'a', encoding='utf-8') as f:
-                json.dump(catogues_reciew_result, f, ensure_ascii=False, indent=4)
 
 
             # 对比逻辑
             # 对比逻辑
             if chapter_code == "catalogue":
             if chapter_code == "catalogue":
+                redis_manager = get_redis_manager()
+                catogues_df = redis_manager.read_all(task_id=state['callback_task_id'])
+                
+                # 使用封装的函数处理目录审查列表
+                catogues_reciew_result = process_catalog_review_list(catogues_df)
+                logger.info(f"[目录审查] 获取目录数据成功:{catogues_df}")
+                # 保存结果到CSV文件
+                # catogues_df.to_csv('temp/document_temp/catogues_df.csv', encoding='utf-8-sig', index=False)
+                # pd.DataFrame(catogues_reciew_list).to_csv('temp/document_temp/catogues_df_with_review.csv', encoding='utf-8-sig', index=False)
+                with open('temp/document_temp/catogues_result.json', 'w', encoding='utf-8') as f:
+                    json.dump(catogues_reciew_result, f, ensure_ascii=False, indent=4)
 
 
                 return UnitReviewResult(
                 return UnitReviewResult(
                     unit_index=chunk_index,
                     unit_index=chunk_index,
@@ -460,6 +459,7 @@ class AIReviewCoreFun:
                     technical_compliance={},
                     technical_compliance={},
                     rag_enhanced={},
                     rag_enhanced={},
                     overall_risk=self._calculate_single_result_risk(completeness_result),
                     overall_risk=self._calculate_single_result_risk(completeness_result),
+                    is_sse_push=True
                 )
                 )
             else:
             else:
                 # 将两个结果都放入 basic_compliance
                 # 将两个结果都放入 basic_compliance
@@ -468,7 +468,7 @@ class AIReviewCoreFun:
                     unit_content=chunk,
                     unit_content=chunk,
                     basic_compliance={
                     basic_compliance={
                         "check_completeness": completeness_result,
                         "check_completeness": completeness_result,
-                        "outline_check": outline_result
+                        # "outline_check": outline_result
                     },
                     },
                     technical_compliance={},
                     technical_compliance={},
                     rag_enhanced={},
                     rag_enhanced={},

+ 70 - 42
core/construction_review/workflows/report_workflow.py

@@ -4,7 +4,7 @@
 """
 """
 
 
 import asyncio
 import asyncio
-from typing import Optional, Callable
+from typing import Optional, Callable, Dict, Any
 from datetime import datetime
 from datetime import datetime
 
 
 from foundation.observability.logger.loggering import server_logger as logger
 from foundation.observability.logger.loggering import server_logger as logger
@@ -13,9 +13,21 @@ from ..component import ReportGenerator
 class ReportWorkflow:
 class ReportWorkflow:
     """报告生成工作流"""
     """报告生成工作流"""
 
 
-    def __init__(self, file_id: str, callback_task_id: str, user_id: str,
+    def __init__(self, file_id: str, file_name: str, callback_task_id: str, user_id: str,
                  ai_review_results: dict, progress_manager=None):
                  ai_review_results: dict, progress_manager=None):
+        """
+        初始化报告生成工作流
+
+        Args:
+            file_id: 文件ID
+            file_name: 文件名称
+            callback_task_id: 回调任务ID
+            user_id: 用户ID
+            ai_review_results: AI审查结果(包含issues列表)
+            progress_manager: 进度管理器
+        """
         self.file_id = file_id
         self.file_id = file_id
+        self.file_name = file_name
         self.callback_task_id = callback_task_id
         self.callback_task_id = callback_task_id
         self.user_id = user_id
         self.user_id = user_id
         self.ai_review_results = ai_review_results
         self.ai_review_results = ai_review_results
@@ -23,58 +35,65 @@ class ReportWorkflow:
         self.report_generator = ReportGenerator()
         self.report_generator = ReportGenerator()
 
 
     async def execute(self) -> dict:
     async def execute(self) -> dict:
-        """执行报告生成工作流"""
+        """
+        执行报告生成工作流
+
+        Returns:
+            dict: 报告生成结果
+        """
         try:
         try:
-            logger.info(f"开始报告生成工作流,文件ID: {self.file_id}")
+            logger.info(f"开始报告生成工作流,文件ID: {self.file_id}, 文件名: {self.file_name}")
 
 
             # 1. 初始化进度
             # 1. 初始化进度
-            await self.progress_manager.update_stage_progress(
-                callback_task_id=self.callback_task_id,
-                stage_name="报告生成",
-                current=0,
-                status="processing",
-                message="开始生成报告"
-            )
+            if self.progress_manager:
+                await self.progress_manager.update_stage_progress(
+                    callback_task_id=self.callback_task_id,
+                    stage_name="报告生成",
+                    current=0,
+                    status="processing",
+                    message="开始生成报告"
+                )
 
 
-            # 2. 生成报告
+            # 2. 生成报告(带进度回调)
             def progress_callback(current: int, message: str):
             def progress_callback(current: int, message: str):
                 # 将报告生成的进度映射到整体进度
                 # 将报告生成的进度映射到整体进度
                 overall_progress = 90 + int(current * 0.1)  # 报告生成占整体进度的10%
                 overall_progress = 90 + int(current * 0.1)  # 报告生成占整体进度的10%
-                asyncio.create_task(
-                    self.progress_manager.update_stage_progress(
-                        callback_task_id=self.callback_task_id,
-                        stage_name="报告生成",
-                        current=overall_progress,
-                        status="processing",
-                        message=message
+                if self.progress_manager:
+                    asyncio.create_task(
+                        self.progress_manager.update_stage_progress(
+                            callback_task_id=self.callback_task_id,
+                            stage_name="报告生成",
+                            current=overall_progress,
+                            status="processing",
+                            message=message
+                        )
                     )
                     )
-                )
 
 
             final_report = await self.report_generator.generate_report(
             final_report = await self.report_generator.generate_report(
                 file_id=self.file_id,
                 file_id=self.file_id,
+                file_name=self.file_name,
                 review_results=self.ai_review_results,
                 review_results=self.ai_review_results,
                 progress_callback=progress_callback
                 progress_callback=progress_callback
             )
             )
 
 
             # 3. 更新完成状态
             # 3. 更新完成状态
-            await self.progress_manager.update_stage_progress(
-                callback_task_id=self.callback_task_id,
-                stage_name="报告生成",
-                current=100,
-                status="completed",
-                message="报告生成完成"
-            )
-
-
+            if self.progress_manager:
+                await self.progress_manager.update_stage_progress(
+                    callback_task_id=self.callback_task_id,
+                    stage_name="报告生成",
+                    current=100,
+                    status="completed",
+                    message="报告生成完成"
+                )
 
 
-            # 5. 处理结果
+            # 4. 转换为字典格式返回
             result = self._convert_report_to_dict(final_report)
             result = self._convert_report_to_dict(final_report)
 
 
             logger.info(f"报告生成工作流完成,文件ID: {self.file_id}")
             logger.info(f"报告生成工作流完成,文件ID: {self.file_id}")
             return result
             return result
 
 
         except Exception as e:
         except Exception as e:
-            logger.error(f"报告生成工作流失败: {str(e)}")
+            logger.error(f"报告生成工作流失败: {str(e)}", exc_info=True)
 
 
             # 更新错误状态
             # 更新错误状态
             if self.progress_manager:
             if self.progress_manager:
@@ -89,20 +108,29 @@ class ReportWorkflow:
             raise
             raise
 
 
     def _convert_report_to_dict(self, final_report) -> dict:
     def _convert_report_to_dict(self, final_report) -> dict:
-        """将报告对象转换为字典"""
+        """
+        将报告对象转换为字典
+
+        Args:
+            final_report: FinalReport对象
+
+        Returns:
+            dict: 报告字典
+        """
         return {
         return {
             'file_id': final_report.file_id,
             'file_id': final_report.file_id,
-            'document_name': final_report.document_name,
-            'risk_stats': final_report.risk_stats,
-            'dimension_scores': {
-                'safety': final_report.dimension_scores.safety,
-                'quality': final_report.dimension_scores.quality,
-                'schedule': final_report.dimension_scores.schedule,
-                'cost': final_report.dimension_scores.cost
+            'file_name': final_report.file_name,
+            'total_issues': final_report.total_issues,
+            'risk_stats': {
+                'high': final_report.high_risk_count,
+                'medium': final_report.medium_risk_count,
+                'low': final_report.low_risk_count
             },
             },
-            'summary_report': final_report.summary_report,
-            'multidimensional_report': final_report.multidimensional_report,
-            'recommendations': final_report.recommendations,
+            'overall_assessment': final_report.overall_assessment,
+            'high_risk_summary': final_report.high_risk_summary,
+            'medium_risk_summary': final_report.medium_risk_summary,
+            'improvement_recommendations': final_report.improvement_recommendations,
+            'risk_alerts': final_report.risk_alerts,
             'generated_at': final_report.generated_at.isoformat()
             'generated_at': final_report.generated_at.isoformat()
         }
         }
 
 

+ 32 - 0
core/construction_review/workflows/types/__init__.py

@@ -31,3 +31,35 @@ class AIReviewState(TypedDict):
     error_message: Optional[str]
     error_message: Optional[str]
     progress_manager: Optional[Any]
     progress_manager: Optional[Any]
     messages: Annotated[List[BaseMessage], add_messages]
     messages: Annotated[List[BaseMessage], add_messages]
+
+
+class TaskChainState(TypedDict):
+    """State of the task-chain workflow (the outer LangGraph state)."""
+
+    # Basic task information
+    file_id: str
+    callback_task_id: str
+    user_id: str
+    file_name: str
+    file_type: str
+    file_content: bytes
+
+    # Task status management
+    current_stage: str  # Current stage: document_processing, ai_review, report_generation, complete
+    overall_task_status: str  # Overall task status: processing, completed, failed, terminated
+    stage_status: Dict[str, str]  # Per-stage status: {"document": "completed", "ai_review": "processing", ...}
+
+    # Results of each stage
+    document_result: Optional[Dict[str, Any]]  # Document processing result
+    ai_review_result: Optional[Dict[str, Any]]  # AI review result
+    report_result: Optional[Dict[str, Any]]  # Report generation result
+
+    # Error handling
+    error_message: Optional[str]
+
+    # Service components
+    progress_manager: Optional[Any]
+    task_file_info: Optional[Any]  # TaskFileInfo instance
+
+    # Message list
+    messages: Annotated[List[BaseMessage], add_messages]

+ 2 - 2
foundation/ai/agent/generate/model_generate.py

@@ -78,8 +78,8 @@ class GenerateModelClient:
                           如果为None,则使用默认模型
                           如果为None,则使用默认模型
         """
         """
         start_time = time.time()
         start_time = time.time()
-        current_timeout = int(timeout) or int(self.default_timeout)
-
+        #current_timeout = int(timeout) or int(self.default_timeout)
+        current_timeout = timeout or self.default_timeout
         try:
         try:
             # 根据model_name选择对应的模型
             # 根据model_name选择对应的模型
             if model_name:
             if model_name:

+ 10 - 4
foundation/ai/rag/retrieval/query_rewrite.py

@@ -65,7 +65,8 @@ class QueryRewriteManager():
                         asyncio.run,
                         asyncio.run,
                         self.generate_model_client.get_model_generate_invoke(
                         self.generate_model_client.get_model_generate_invoke(
                             trace_id=trace_id,
                             trace_id=trace_id,
-                            task_prompt_info=task_prompt_info
+                            task_prompt_info=task_prompt_info,
+                            timeout=60
                         )
                         )
                     )
                     )
                     model_response = future.result()
                     model_response = future.result()
@@ -73,12 +74,17 @@ class QueryRewriteManager():
                 # 没有运行中的事件循环,直接使用asyncio.run
                 # 没有运行中的事件循环,直接使用asyncio.run
                 model_response = asyncio.run(self.generate_model_client.get_model_generate_invoke(
                 model_response = asyncio.run(self.generate_model_client.get_model_generate_invoke(
                     trace_id=trace_id,
                     trace_id=trace_id,
-                    task_prompt_info=task_prompt_info
+                    task_prompt_info=task_prompt_info,
+                    timeout=60
                 ))
                 ))
-            
+
             # 格式化模型响应
             # 格式化模型响应
             formatted_response = self.ai_respose_format(model_response)
             formatted_response = self.ai_respose_format(model_response)
-            server_logger.info(f"查询对构建完成,构建 {len(formatted_response)}条。")
+            # 检查 formatted_response 是否为 None
+            if formatted_response is not None:
+                server_logger.info(f"查询对构建完成,构建 {len(formatted_response)}条。")
+            else:
+                server_logger.warning("查询对构建失败,formatted_response 为 None")
             # 记录日志
             # 记录日志
             if formatted_response:
             if formatted_response:
                 server_logger.info(f"Query 提取成功, 提取到 {len(formatted_response)} 个实体")
                 server_logger.info(f"Query 提取成功, 提取到 {len(formatted_response)} 个实体")

+ 11 - 0
foundation/observability/monitoring/rag/__init__.py

@@ -0,0 +1,11 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+RAG监控模块
+提供RAG链路各环节的监控能力
+"""
+
+from .rag_monitor import RAGMonitor, rag_monitor
+
+__all__ = ["RAGMonitor", "rag_monitor"]

+ 343 - 0
foundation/observability/monitoring/rag/rag_monitor.py

@@ -0,0 +1,343 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+RAG链路监控装饰器
+支持同步/异步函数的输入输出监控
+"""
+
+import time
+import json
+import functools
+import inspect
+from typing import Callable, Optional, Dict, Any, Union
+from pathlib import Path
+
+from foundation.observability.logger.loggering import server_logger as logger
+
+
+class RAGMonitor:
+    """Collect per-step input/output/timing records for a RAG pipeline trace.
+
+    Step records are kept in memory under ``pipeline_data[trace_id]["steps"]``
+    and are written to ``<save_dir>/<trace_id>.json`` by :meth:`end_trace`.
+
+    NOTE(review): the active trace is a single ``current_trace_id`` attribute,
+    so overlapping traces on one instance would attribute steps to whichever
+    trace was started last - confirm callers run one trace at a time.
+    """
+
+    def __init__(self, save_dir: str = "temp/rag_monitoring"):
+        """Create the monitor and make sure the output directory exists.
+
+        Args:
+            save_dir: Directory where finished traces are written as JSON.
+        """
+        self.save_dir = Path(save_dir)
+        self.save_dir.mkdir(parents=True, exist_ok=True)
+        self.pipeline_data = {}
+        self.current_trace_id = None
+
+    def start_trace(self, trace_id: str, metadata: Optional[Dict] = None):
+        """Start a new trace and make it the current one.
+
+        An existing entry with the same ``trace_id`` is replaced.
+
+        Args:
+            trace_id: Identifier of the trace.
+            metadata: Optional metadata stored with the trace.
+        """
+        self.current_trace_id = trace_id
+        self.pipeline_data[trace_id] = {
+            "trace_id": trace_id,
+            "start_time": time.time(),
+            "metadata": metadata or {},
+            "steps": {}
+        }
+        logger.info(f"[RAG监控] 开始追踪会话: {trace_id}")
+
+    def end_trace(self, trace_id: Optional[str] = None) -> Optional[Dict]:
+        """Stamp the end time of a trace and write it to a JSON file.
+
+        The trace stays in ``pipeline_data`` and ``current_trace_id`` is not
+        reset by this call.
+
+        Args:
+            trace_id: Trace to finish; falls back to the current trace.
+
+        Returns:
+            The trace record, or ``None`` when the trace is unknown.
+        """
+        trace_id = trace_id or self.current_trace_id
+        if trace_id not in self.pipeline_data:
+            logger.warning(f"[RAG监控] 追踪会话不存在: {trace_id}")
+            return None
+
+        data = self.pipeline_data[trace_id]
+        data["end_time"] = time.time()
+        data["total_duration"] = round(data["end_time"] - data["start_time"], 3)
+
+        # Persist to disk; default=str stringifies anything json cannot encode.
+        file_path = self.save_dir / f"{trace_id}.json"
+        with open(file_path, 'w', encoding='utf-8') as f:
+            json.dump(data, f, ensure_ascii=False, indent=2, default=str)
+
+        logger.info(f"[RAG监控] 追踪会话已保存: {file_path}, 总耗时: {data['total_duration']}秒")
+        return data
+
+    def get_trace_data(self, trace_id: Optional[str] = None) -> Optional[Dict]:
+        """Return the in-memory record of a trace.
+
+        Args:
+            trace_id: Trace to look up; falls back to the current trace.
+
+        Returns:
+            The trace record, or ``None`` when the trace is unknown.
+        """
+        trace_id = trace_id or self.current_trace_id
+        return self.pipeline_data.get(trace_id)
+
+    def monitor_step(
+        self,
+        step_name: str,
+        capture_input: bool = True,
+        capture_output: bool = True,
+        input_transform: Optional[Callable] = None,
+        output_transform: Optional[Callable] = None,
+        max_input_length: int = 500,
+        max_output_length: int = 1000
+    ):
+        """Build a decorator that records a call as one step of the current trace.
+
+        Works for both plain functions and ``async def`` functions.
+
+        Args:
+            step_name: Name under which the step is stored.
+            capture_input: Whether to record the call arguments.
+            capture_output: Whether to record the return value.
+            input_transform: Optional callable applied to the recorded input
+                (e.g. to drop sensitive fields or shrink the data).
+            output_transform: Optional callable applied to the recorded output.
+            max_input_length: Strings longer than this are truncated in the
+                recorded input.
+            max_output_length: Strings longer than this are truncated in the
+                recorded output.
+
+        Example:
+            @rag_monitor.monitor_step("query_extract", capture_input=True)
+            def extract_query(content: str):
+                return query_rewrite_manager.query_extract(content)
+        """
+        def decorator(func: Callable):
+            # Pick the wrapper flavour once, at decoration time.
+            is_async = inspect.iscoroutinefunction(func)
+
+            if is_async:
+                @functools.wraps(func)
+                async def async_wrapper(*args, **kwargs):
+                    return await self._execute_with_monitoring(
+                        func, step_name, args, kwargs,
+                        capture_input, capture_output,
+                        input_transform, output_transform,
+                        max_input_length, max_output_length,
+                        is_async=True
+                    )
+                return async_wrapper
+            else:
+                @functools.wraps(func)
+                def sync_wrapper(*args, **kwargs):
+                    return self._execute_with_monitoring(
+                        func, step_name, args, kwargs,
+                        capture_input, capture_output,
+                        input_transform, output_transform,
+                        max_input_length, max_output_length,
+                        is_async=False
+                    )
+                return sync_wrapper
+
+        return decorator
+
+    def _execute_with_monitoring(
+        self,
+        func: Callable,
+        step_name: str,
+        args: tuple,
+        kwargs: dict,
+        capture_input: bool,
+        capture_output: bool,
+        input_transform: Optional[Callable],
+        output_transform: Optional[Callable],
+        max_input_length: int,
+        max_output_length: int,
+        is_async: bool
+    ):
+        """Run ``func`` and record the call as a step of the current trace.
+
+        Returns:
+            For a sync function, its return value. For an async function, an
+            awaitable that yields its return value (the async wrapper awaits it).
+
+        Raises:
+            Exception: Whatever ``func`` raises is re-raised after the step has
+                been recorded with status ``"error"``.
+        """
+        trace_id = self.current_trace_id
+        if not trace_id:
+            logger.warning(f"[RAG监控] 未找到活跃的追踪会话,跳过监控: {step_name}")
+            # No active trace: just run the function. For an async function this
+            # returns the coroutine, which the async wrapper awaits.
+            return func(*args, **kwargs)
+
+        step_data = {
+            "step_name": step_name,
+            "function_name": func.__name__,
+            "start_time": time.time()
+        }
+
+        if capture_input:
+            input_data = {
+                "args": self._safe_serialize(args, max_input_length),
+                "kwargs": self._safe_serialize(kwargs, max_input_length)
+            }
+            if input_transform:
+                try:
+                    input_data = input_transform(input_data)
+                except Exception as e:
+                    # A failing transform must not block the call; keep the raw capture.
+                    logger.warning(f"[RAG监控] 输入转换失败: {e}")
+            step_data["input"] = input_data
+
+        if is_async:
+            # The await has to happen inside the try/except that records
+            # failures, so the work is handed to a coroutine helper instead of
+            # returning a task from within a synchronous try block.
+            return self._run_async_step(
+                func, args, kwargs, step_data, trace_id,
+                capture_output, output_transform, max_output_length
+            )
+
+        try:
+            result = func(*args, **kwargs)
+            self._finalize_step(step_data, result, trace_id, capture_output,
+                               output_transform, max_output_length, success=True)
+            return result
+        except Exception as e:
+            self._finalize_step(step_data, None, trace_id, capture_output,
+                               output_transform, max_output_length, success=False, error=e)
+            raise
+
+    async def _run_async_step(
+        self,
+        func: Callable,
+        args: tuple,
+        kwargs: dict,
+        step_data: Dict,
+        trace_id: str,
+        capture_output: bool,
+        output_transform: Optional[Callable],
+        max_output_length: int
+    ):
+        """Await ``func`` and record success or failure of the step."""
+        try:
+            result = await func(*args, **kwargs)
+            self._finalize_step(step_data, result, trace_id, capture_output,
+                               output_transform, max_output_length, success=True)
+            return result
+        except Exception as e:
+            self._finalize_step(step_data, None, trace_id, capture_output,
+                               output_transform, max_output_length, success=False, error=e)
+            raise
+
+    def _finalize_step(
+        self,
+        step_data: Dict,
+        result: Any,
+        trace_id: str,
+        capture_output: bool,
+        output_transform: Optional[Callable],
+        max_output_length: int,
+        success: bool,
+        error: Optional[Exception] = None
+    ):
+        """Complete a step record (status, output or error, timing) and store it.
+
+        Nothing is stored when ``trace_id`` is no longer in ``pipeline_data``.
+        """
+        if success:
+            step_data["status"] = "success"
+
+            if capture_output:
+                output_data = self._safe_serialize(result, max_output_length)
+                if output_transform:
+                    try:
+                        output_data = output_transform(output_data)
+                    except Exception as e:
+                        # Keep the untransformed capture if the transform fails.
+                        logger.warning(f"[RAG监控] 输出转换失败: {e}")
+                step_data["output"] = output_data
+        else:
+            step_data["status"] = "error"
+            step_data["error"] = {
+                "type": type(error).__name__,
+                "message": str(error)
+            }
+            logger.error(f"[RAG监控] 步骤执行失败: {step_data['step_name']}, 错误: {error}")
+
+        step_data["end_time"] = time.time()
+        step_data["duration"] = round(step_data["end_time"] - step_data["start_time"], 3)
+
+        if trace_id in self.pipeline_data:
+            # A step name may repeat within a trace; append _1, _2, ... so that
+            # earlier records are not overwritten.
+            original_step_name = step_data['step_name']
+            step_name = original_step_name
+            counter = 1
+            while step_name in self.pipeline_data[trace_id]["steps"]:
+                step_name = f"{original_step_name}_{counter}"
+                counter += 1
+
+            self.pipeline_data[trace_id]["steps"][step_name] = step_data
+            logger.info(f"[RAG监控] 步骤完成: {step_name}, 耗时: {step_data['duration']}秒")
+
+    def _safe_serialize(self, obj: Any, max_length: int = 500) -> Any:
+        """Convert ``obj`` into a size-bounded structure for the step record.
+
+        - ``None``, ``int``, ``float`` and ``bool`` are returned unchanged.
+        - Strings longer than ``max_length`` become a dict holding a preview.
+        - Lists and tuples become a summary dict: a 3-item preview, plus the
+          full data when there are at most 10 items.
+        - Dicts keep at most their first 20 keys (keys are stringified).
+        - Anything else is represented by its ``str()`` form.
+
+        Args:
+            obj: Object to convert.
+            max_length: Maximum length of any recorded string.
+
+        Returns:
+            The converted object.
+        """
+        if obj is None:
+            return None
+
+        # Plain scalars are recorded as-is.
+        if isinstance(obj, (int, float, bool)):
+            return obj
+
+        if isinstance(obj, str):
+            if len(obj) > max_length:
+                return {
+                    "type": "string",
+                    "length": len(obj),
+                    "preview": obj[:max_length],
+                    "truncated": True
+                }
+            return obj
+
+        # Sequences
+        if isinstance(obj, (list, tuple)):
+            result = {
+                "type": "list" if isinstance(obj, list) else "tuple",
+                "count": len(obj)
+            }
+
+            # Preview of the first 3 items only.
+            if len(obj) > 0:
+                result["preview"] = [self._safe_serialize(item, max_length) for item in obj[:3]]
+
+            # Short sequences (at most 10 items) are also stored in full.
+            if len(obj) <= 10:
+                result["full_data"] = [self._safe_serialize(item, max_length) for item in obj]
+            else:
+                result["truncated"] = True
+
+            return result
+
+        # Mappings
+        if isinstance(obj, dict):
+            result = {}
+            keys_list = list(obj.keys())
+
+            # Keep at most 20 keys.
+            for key in keys_list[:20]:
+                try:
+                    result[str(key)] = self._safe_serialize(obj[key], max_length)
+                except Exception as e:
+                    result[str(key)] = f"<序列化失败: {e}>"
+
+            if len(keys_list) > 20:
+                result["__truncated__"] = f"省略了 {len(keys_list) - 20} 个键"
+
+            return result
+
+        # Everything else: fall back to the string representation.
+        try:
+            str_repr = str(obj)
+            if len(str_repr) > max_length:
+                return {
+                    "type": type(obj).__name__,
+                    "preview": str_repr[:max_length],
+                    "truncated": True
+                }
+            return {"type": type(obj).__name__, "value": str_repr}
+        except Exception as e:
+            return {"type": type(obj).__name__, "error": f"无法序列化: {e}"}
+
+
+# Global monitor instance
+rag_monitor = RAGMonitor()

+ 274 - 0
foundation/observability/monitoring/rag/rag_trace_panel.html

@@ -0,0 +1,274 @@
+<!DOCTYPE html>
+<html lang="zh">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>施工方案RAG链路透视镜</title>
+    <script src="https://unpkg.com/react@17/umd/react.development.js"></script>
+    <script src="https://unpkg.com/react-dom@17/umd/react-dom.development.js"></script>
+    <script src="https://unpkg.com/@babel/standalone/babel.min.js"></script>
+    <script src="https://cdn.tailwindcss.com"></script>
+    <style>
+        .score-bar { transition: width 0.5s ease; }
+        .lane-connector { 
+            position: absolute; 
+            left: 50%; 
+            border-left: 2px dashed #cbd5e1; 
+            z-index: 0;
+        }
+    </style>
+</head>
+<body class="bg-slate-50 text-slate-800">
+    <div id="root"></div>
+
+    <script type="text/babel">
+        // Mock of the log data supplied for this case
+        const logData = {
+          "stage": "rag_enhanced_check",
+          "steps": {
+            "1_query_extract": {
+              "output": {
+                "query_pairs": [
+                  {
+                    "entity": "T梁",
+                    "background": "安装作业,涉及外观检查、焊接连接...",
+                    "parameter": "梁端面平齐,梁缝符合要求..."
+                  },
+                  {
+                    "entity": "安全带",
+                    "background": "高处作业人员使用...",
+                    "parameter": "挂钩或绳子应挂在结实牢固的构件上..."
+                  }
+                ]
+              }
+            },
+            "2_entity_enhance_retrieval": {
+              "output": {
+                "results": [
+                  [ // results for the "T梁" query
+                    {
+                      "metadata": { "title": "17.2.7 构件的存放" },
+                      "text_content": "构件应按其安装的先后顺序编号存放...",
+                      "hybrid_similarity": 0.78,
+                      "rerank_score": 0.96,
+                      "bfp_rerank_score": 0.09, // low score: the reason this document gets dropped
+                      "source_entity": "T梁"
+                    },
+                    {
+                      "metadata": { "title": "17.2.6 构件的场内移运" },
+                      "bfp_rerank_score": 0.02,
+                      "source_entity": "T梁"
+                    }
+                  ],
+                  [ // results for the "安全带" query
+                    {
+                      "metadata": { "title": "坠落防护 安全带" },
+                      "text_content": "坠落防护 安全带...",
+                      "hybrid_similarity": 0.65,
+                      "rerank_score": 0.92,
+                      "bfp_rerank_score": 0.97,
+                      "source_entity": "安全带"
+                    }
+                  ]
+                ]
+              }
+            },
+            "4_extract_query_pairs_results": {
+              "output": {
+                // Simulates the bug: the entity is "T梁" but the content is about "安全带"
+                "entity_results": [
+                  {
+                    "entity": "T梁", 
+                    "query_index": 0,
+                    "metadata": { "title": "坠落防护 安全带" }, 
+                    "final_score": 0.97,
+                    "is_bug": true // flag intended for front-end highlighting
+                  }
+                ]
+              }
+            }
+          }
+        };
+
+        // Small labelled badge for one score.
+        // Props: label (caption), value (score; may be missing), threshold (scores below it are shown in red).
+        const ScoreBadge = ({ label, value, threshold = 0.5 }) => {
+            // Only finite numbers count as a score; 0 is a valid score, not a missing one.
+            const hasValue = typeof value === 'number' && Number.isFinite(value);
+            const isLow = hasValue && value < threshold;
+            // A missing score gets a neutral style instead of the green "pass" style.
+            let colorClass = "bg-green-100 text-green-700 border-green-200";
+            if (!hasValue) {
+                colorClass = "bg-slate-100 text-slate-500 border-slate-200";
+            } else if (isLow) {
+                colorClass = "bg-red-100 text-red-700 border-red-200";
+            }
+            return (
+                <div className={`flex flex-col border rounded px-2 py-1 text-xs ${colorClass} mb-1`}>
+                    <span className="opacity-70">{label}</span>
+                    <span className="font-bold text-sm">{hasValue ? value.toFixed(2) : 'N/A'}</span>
+                </div>
+            );
+        };
+
+        // Card for one document: title, optional score badges, a "filtered" notice and a text excerpt.
+        // Props:
+        //   doc     - reads metadata.title, file_name, hybrid_similarity, rerank_score,
+        //             bfp_rerank_score and text_content
+        //   isFinal - when true, the score badges and the "filtered" notice are not rendered
+        //   isBug   - when true, the card gets a red border and the bug banner
+        const DocCard = ({ doc, isFinal = false, isBug = false }) => {
+            return (
+                <div className={`relative p-3 rounded-lg border-2 mb-2 bg-white shadow-sm ${isBug ? 'border-red-500 ring-2 ring-red-200' : 'border-slate-200'}`}>
+                    {isBug && (
+                        <div className="absolute -top-3 -right-3 bg-red-600 text-white text-xs font-bold px-2 py-1 rounded shadow animate-pulse">
+                            BUG DETECTED: 索引错位
+                        </div>
+                    )}
+                    <div className="font-semibold text-slate-700 text-sm truncate" title={doc.metadata?.title}>
+                        {doc.metadata?.title || doc.file_name || "Unknown Doc"}
+                    </div>
+                    
+                    {!isFinal && (
+                        <div className="grid grid-cols-3 gap-1 mt-2">
+                            <ScoreBadge label="混合检索" value={doc.hybrid_similarity} />
+                            <ScoreBadge label="粗排分数" value={doc.rerank_score} />
+                            <ScoreBadge label="背景重排" value={doc.bfp_rerank_score} threshold={0.5} />
+                        </div>
+                    )}
+                    {doc.bfp_rerank_score < 0.5 && !isFinal && (
+                        <div className="mt-1 text-xs text-red-500 font-bold text-center bg-red-50 py-1 rounded">
+                            {'[X] 因背景不匹配被过滤 (Score < 0.5)'}
+                        </div>
+                    )}
+                    
+                    <div className="mt-2 text-xs text-slate-500 line-clamp-2">
+                        {doc.text_content}
+                    </div>
+                </div>
+            );
+        };
+
+        // One row of the board: extracted query -> retrieved documents -> final mapped result.
+        // Props:
+        //   queryPair        - reads entity, background, parameter
+        //   retrievalResults - array of documents rendered with DocCard (may be empty or missing)
+        //   finalResult      - final entry for this query, or a falsy value when there is none
+        //   index            - zero-based lane index (displayed as index + 1)
+        const SwimLane = ({ queryPair, retrievalResults, finalResult, index }) => {
+            const hasResults = retrievalResults && retrievalResults.length > 0;
+            // Bug check 1: the final result's entity differs from the query's entity.
+            const isMismatchBug = finalResult && finalResult.entity !== queryPair.entity;
+            // Bug check 2: a "安全带" document mapped onto the "T梁" query.
+            // Every link is optional-chained so a result without a title (or a query
+            // without an entity) yields undefined instead of throwing a TypeError.
+            const isContentMismatch = finalResult && finalResult.metadata?.title?.includes("安全带") && queryPair.entity?.includes("T梁");
+            const bugDetected = isMismatchBug || isContentMismatch;
+
+            return (
+                <div className="flex flex-row gap-4 mb-8 min-w-[1000px]">
+                    {/* 1. Extraction stage */}
+                    <div className="w-1/4 flex-shrink-0">
+                        <div className="bg-blue-50 border-l-4 border-blue-500 p-4 rounded shadow-sm h-full relative">
+                            <div className="absolute -left-3 -top-3 w-8 h-8 bg-blue-600 text-white rounded-full flex items-center justify-center font-bold">
+                                {index + 1}
+                            </div>
+                            <h3 className="font-bold text-lg text-blue-900">{queryPair.entity}</h3>
+                            <div className="mt-2 text-xs text-slate-600">
+                                <span className="font-semibold bg-blue-100 px-1 rounded">背景</span> {queryPair.background}
+                            </div>
+                            <div className="mt-2 text-xs text-slate-600">
+                                <span className="font-semibold bg-purple-100 px-1 rounded">参数</span> {queryPair.parameter}
+                            </div>
+                        </div>
+                    </div>
+
+                    {/* Arrow */}
+                    <div className="flex flex-col justify-center items-center w-8 text-slate-300">
+                        →
+                    </div>
+
+                    {/* 2. Retrieval and ranking stage */}
+                    <div className="w-1/3 flex-shrink-0 flex flex-col gap-2">
+                        {hasResults ? (
+                            retrievalResults.map((doc, idx) => (
+                                <DocCard key={idx} doc={doc} />
+                            ))
+                        ) : (
+                            <div className="text-center p-4 border-2 border-dashed border-gray-300 rounded text-gray-400">
+                                未召回到相关文档
+                            </div>
+                        )}
+                    </div>
+
+                    {/* Arrow */}
+                    <div className="flex flex-col justify-center items-center w-8 text-slate-300">
+                        →
+                    </div>
+
+                    {/* 3. Final result stage (where the bug is shown) */}
+                    <div className="w-1/4 flex-shrink-0">
+                        {finalResult ? (
+                            <div className={`h-full ${bugDetected ? 'bg-red-50' : 'bg-green-50'} p-1 rounded`}>
+                                <DocCard doc={finalResult} isFinal={true} isBug={bugDetected} />
+                                {bugDetected && (
+                                    <div className="text-xs text-red-600 mt-2 font-mono p-2 bg-red-100 rounded">
+                                        错误分析: <br/>
+                                        Query: <strong>{queryPair.entity}</strong><br/>
+                                        Result: <strong>{finalResult.metadata?.title}</strong><br/>
+                                        <span className="font-bold">索引匹配错误!</span>
+                                    </div>
+                                )}
+                            </div>
+                        ) : (
+                            <div className="h-full flex items-center justify-center border-2 border-dashed border-slate-200 rounded bg-slate-50 text-slate-400 text-sm">
+                                此链路被过滤
+                            </div>
+                        )}
+                    </div>
+                </div>
+            );
+        };
+
+        // Page root: reads the three step outputs from logData and renders one SwimLane per
+        // query pair, followed by a static panel of fix suggestions.
+        const Dashboard = () => {
+            const queryPairs = logData.steps["1_query_extract"].output.query_pairs;
+            const retrievalOutputs = logData.steps["2_entity_enhance_retrieval"].output.results;
+            const finalResults = logData.steps["4_extract_query_pairs_results"].output.entity_results;
+
+            return (
+                <div className="p-8 min-h-screen">
+                    <header className="mb-8 border-b pb-4">
+                        <h1 className="text-3xl font-bold text-slate-800">RAG 链路透视看板</h1>
+                        <p className="text-slate-500 mt-2">
+                            针对 "T梁安装" 案例的执行轨迹回放与问题诊断
+                        </p>
+                    </header>
+
+                    <div className="flex flex-col">
+                        {/* Column headers */}
+                        <div className="flex flex-row gap-4 mb-4 text-sm font-bold text-slate-400 uppercase tracking-wider min-w-[1000px]">
+                            <div className="w-1/4">Step 1: 语义提取 (Query Construction)</div>
+                            <div className="w-8"></div>
+                            <div className="w-1/3">Step 2 & 3: 召回与重排序 (Recall & Rerank)</div>
+                            <div className="w-8"></div>
+                            <div className="w-1/4">Step 4: 最终映射 (Final Mapping)</div>
+                        </div>
+
+                        {/* Swim lanes */}
+                        {queryPairs.map((pair, index) => {
+                            // Plain positional lookup, imitating the behavior of the code being diagnosed
+                            const results = retrievalOutputs[index] || [];
+                            // Simulates the bug in the code: lane 0 (T梁) is forced to show entry 0 of the result list (actually 安全带, because T梁 was filtered out)
+                            // Real logic should find the result by query_index
+                            const finalRes = finalResults.find(r => r.query_index === index) || (index === 0 && finalResults[0]); 
+                            
+                            return (
+                                <SwimLane 
+                                    key={index} 
+                                    index={index}
+                                    queryPair={pair} 
+                                    retrievalResults={results}
+                                    finalResult={finalRes}
+                                />
+                            );
+                        })}
+                    </div>
+                    
+                    <div className="mt-12 bg-slate-800 text-slate-200 p-6 rounded-lg">
+                        <h3 className="text-xl font-bold mb-4 text-white">[工具] 修复建议 (基于日志分析)</h3>
+                        <ul className="list-disc pl-5 space-y-2">
+                            <li>
+                                <strong className="text-red-400">修复索引错位 Bug (Critical):</strong> 
+                                在 `extract_query_pairs_results` 函数中,不要假设 `enhanced_results` 的索引与 `query_pairs` 一一对应。
+                                因为中间有过滤步骤,列表长度变了。应该使用 Map 或字典来通过 `query_index` 锚定结果。
+                            </li>
+                            <li>
+                                <strong className="text-yellow-400">解决 T梁 召回相关性低:</strong> 
+                                观察到 `bfp_rerank_score` 只有 0.09。这说明虽然召回了"T梁",但内容是关于"存放"的,而你需要"安装"。
+                                建议在 Step 2 检索时,强制将 `background` 中的关键词(如"安装"、"焊接"、"支撑")加入到 Keyword Search 中,而不仅仅是搜索 Entity。
+                            </li>
+                        </ul>
+                    </div>
+                </div>
+            );
+        };
+
+        ReactDOM.render(<Dashboard />, document.getElementById('root'));
+    </script>
+</body>
+</html>

+ 3 - 3
prompts.txt

@@ -11,13 +11,13 @@
 任务:审查施工方案目录,找出缺失的目录项。
 任务:审查施工方案目录,找出缺失的目录项。
 
 
 待审查章节标题:
 待审查章节标题:
-第十二章编制及审核人员情况
+八、应急救援预案
 
 
 待审查目录内容:
 待审查目录内容:
-待审查目录为空
+待审查目录包含:(一)目的、(二)工作原则、(三)应急救援预案组织机构、(四)现场急救措施、(五)现场应急措施、(六)施工安全事故应急响应
 
 
 规范要求:
 规范要求:
-其他资料章节应包含1.计算书、2.相关施工图纸、3.附图附表、4.编制及审核人员情况等方面的内容
+安全保证措施章节应包含1.安全保证体系、2.组织保证措施、3.技术保证措施、4.监测监控措施、5.应急处置措施等方面的内容
 
 
 输出格式:
 输出格式:
 - 只输出缺失的目录项序号数字,多个数字用逗号分隔(如:3,5)
 - 只输出缺失的目录项序号数字,多个数字用逗号分隔(如:3,5)

+ 170 - 143
utils_test/RAG_Test/rag_pipeline_web/rag_pipeline_server.py

@@ -27,6 +27,7 @@ from core.construction_review.component.infrastructure.parent_tool import (
 from foundation.ai.rag.retrieval.entities_enhance import entity_enhance
 from foundation.ai.rag.retrieval.entities_enhance import entity_enhance
 from foundation.ai.rag.retrieval.query_rewrite import query_rewrite_manager
 from foundation.ai.rag.retrieval.query_rewrite import query_rewrite_manager
 from foundation.observability.logger.loggering import server_logger as logger
 from foundation.observability.logger.loggering import server_logger as logger
+from foundation.observability.monitoring.rag import rag_monitor
 
 
 # 全局Milvus Manager
 # 全局Milvus Manager
 milvus_manager = None
 milvus_manager = None
@@ -56,173 +57,199 @@ def run_async(coro):
 
 
 def rag_enhanced_check(query_content: str) -> dict:
 def rag_enhanced_check(query_content: str) -> dict:
     """
     """
-    RAG增强检查 - 完整链路
+    RAG增强检查 - 完整链路(使用装饰器监控版本)
     复用 ai_review_engine.py rag_enhanced_check 方法的逻辑
     复用 ai_review_engine.py rag_enhanced_check 方法的逻辑
     """
     """
     global milvus_manager
     global milvus_manager
     if milvus_manager is None:
     if milvus_manager is None:
         init_milvus()
         init_milvus()
 
 
-    pipeline_data = {
-        "stage": "rag_enhanced_check",
-        "timestamp": time.time(),
-        "input_content": query_content,
-        "steps": {}
-    }
-
-    logger.info(f"[RAG增强] 开始处理, 内容长度: {len(query_content)}")
-
-    # Step 1: 查询提取
-    step1_start = time.time()
-    query_pairs = query_rewrite_manager.query_extract(query_content)
-    step1_time = time.time() - step1_start
-
-    pipeline_data["steps"]["1_query_extract"] = {
-        "name": "查询提取",
-        "execution_time": round(step1_time, 3),
-        "input": {
-            "content_length": len(query_content),
-            "content_full": query_content,
-            "content_preview": query_content[:200]
-        },
-        "output": {
-            "query_pairs_count": len(query_pairs),
-            "query_pairs": query_pairs
-        }
-    }
-    logger.info(f"[RAG增强] 提取到 {len(query_pairs)} 个查询对")
-
-    # Step 2: 实体增强检索 - 直接复用 entity_enhance.entities_enhance_retrieval()
-    step2_start = time.time()
-    bfp_result_lists = entity_enhance.entities_enhance_retrieval(query_pairs)
-    step2_time = time.time() - step2_start
-
-    pipeline_data["steps"]["2_entity_enhance_retrieval"] = {
-        "name": "实体增强检索",
-        "execution_time": round(step2_time, 3),
-        "input": {
-            "query_pairs_count": len(query_pairs),
-            "query_pairs": query_pairs
-        },
-        "output": {
-            "results_count": len(bfp_result_lists) if bfp_result_lists else 0,
-            "results": bfp_result_lists if bfp_result_lists else []
-        }
-    }
+    # 开始追踪会话
+    trace_id = f"rag_{int(time.time() * 1000)}"
+    rag_monitor.start_trace(trace_id, metadata={
+        "content_length": len(query_content),
+        "content_preview": query_content[:100] if len(query_content) > 100 else query_content,
+        "stage": "rag_enhanced_check"
+    })
 
 
-    # # 🔍 保存关键节点结果(用于对比分析)
-    # os.makedirs(os.path.join(project_root, "temp", "rag_pipeline_server"), exist_ok=True)
-    # with open(os.path.join(project_root, "temp", "rag_pipeline_server", "bfp_result_lists.json"), "w", encoding='utf-8') as f:
-    #     json.dump(bfp_result_lists, f, ensure_ascii=False, indent=4)
-    # logger.info("[RAG增强] ✅ 已保存 bfp_result_lists 到 temp/rag_pipeline_server/bfp_result_lists.json")
-
-    # 检查检索结果
-    if not bfp_result_lists:
-        logger.warning("[RAG增强] 实体检索未返回结果")
-        # 返回空结果
-        pipeline_data["final_result"] = {
-            'vector_search': [],
-            'retrieval_status': 'no_results',
-            'file_name': '',
-            'text_content': '',
-            'metadata': {}
-        }
-        pipeline_data["total_execution_time"] = round(time.time() - pipeline_data["timestamp"], 3)
+    logger.info(f"[RAG增强] 开始处理, trace_id: {trace_id}, 内容长度: {len(query_content)}")
 
 
-
-        return pipeline_data
-
-    # Step 3: 父文档增强(使用分组增强策略 - 每个查询对独立处理 + 按分数筛选)
-    step3_start = time.time()
     try:
     try:
-        enhancement_result = enhance_with_parent_docs_grouped(
-            milvus_manager,
-            bfp_result_lists,
-            score_threshold=0.3,  # bfp_rerank_score 阈值
-            max_parents_per_pair=3  # 每个查询对最多3个父文档
+        # Step 1: 查询提取(使用装饰器监控)
+        @rag_monitor.monitor_step(
+            step_name="1_query_extract",
+            capture_input=True,
+            capture_output=True,
+            input_transform=lambda x: {
+                "content_length": len(query_content),
+                "content_preview": query_content[:200] if len(query_content) > 200 else query_content
+            }
+        )
+        def step1_query_extract(content):
+            return query_rewrite_manager.query_extract(content)
+
+        query_pairs = step1_query_extract(query_content)
+        logger.info(f"[RAG增强] 提取到 {len(query_pairs)} 个查询对")
+
+        # Step 2: 实体增强检索
+        @rag_monitor.monitor_step(
+            step_name="2_entity_enhance_retrieval",
+            capture_input=True,
+            capture_output=True,
+            output_transform=lambda x: {
+                "results_count": len(x) if x else 0,
+                "has_results": bool(x)
+            } if isinstance(x, list) else x
         )
         )
-        enhanced_results = enhancement_result['enhanced_results']
-        enhanced_count = enhancement_result['enhanced_count']
-        parent_docs = enhancement_result['parent_docs']
-        enhanced_pairs = enhancement_result.get('enhanced_pairs', 0)
-        total_pairs = enhancement_result.get('total_pairs', 0)
-
-        # 保存增强后的结果
-        # with open(os.path.join(project_root, "temp", "rag_pipeline_server", "enhance_with_parent_docs_grouped.json"), "w", encoding='utf-8') as f:
-        #     json.dump(enhanced_results, f, ensure_ascii=False, indent=4)
-
-        logger.info(f"[RAG增强] 分组增强完成: {enhanced_pairs}/{total_pairs} 个查询对进行了增强")
-        logger.info(f"[RAG增强] 成功增强 {enhanced_count} 个结果,使用了 {len(parent_docs)} 个父文档")
-
-        pipeline_data["steps"]["3_parent_doc_enhancement"] = {
-            "name": "父文档增强(分组策略)",
-            "execution_time": round(time.time() - step3_start, 3),
-            "input": {
+        def step2_entity_retrieval(pairs):
+            return entity_enhance.entities_enhance_retrieval(pairs)
+
+        bfp_result_lists = step2_entity_retrieval(query_pairs)
+
+        # 检查检索结果
+        if not bfp_result_lists:
+            logger.warning("[RAG增强] 实体检索未返回结果")
+            final_result = _build_empty_result()
+            return _finalize_pipeline_result(trace_id, final_result)
+
+        # Step 3: 父文档增强(使用分组增强策略)
+        @rag_monitor.monitor_step(
+            step_name="3_parent_doc_enhancement",
+            capture_input=True,
+            capture_output=True,
+            input_transform=lambda x: {
                 "bfp_results_count": len(bfp_result_lists),
                 "bfp_results_count": len(bfp_result_lists),
-                "score_threshold": 0.7,
+                "score_threshold": 0.3,
                 "max_parents_per_pair": 3
                 "max_parents_per_pair": 3
             },
             },
-            "output": {
-                "enhanced_count": enhanced_count,
-                "parent_docs_count": len(parent_docs),
-                "enhanced_pairs": enhanced_pairs,
-                "total_pairs": total_pairs,
-                "parent_docs": parent_docs,
-                "enhanced_results": enhanced_results
-            }
-        }
-    except Exception as e:
-        logger.error(f"[RAG增强] 父文档增强失败: {e}", exc_info=True)
-        enhanced_results = bfp_result_lists
-        pipeline_data["steps"]["3_parent_doc_enhancement"] = {
-            "name": "父文档增强(分组策略)",
-            "execution_time": round(time.time() - step3_start, 3),
-            "input": {"bfp_results_count": len(bfp_result_lists)},
-            "output": {"error": str(e), "enhanced_results": enhanced_results}
-        }
+            output_transform=lambda x: {
+                "enhanced_count": x.get("enhanced_count", 0),
+                "parent_docs_count": len(x.get("parent_docs", [])),
+                "enhanced_pairs": x.get("enhanced_pairs", 0),
+                "total_pairs": x.get("total_pairs", 0)
+            } if isinstance(x, dict) else {"error": str(x)}
+        )
+        def step3_parent_enhancement(results):
+            return enhance_with_parent_docs_grouped(
+                milvus_manager,
+                results,
+                score_threshold=0.3,  # bfp_rerank_score 阈值
+                max_parents_per_pair=3  # 每个查询对最多3个父文档
+            )
+
+        try:
+            enhancement_result = step3_parent_enhancement(bfp_result_lists)
+            enhanced_results = enhancement_result['enhanced_results']
+            enhanced_count = enhancement_result['enhanced_count']
+            parent_docs = enhancement_result['parent_docs']
+            enhanced_pairs = enhancement_result.get('enhanced_pairs', 0)
+            total_pairs = enhancement_result.get('total_pairs', 0)
+
+            logger.info(f"[RAG增强] 分组增强完成: {enhanced_pairs}/{total_pairs} 个查询对进行了增强")
+            logger.info(f"[RAG增强] 成功增强 {enhanced_count} 个结果,使用了 {len(parent_docs)} 个父文档")
+
+        except Exception as e:
+            logger.error(f"[RAG增强] 父文档增强失败: {e}", exc_info=True)
+            enhanced_results = bfp_result_lists
+
+        # Step 4: 提取结果(按查询对区分,只保留得分>0.5的结果)
+        @rag_monitor.monitor_step(
+            step_name="4_extract_query_pairs_results",
+            capture_input=True,
+            capture_output=True,
+            input_transform=lambda x: {
+                "enhanced_results_count": len(enhanced_results) if enhanced_results else 0,
+                "query_pairs_count": len(query_pairs),
+                "score_threshold": 0.5
+            },
+            output_transform=lambda x: {
+                "entities_count": len(x) if x else 0,
+                "has_results": bool(x)
+            } if isinstance(x, list) else x
+        )
+        def step4_extract_results(results, pairs):
+            return extract_query_pairs_results(results, pairs, score_threshold=0.5) if results else []
 
 
-    # Step 4: 提取结果(按查询对区分,只保留得分>0.5的结果)
-    step4_start = time.time()
-    entity_results = extract_query_pairs_results(enhanced_results, query_pairs, score_threshold=0.5) if enhanced_results else []
-
-    pipeline_data["steps"]["4_extract_query_pairs_results"] = {
-        "name": "按查询对提取结果",
-        "execution_time": round(time.time() - step4_start, 3),
-        "input": {
-            "enhanced_results_count": len(enhanced_results) if enhanced_results else 0,
-            "query_pairs_count": len(query_pairs),
-            "score_threshold": 0.5
-        },
-        "output": {
-            "entity_results": entity_results,
-            "entities_count": len(entity_results)
-        }
+        entity_results = step4_extract_results(enhanced_results, query_pairs)
+
+        # 构建最终结果
+        final_result = _build_final_result(entity_results)
+
+        return _finalize_pipeline_result(trace_id, final_result)
+
+    except Exception as e:
+        logger.error(f"[RAG增强] 处理失败: {e}", exc_info=True)
+        raise
+    finally:
+        # 结束追踪并保存(即使出错也要保存)
+        rag_monitor.end_trace(trace_id)
+
+
+def _build_empty_result() -> dict:
+    """构建空结果"""
+    return {
+        'retrieval_status': 'no_results',
+        'entity_results': [],
+        'total_entities': 0,
+        'vector_search': [],
+        'file_name': '',
+        'text_content': '',
+        'metadata': {}
     }
     }
 
 
-    # 构建最终结果
+
+def _build_final_result(entity_results: list) -> dict:
+    """构建最终结果"""
     if not entity_results:
     if not entity_results:
-        final_result = {
+        return {
             'retrieval_status': 'no_results',
             'retrieval_status': 'no_results',
             'entity_results': [],
             'entity_results': [],
             'total_entities': 0,
             'total_entities': 0,
-            'message': '没有结果通过阈值过滤(得分>0.8)'
-        }
-    else:
-        final_result = {
-            'retrieval_status': 'success',
-            'entity_results': entity_results,
-            'total_entities': len(entity_results)
+            'message': '没有结果通过阈值过滤(得分>0.5)'
         }
         }
+    return {
+        'retrieval_status': 'success',
+        'entity_results': entity_results,
+        'total_entities': len(entity_results)
+    }
 
 
-    pipeline_data["final_result"] = final_result
-    pipeline_data["total_execution_time"] = round(time.time() - pipeline_data["timestamp"], 3)
 
 
-    # 保存到文件
-    os.makedirs(os.path.join(project_root, "temp", "rag_pipeline_server"), exist_ok=True)
-    with open(os.path.join(project_root, "temp", "rag_pipeline_server", "rag_pipeline_data.json"), "w", encoding='utf-8') as f:
-        json.dump(pipeline_data, f, ensure_ascii=False, indent=2, default=str)
+def _finalize_pipeline_result(trace_id: str, final_result: dict) -> dict:
+    """
+    完成管道结果处理,合并监控数据和业务结果
+
+    Args:
+        trace_id: 追踪会话ID
+        final_result: 业务最终结果
 
 
-    return pipeline_data
+    Returns:
+        合并后的完整结果
+    """
+    # 获取监控数据
+    trace_data = rag_monitor.get_trace_data(trace_id)
+
+    if trace_data:
+        # 合并监控数据和业务结果
+        pipeline_data = {
+            "trace_id": trace_id,
+            "stage": "rag_enhanced_check",
+            "timestamp": trace_data.get("start_time"),
+            "total_execution_time": trace_data.get("total_duration", 0),
+            "metadata": trace_data.get("metadata", {}),
+            "steps": trace_data.get("steps", {}),
+            "final_result": final_result
+        }
+
+        # 保存到旧的位置(兼容前端)
+        os.makedirs(os.path.join(project_root, "temp", "rag_pipeline_server"), exist_ok=True)
+        with open(os.path.join(project_root, "temp", "rag_pipeline_server", "rag_pipeline_data.json"), "w", encoding='utf-8') as f:
+            json.dump(pipeline_data, f, ensure_ascii=False, indent=2, default=str)
+
+        return pipeline_data
+    else:
+        # 如果没有监控数据,直接返回业务结果
+        logger.warning(f"[RAG增强] 未找到追踪数据: {trace_id}")
+        return final_result
 
 
 
 
 class RAGPipelineHandler(SimpleHTTPRequestHandler):
 class RAGPipelineHandler(SimpleHTTPRequestHandler):

+ 340 - 0
utils_test/RAG_Test/test_rag_monitor.py

@@ -0,0 +1,340 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+RAG监控装饰器使用示例和测试脚本
+展示如何使用 rag_monitor 装饰器监控RAG链路
+"""
+
+import sys
+import os
+import time
+import json
+import asyncio
+from pathlib import Path
+
+# 添加项目根目录到路径
+project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+sys.path.insert(0, project_root)
+
+from foundation.observability.monitoring.rag import rag_monitor
+from foundation.observability.logger.loggering import server_logger as logger
+
+
+# ========== 示例1: 同步函数监控 ==========
+
+@rag_monitor.monitor_step(
+    step_name="example_sync_query_extract",
+    capture_input=True,
+    capture_output=True
+)
+def example_query_extract(content: str):
+    """Example step: pretend to extract query pairs from ``content``.
+
+    Logs the length of ``content``, sleeps for half a second to simulate
+    work, and returns a fixed list of two ``{"query", "entity"}`` dicts.
+    The returned data does not depend on the input.
+    """
+    logger.info(f"正在提取查询,内容长度: {len(content)}")
+    time.sleep(0.5)  # simulated processing latency
+
+    # Canned extraction result shared by the demo pipelines.
+    mock_queries = ("安全生产条件", "施工管理制度")
+    mock_entities = ("安全", "施工")
+    return [
+        {"query": query, "entity": entity}
+        for query, entity in zip(mock_queries, mock_entities)
+    ]
+
+
+@rag_monitor.monitor_step(
+    step_name="example_sync_vector_search",
+    capture_input=True,
+    capture_output=True,
+    output_transform=lambda x: {  # record summary statistics only
+        "results_count": len(x),
+        "has_results": bool(x)
+    }
+)
+def example_vector_search(query_pairs: list):
+    """Example step: pretend to run a vector search for each query pair.
+
+    Args:
+        query_pairs: list of dicts, each providing a ``"query"`` string.
+
+    Returns:
+        One mock hit per input pair with ``query``, ``doc_id``, ``score``
+        and ``content`` keys.
+    """
+    # Local import keeps this block self-contained.
+    import zlib
+
+    logger.info(f"正在进行向量检索,查询对数量: {len(query_pairs)}")
+    time.sleep(1.0)  # simulated retrieval latency
+
+    results = []
+    for pair in query_pairs:
+        query = pair["query"]
+        # Built-in hash() of a str is salted per process, so it would give a
+        # different doc_id on every run; CRC32 keeps recorded traces stable.
+        doc_number = zlib.crc32(query.encode("utf-8")) % 100
+        results.append({
+            "query": query,
+            "doc_id": f"doc_{doc_number}",
+            "score": 0.85,
+            "content": f"这是关于{query}的内容..."
+        })
+    return results
+
+
+# ========== 示例2: 异步函数监控 ==========
+
+@rag_monitor.monitor_step(
+    step_name="example_async_rerank",
+    capture_input=True,
+    capture_output=True,
+    input_transform=lambda x: {  # record input statistics only
+        "results_count": len(x["args"][0]) if x["args"] else 0
+    }
+)
+async def example_async_rerank(results: list):
+    """Example async step: order hits by score and keep the best five.
+
+    Args:
+        results: list of dicts, each providing a numeric ``"score"``.
+
+    Returns:
+        A new list with at most five entries, highest score first; the
+        input list is left untouched.
+    """
+    logger.info(f"正在进行重排序,结果数量: {len(results)}")
+    await asyncio.sleep(0.8)  # simulated asynchronous work
+
+    # Sort a copy in descending score order, then truncate to the top five.
+    ranked = list(results)
+    ranked.sort(key=lambda hit: hit["score"], reverse=True)
+    top_hits = ranked[:5]
+    return top_hits
+
+
+@rag_monitor.monitor_step(
+    step_name="example_async_parent_enhance",
+    capture_input=True,
+    capture_output=True
+)
+async def example_async_parent_enhance(results: list):
+    """Example async step: attach mock parent-document context to each hit.
+
+    Args:
+        results: list of dicts, each providing a ``"content"`` string.
+
+    Returns:
+        A new list of dicts: every original key plus ``parent_content``
+        and ``enhanced=True``.
+    """
+    logger.info(f"正在进行父文档增强,结果数量: {len(results)}")
+    await asyncio.sleep(1.2)  # simulated asynchronous work
+
+    # Copy each hit and append the simulated parent-document fields.
+    return [
+        {
+            **hit,
+            "parent_content": f"父文档内容: {hit['content']}的完整上下文...",
+            "enhanced": True
+        }
+        for hit in results
+    ]
+
+
+# ========== 示例3: 完整的RAG链路测试 ==========
+
+def test_sync_rag_pipeline():
+    """Run the synchronous demo pipeline inside one monitoring trace.
+
+    Starts a trace, runs the query-extract and vector-search example steps,
+    and always ends the trace in ``finally`` before printing its summary.
+    """
+    divider = "=" * 60
+    print("\n" + divider)
+    print("示例1: 同步RAG链路监控")
+    print(divider)
+
+    # Open the trace session; the id embeds the current time in milliseconds.
+    trace_id = f"test_sync_{int(time.time() * 1000)}"
+    trace_meta = {
+        "test_type": "sync",
+        "description": "同步RAG链路测试"
+    }
+    rag_monitor.start_trace(trace_id, metadata=trace_meta)
+
+    try:
+        # Step 1: query extraction
+        pairs = example_query_extract(
+            "请检查施工方案中的安全生产条件和施工管理制度是否符合规范要求。"
+        )
+        print(f"✅ 查询提取完成,提取到 {len(pairs)} 个查询对")
+
+        # Step 2: vector search
+        hits = example_vector_search(pairs)
+        print(f"✅ 向量检索完成,找到 {len(hits)} 个结果")
+
+        print(f"\n✅ 同步RAG链路测试完成")
+
+    finally:
+        # Close the trace even if a step raised, then report its summary.
+        summary = rag_monitor.end_trace(trace_id)
+        print(f"\n📊 追踪数据已保存: temp/rag_monitoring/{trace_id}.json")
+        print(f"⏱️  总耗时: {summary['total_duration']}秒")
+        print(f"📝 步骤数量: {len(summary['steps'])}")
+
+
+async def test_async_rag_pipeline():
+    """Run the asynchronous demo pipeline inside one monitoring trace.
+
+    Feeds three hard-coded hits through the async rerank and parent-enhance
+    example steps, and always ends the trace in ``finally`` before printing
+    its summary.
+    """
+    divider = "=" * 60
+    print("\n" + divider)
+    print("示例2: 异步RAG链路监控")
+    print(divider)
+
+    # Open the trace session; the id embeds the current time in milliseconds.
+    trace_id = f"test_async_{int(time.time() * 1000)}"
+    trace_meta = {
+        "test_type": "async",
+        "description": "异步RAG链路测试"
+    }
+    rag_monitor.start_trace(trace_id, metadata=trace_meta)
+
+    try:
+        # Seed data standing in for an earlier retrieval stage.
+        seed_hits = [
+            {"query": "安全", "doc_id": "doc_1", "score": 0.82, "content": "安全内容..."},
+            {"query": "施工", "doc_id": "doc_2", "score": 0.91, "content": "施工内容..."},
+            {"query": "管理", "doc_id": "doc_3", "score": 0.75, "content": "管理内容..."}
+        ]
+
+        # Step 1: async rerank
+        reranked = await example_async_rerank(seed_hits)
+        print(f"✅ 重排序完成,保留前 {len(reranked)} 个结果")
+
+        # Step 2: async parent-document enhancement
+        enhanced = await example_async_parent_enhance(reranked)
+        print(f"✅ 父文档增强完成,增强了 {len(enhanced)} 个结果")
+
+        print(f"\n✅ 异步RAG链路测试完成")
+
+    finally:
+        # Close the trace even if a step raised, then report its summary.
+        summary = rag_monitor.end_trace(trace_id)
+        print(f"\n📊 追踪数据已保存: temp/rag_monitoring/{trace_id}.json")
+        print(f"⏱️  总耗时: {summary['total_duration']}秒")
+        print(f"📝 步骤数量: {len(summary['steps'])}")
+
+
+def test_mixed_rag_pipeline():
+    """Run a pipeline that mixes synchronous and asynchronous example steps.
+
+    The two synchronous steps run directly; the two asynchronous steps are
+    wrapped in a nested coroutine and driven with ``asyncio.run``. The trace
+    is always ended in ``finally`` before its summary is printed.
+    """
+    divider = "=" * 60
+    print("\n" + divider)
+    print("示例3: 混合RAG链路监控(同步+异步)")
+    print(divider)
+
+    # Open the trace session; the id embeds the current time in milliseconds.
+    trace_id = f"test_mixed_{int(time.time() * 1000)}"
+    trace_meta = {
+        "test_type": "mixed",
+        "description": "混合RAG链路测试"
+    }
+    rag_monitor.start_trace(trace_id, metadata=trace_meta)
+
+    try:
+        # Step 1: synchronous query extraction
+        pairs = example_query_extract("检查项目的环境保护措施和质量管理体系。")
+        print(f"✅ [同步] 查询提取完成")
+
+        # Step 2: synchronous vector search
+        hits = example_vector_search(pairs)
+        print(f"✅ [同步] 向量检索完成")
+
+        async def run_async_stages():
+            # Step 3: asynchronous rerank
+            reranked = await example_async_rerank(hits)
+            print(f"✅ [异步] 重排序完成")
+
+            # Step 4: asynchronous parent-document enhancement
+            enhanced = await example_async_parent_enhance(reranked)
+            print(f"✅ [异步] 父文档增强完成")
+            return enhanced
+
+        # Drive the asynchronous half from this synchronous function.
+        final_hits = asyncio.run(run_async_stages())
+
+        print(f"\n✅ 混合RAG链路测试完成,最终得到 {len(final_hits)} 个结果")
+
+    finally:
+        # Close the trace even if a step raised, then report its summary.
+        summary = rag_monitor.end_trace(trace_id)
+        print(f"\n📊 追踪数据已保存: temp/rag_monitoring/{trace_id}.json")
+        print(f"⏱️  总耗时: {summary['total_duration']}秒")
+        print(f"📝 步骤数量: {len(summary['steps'])}")
+
+
+# ========== 示例4: 自定义输入输出转换 ==========
+
+@rag_monitor.monitor_step(
+    step_name="example_sensitive_data",
+    capture_input=True,
+    capture_output=True,
+    input_transform=lambda x: {
+        # Drop sensitive fields; keep only a size statistic.
+        "user_id": "***",  # mask the user id
+        "data_length": len(str(x))
+    },
+    output_transform=lambda x: {
+        # Keep only the headline indicators.
+        "success": x.get("success"),
+        "count": x.get("count")
+    }
+)
+def example_process_sensitive_data(user_id: str, data: dict):
+    """Example step: handle sensitive data with custom capture transforms.
+
+    Args:
+        user_id: identifier echoed back in the returned dict.
+        data: payload whose length is reported as ``count``.
+
+    Returns:
+        Dict with ``success``, ``user_id``, ``count`` and ``details`` keys.
+    """
+    time.sleep(0.3)  # simulated processing latency
+    outcome = {
+        "success": True,
+        "user_id": user_id,
+        "count": len(data),
+        # Per the output_transform above, the details are not forwarded.
+        "details": data
+    }
+    return outcome
+
+
+def test_custom_transform():
+    """Run the sensitive-data example step inside one monitoring trace.
+
+    Starts a trace, calls ``example_process_sensitive_data`` with a sample
+    user id and payload, and always ends the trace in ``finally``.
+    """
+    divider = "=" * 60
+    print("\n" + divider)
+    print("示例4: 自定义输入输出转换(敏感数据保护)")
+    print(divider)
+
+    trace_id = f"test_transform_{int(time.time() * 1000)}"
+    trace_meta = {
+        "test_type": "custom_transform"
+    }
+    rag_monitor.start_trace(trace_id, metadata=trace_meta)
+
+    try:
+        sample_payload = {"key1": "value1", "key2": "value2"}
+        outcome = example_process_sensitive_data(
+            user_id="user_12345",
+            data=sample_payload
+        )
+        print(f"✅ 处理完成,成功: {outcome['success']}")
+        print(f"ℹ️  敏感信息已被过滤,只记录统计数据")
+
+    finally:
+        # Close the trace even if the step raised; the return value is unused.
+        rag_monitor.end_trace(trace_id)
+        print(f"\n📊 追踪数据已保存: temp/rag_monitoring/{trace_id}.json")
+
+
+# ========== 查看监控结果 ==========
+
+def view_trace_result(trace_id: str):
+    """Print a summary of a saved trace file.
+
+    Looks for ``temp/rag_monitoring/<trace_id>.json`` relative to the
+    current working directory. When the file is missing a not-found message
+    is printed; otherwise the total duration, the step count and one entry
+    per step (status, function name, duration and any error message) are
+    printed.
+
+    Args:
+        trace_id: id of the trace whose JSON file should be displayed.
+    """
+    trace_file = Path("temp/rag_monitoring") / f"{trace_id}.json"
+
+    # Guard clause: nothing to show when the trace file does not exist.
+    if not trace_file.exists():
+        print(f"❌ 找不到追踪文件: {trace_file}")
+        return
+
+    print(f"\n📄 追踪结果: {trace_id}")
+    print("=" * 60)
+
+    with open(trace_file, 'r', encoding='utf-8') as handle:
+        trace = json.load(handle)
+
+    steps = trace.get('steps', {})
+    print(f"⏱️  总耗时: {trace.get('total_duration')}秒")
+    print(f"📝 步骤数量: {len(steps)}")
+    print(f"\n步骤详情:")
+
+    for step_name, step in steps.items():
+        status_label = step.get('status', 'unknown').upper()
+        print(f"\n  [{status_label}] {step_name}")
+        print(f"    函数: {step.get('function_name')}")
+        print(f"    耗时: {step.get('duration')}秒")
+
+        # Only failed steps carry an error message worth showing.
+        if step.get('status') == 'error':
+            error_message = step.get('error', {}).get('message')
+            print(f"    ❌ 错误: {error_message}")
+
+
+# ========== 主函数 ==========
+
+def main():
+    """Run every demo scenario in order and report the outcome.
+
+    The four scenarios run sequentially. If any of them raises, the error
+    and its traceback are printed and the process exits with status 1, so
+    shells and CI jobs can detect the failure. Previously the script exited
+    with status 0 even when a scenario failed.
+
+    Raises:
+        SystemExit: with code 1 when a scenario raises an exception.
+    """
+    print("\n" + "🚀 RAG监控装饰器测试 🚀".center(60, "="))
+
+    try:
+        # Example 1: synchronous pipeline
+        test_sync_rag_pipeline()
+
+        # Example 2: asynchronous pipeline
+        asyncio.run(test_async_rag_pipeline())
+
+        # Example 3: mixed synchronous + asynchronous pipeline
+        test_mixed_rag_pipeline()
+
+        # Example 4: custom input/output transforms
+        test_custom_transform()
+
+    except Exception as e:
+        print(f"\n❌ 测试失败: {e}")
+        import traceback
+        traceback.print_exc()
+        # Signal the failure to the caller instead of exiting with status 0.
+        sys.exit(1)
+    else:
+        print("\n" + "✅ 所有测试完成!".center(60, "="))
+        print(f"\n💡 提示: 查看监控数据文件在: temp/rag_monitoring/")
+        print(f"💡 提示: 每个trace_id对应一个JSON文件,包含完整的执行链路信息")
+
+
+if __name__ == "__main__":
+    main()