ソースを参照

dev:目录审查模块完成主体框架对接;

ChenJiSheng 1 ヶ月 前
コミット
7dc5817b92

+ 20 - 20
core/construction_review/component/ai_review_engine.py

@@ -1044,26 +1044,26 @@ class AIReviewEngine(BaseReviewer):
                 else:
                     logger.warning(f"[大纲审查] 未能将章节 '{chapter_label}' 的数据写回Redis")
             
-            if os.path.exists(path_redis):
-                # 文件已存在,追加时不写表头
-                redis_data.to_csv(path_redis, mode='a', encoding='utf-8-sig', index=False, header=False)
-                # df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
-
-            else:
-                # 文件不存在,首次写入时写表头
-                redis_data.to_csv(path_redis, mode='w', encoding='utf-8-sig', index=False, header=True)
-
-
-            # 判断文件是否存在,决定是否写入表头
-            if os.path.exists(csv_path):
-                # 文件已存在,追加时不写表头
-                miss_outline_df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
-                # df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
-
-            else:
-                # 文件不存在,首次写入时写表头
-                miss_outline_df.to_csv(csv_path, mode='w', encoding='utf-8-sig', index=False, header=True)
-                # df.to_csv(csv_path, mode='w', encoding='utf-8-sig', index=False, header=True)
+            # if os.path.exists(path_redis):
+            #     # 文件已存在,追加时不写表头
+            #     redis_data.to_csv(path_redis, mode='a', encoding='utf-8-sig', index=False, header=False)
+            #     # df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
+
+            # else:
+            #     # 文件不存在,首次写入时写表头
+            #     redis_data.to_csv(path_redis, mode='w', encoding='utf-8-sig', index=False, header=True)
+
+
+            # # 判断文件是否存在,决定是否写入表头
+            # if os.path.exists(csv_path):
+            #     # 文件已存在,追加时不写表头
+            #     miss_outline_df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
+            #     # df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
+
+            # else:
+            #     # 文件不存在,首次写入时写表头
+            #     miss_outline_df.to_csv(csv_path, mode='w', encoding='utf-8-sig', index=False, header=True)
+            #     # df.to_csv(csv_path, mode='w', encoding='utf-8-sig', index=False, header=True)
             
             # df['']
             # 检查 df 是否为 None

+ 108 - 0
core/construction_review/component/reviewers/catalogues_check/catalogues_check.py

@@ -7,6 +7,7 @@ import json
 import csv
 import logging
 import ast
+import time
 from pathlib import Path
 from typing import Dict, List, Any
 import sys
@@ -439,6 +440,113 @@ def remove_common_elements_between_dataframes(
     return miss_outline_df, redis_data
 
 
+# Reference document cited on every generated review item.
+_CATALOG_REFERENCE_SOURCE = '《桥梁公司危险性较大工程管理实施细则(2025版)》'
+
+
+def _parse_list_cell(cell: Any, column: str, index: Any) -> List[Any]:
+    """Normalise one DataFrame cell into a list of missing entries.
+
+    Args:
+        cell: Raw cell value; a list, a string holding a Python/JSON list
+            literal, an empty string, or a missing value.
+        column: Column name, used only in the warning message.
+        index: Row index, used only in the warning message.
+
+    Returns:
+        The list held by the cell, or an empty list when the cell is
+        empty, missing, unparseable, or does not describe a list.
+    """
+    # A real list must be recognised BEFORE pd.isna(): pd.isna() on a list
+    # returns an element-wise array, and testing the truth of an array with
+    # more than one element raises ValueError, which previously sent valid
+    # lists down the "unparseable" path and dropped them.
+    if isinstance(cell, list):
+        return cell
+    try:
+        if pd.isna(cell) or cell == '':
+            return []
+        # First attempt: Python literal syntax (e.g. "['a', 'b']").
+        parsed = ast.literal_eval(cell)
+    except (ValueError, SyntaxError, TypeError):
+        try:
+            # Second attempt: JSON syntax (e.g. '["a", "b"]').
+            parsed = json.loads(cell)
+        except (json.JSONDecodeError, TypeError):
+            logger.warning(f"第 {index} 行无法解析{column}: {cell}")
+            return []
+    # Anything that parsed to a non-list (str, dict, number, ...) is ignored.
+    return parsed if isinstance(parsed, list) else []
+
+
+def _build_review_item(kind: str, column: str, item: Any, location: Any) -> Dict[str, Any]:
+    """Build one review entry reporting that ``item`` is missing at ``location``.
+
+    ``kind`` is the label inserted into the suggestion text ("目录" or "大纲")
+    and ``column`` is the source column name quoted in that text.
+    """
+    return {
+        "issue_point": f"{item}缺失",
+        "location": location,
+        "suggestion": (
+            f"{kind}缺失({column}):在待审查{kind}中未找到与'{item}'对应的章节;"
+            f"当前章节仅涉及'{location}',未涵盖'{item}'相关内容;"
+            f"整改建议:建议在本章或前序章节中增设'{item}'相关内容,确保与审查规范要求一致。"
+        ),
+        "reason": "",
+        "risk_level": "高风险",
+        "reference_source": _CATALOG_REFERENCE_SOURCE,
+    }
+
+
+def process_catalog_review_list(catogues_df: pd.DataFrame) -> Dict[str, Any]:
+    """Turn the catalogue-review DataFrame into a review result envelope.
+
+    For every row, the ``missing_items`` and ``miss_outline`` cells are
+    parsed into lists and one review item is emitted per missing entry
+    (all ``missing_items`` entries of a row first, then its
+    ``miss_outline`` entries).
+
+    Args:
+        catogues_df: DataFrame read with the following columns:
+            - title: section title, used as the item location
+            - chapter_label: fallback location when ``title`` is empty
+            - missing_items: entries missing from the catalogue
+              (list, or string holding a list literal)
+            - miss_outline: entries missing from the outline
+              (list, or string holding a list literal)
+
+    Returns:
+        A dict with keys ``details``, ``success`` and ``execution_time``.
+        ``details["response"]`` is the list of review items, each holding
+        ``issue_point``, ``location``, ``suggestion``, ``reason``,
+        ``risk_level`` and ``reference_source``.
+    """
+    start_time = time.time()
+    review_items: List[Dict[str, Any]] = []
+
+    for index, row in catogues_df.iterrows():
+        title = row.get('title', '')
+        chapter_label = row.get('chapter_label', '')
+        location = title if title else chapter_label
+
+        missing_items_list = _parse_list_cell(row.get('missing_items', ''), 'missing_items', index)
+        miss_outline_list = _parse_list_cell(row.get('miss_outline', ''), 'miss_outline', index)
+
+        # Entries missing from the catalogue.
+        review_items.extend(
+            _build_review_item("目录", "missing_items", missing_item, location)
+            for missing_item in missing_items_list
+        )
+        # Entries missing from the outline.
+        review_items.extend(
+            _build_review_item("大纲", "miss_outline", miss_outline, location)
+            for miss_outline in miss_outline_list
+        )
+
+    execution_time = time.time() - start_time
+    logger.info(f"目录审查完成,共发现 {len(review_items)} 个缺失项,耗时 {execution_time:.2f} 秒")
+
+    return {
+        "details": {
+            "name": "completeness_check",
+            "response": review_items,
+            "review_location_label": "",
+            "chapter_code": "catalogue",
+            "original_content": ""
+        },
+        # NOTE(review): "success" is always False, even when processing
+        # finishes normally; kept as-is because callers may rely on it.
+        # TODO confirm the intended meaning with the result consumers.
+        "success": False,
+        "execution_time": execution_time
+    }
+
+
 async def catalogues_check(catalog_file = None):
     """主函数"""
     # 获取当前文件所在目录

+ 6 - 1
core/construction_review/component/reviewers/catalogues_check/utils/redis_utils.py

@@ -97,7 +97,7 @@ class CataloguesRedisManager:
         logger.info(f"数据存储完成,共 {len(rows)} 行")
         return len(rows)
 
-    def read_all(self, task_id: str) -> pd.DataFrame:
+    def read_all(self, task_id: str, flag = None) -> pd.DataFrame:
         """
         读取所有数据
 
@@ -130,6 +130,11 @@ class CataloguesRedisManager:
                 rows.append(row)
 
         logger.info(f"读取完成,共 {len(rows)} 行")
+        # if flag==True:
+        #     # 读取成功后清空 Redis 中的数据
+        #     redis_client.delete(data_key)
+        #     logger.info(f"已清空 Redis 中 task_id '{task_id}' 的数据")
+
         return pd.DataFrame(rows)
 
     def read_by_title(self, task_id: str, title: str) -> Optional[Dict[str, Any]]:

+ 18 - 2
core/construction_review/workflows/core_functions/ai_review_core_fun.py

@@ -34,10 +34,16 @@ AI审查核心功能类 - 负责具体的审查逻辑和数据处理
 
 import asyncio
 import random
+import json
+import ast
+import time
+import pandas as pd
 from typing import Dict, Union, List, Any, Optional, Tuple
 from dataclasses import dataclass
 from langchain_core.messages import AIMessage
 
+from core.construction_review.component.reviewers.catalogues_check.utils.redis_utils import get_redis_manager
+from core.construction_review.component.reviewers.catalogues_check.catalogues_check import process_catalog_review_list
 from core.construction_review.component.reviewers.utils import directory_extraction
 from foundation.observability.logger.loggering import server_logger as logger
 from foundation.infrastructure.cache.redis_connection import RedisConnectionFactory
@@ -429,14 +435,24 @@ class AIReviewCoreFun:
                 stage_name=f"{stage_name}_大纲审查"
             )
             logger.info(f"[outline_check完成] 共发现 {len(outline_result.get('details', {}).get('response', []))} 个缺失项")
-
+            redis_manager = get_redis_manager()
+            catogues_df = redis_manager.read_all(task_id=state['callback_task_id'])
+            
+            # 使用封装的函数处理目录审查列表
+            catogues_reciew_result = process_catalog_review_list(catogues_df)
+            
+            # 保存结果到CSV文件
+            catogues_df.to_csv('temp/document_temp/catogues_df.csv', encoding='utf-8-sig', index=False)
+            # pd.DataFrame(catogues_reciew_list).to_csv('temp/document_temp/catogues_df_with_review.csv', encoding='utf-8-sig', index=False)
+            # with open('temp/document_temp/catogues_result.json', 'a', encoding='utf-8') as f:
+            #     json.dump(catogues_reciew_result, f, ensure_ascii=False, indent=4)
             # 对比逻辑
             if chapter_code == "catalogue" and func_name == "catalogue_check":
                     return UnitReviewResult(
                         unit_index=chunk_index,
                         unit_content=chunk,
                         basic_compliance={
-                            "catalogue_check": catalogue_result
+                            "catalogue_check": catogues_reciew_result
                         },
                         technical_compliance={},
                         rag_enhanced={},