Просмотр исходного кода

dev:目录审查模块完成主体框架对接;

ChenJiSheng 1 месяц назад
Родитель
Коммит
7dc5817b92

+ 20 - 20
core/construction_review/component/ai_review_engine.py

@@ -1044,26 +1044,26 @@ class AIReviewEngine(BaseReviewer):
                 else:
                 else:
                     logger.warning(f"[大纲审查] 未能将章节 '{chapter_label}' 的数据写回Redis")
                     logger.warning(f"[大纲审查] 未能将章节 '{chapter_label}' 的数据写回Redis")
             
             
-            if os.path.exists(path_redis):
-                # 文件已存在,追加时不写表头
-                redis_data.to_csv(path_redis, mode='a', encoding='utf-8-sig', index=False, header=False)
-                # df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
-
-            else:
-                # 文件不存在,首次写入时写表头
-                redis_data.to_csv(path_redis, mode='w', encoding='utf-8-sig', index=False, header=True)
-
-
-            # 判断文件是否存在,决定是否写入表头
-            if os.path.exists(csv_path):
-                # 文件已存在,追加时不写表头
-                miss_outline_df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
-                # df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
-
-            else:
-                # 文件不存在,首次写入时写表头
-                miss_outline_df.to_csv(csv_path, mode='w', encoding='utf-8-sig', index=False, header=True)
-                # df.to_csv(csv_path, mode='w', encoding='utf-8-sig', index=False, header=True)
+            # if os.path.exists(path_redis):
+            #     # 文件已存在,追加时不写表头
+            #     redis_data.to_csv(path_redis, mode='a', encoding='utf-8-sig', index=False, header=False)
+            #     # df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
+
+            # else:
+            #     # 文件不存在,首次写入时写表头
+            #     redis_data.to_csv(path_redis, mode='w', encoding='utf-8-sig', index=False, header=True)
+
+
+            # # 判断文件是否存在,决定是否写入表头
+            # if os.path.exists(csv_path):
+            #     # 文件已存在,追加时不写表头
+            #     miss_outline_df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
+            #     # df.to_csv(csv_path, mode='a', encoding='utf-8-sig', index=False, header=False)
+
+            # else:
+            #     # 文件不存在,首次写入时写表头
+            #     miss_outline_df.to_csv(csv_path, mode='w', encoding='utf-8-sig', index=False, header=True)
+            #     # df.to_csv(csv_path, mode='w', encoding='utf-8-sig', index=False, header=True)
             
             
             # df['']
             # df['']
             # 检查 df 是否为 None
             # 检查 df 是否为 None

+ 108 - 0
core/construction_review/component/reviewers/catalogues_check/catalogues_check.py

@@ -7,6 +7,7 @@ import json
 import csv
 import csv
 import logging
 import logging
 import ast
 import ast
+import time
 from pathlib import Path
 from pathlib import Path
 from typing import Dict, List, Any
 from typing import Dict, List, Any
 import sys
 import sys
@@ -439,6 +440,113 @@ def remove_common_elements_between_dataframes(
     return miss_outline_df, redis_data
     return miss_outline_df, redis_data
 
 
 
 
def _is_missing_scalar(value: Any) -> bool:
    """Return True when *value* is None/NaN/NA or a blank string."""
    if isinstance(value, str):
        return not value.strip()
    try:
        return bool(pd.isna(value))
    except (TypeError, ValueError):
        # pd.isna() returns an array for array-likes, whose truth value is
        # ambiguous; such a value is not a missing scalar.
        return False


def _parse_list_cell(value: Any, column: str, row_index: Any) -> List[Any]:
    """Coerce one DataFrame cell into a list of missing entries.

    A cell that already holds a list is returned unchanged. A string cell is
    parsed first as a Python literal and then as JSON. Empty/NaN cells, cells
    that cannot be parsed, and cells that parse to something other than a
    list all yield an empty list.
    """
    # Lists must be recognised before any pd.isna() call: pd.isna(list)
    # returns an array, and using that array in a boolean context raises
    # ValueError for multi-element lists.
    if isinstance(value, list):
        return value
    if _is_missing_scalar(value):
        return []
    try:
        parsed = ast.literal_eval(value)
    except (ValueError, SyntaxError):
        try:
            parsed = json.loads(value)
        except (json.JSONDecodeError, TypeError):
            logger.warning(f"第 {row_index} 行无法解析{column}: {value}")
            return []
    return parsed if isinstance(parsed, list) else []


def _build_review_item(missing: Any, location: Any, scope: str, column: str) -> Dict[str, Any]:
    """Build one review entry for a missing catalogue or outline element."""
    return {
        "issue_point": f"{missing}缺失",
        "location": location,
        "suggestion": f"{scope}缺失({column}):在待审查{scope}中未找到与'{missing}'对应的章节;当前章节仅涉及'{location}',未涵盖'{missing}'相关内容;整改建议:建议在本章或前序章节中增设'{missing}'相关内容,确保与审查规范要求一致。",
        "reason": "",
        "risk_level": "高风险",
        "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',
    }


def process_catalog_review_list(catogues_df: pd.DataFrame) -> Dict[str, Any]:
    """
    Turn the catalogue-review DataFrame into a completeness-check result.

    For every row, the ``missing_items`` (catalogue gaps) and ``miss_outline``
    (outline gaps) cells are converted to lists, and one review entry is
    produced per listed element. Within a row, catalogue gaps come before
    outline gaps.

    Args:
        catogues_df: DataFrame read with these columns (absent columns are
            treated as empty):
            - title: chapter title, used as the reported location
            - chapter_label: fallback location when the title is empty/NaN
            - missing_items: list, or a string holding a Python/JSON list
            - miss_outline: list, or a string holding a Python/JSON list

    Returns:
        Dict[str, Any]: result dict with the keys
            - details: dict whose ``response`` is the list of review entries
              (each with issue_point, location, suggestion, reason,
              risk_level, reference_source), plus name,
              review_location_label, chapter_code and original_content
            - success: always False (see note at the return statement)
            - execution_time: elapsed seconds
    """
    start_time = time.perf_counter()
    catogues_reciew_list: List[Dict[str, Any]] = []

    # (column name, wording used in the suggestion text)
    sources = (("missing_items", "目录"), ("miss_outline", "大纲"))

    for index, row in catogues_df.iterrows():
        title = row.get('title', '')
        chapter_label = row.get('chapter_label', '')
        # NaN is truthy, so an unset title must be normalised before the
        # fallback to chapter_label can take effect.
        if _is_missing_scalar(title):
            location = '' if _is_missing_scalar(chapter_label) else chapter_label
        else:
            location = title

        for column, scope in sources:
            entries = _parse_list_cell(row.get(column, ''), column, index)
            catogues_reciew_list.extend(
                _build_review_item(entry, location, scope, column)
                for entry in entries
            )

    execution_time = time.perf_counter() - start_time
    logger.info(f"目录审查完成,共发现 {len(catogues_reciew_list)} 个缺失项,耗时 {execution_time:.2f} 秒")

    return {
        "details": {
            "name": "completeness_check",
            "response": catogues_reciew_list,
            "review_location_label": "",
            "chapter_code": "catalogue",
            "original_content": ""
        },
        # NOTE(review): hard-coded to False even when processing finishes
        # normally; kept as-is because callers may rely on it - confirm the
        # intended meaning of this flag.
        "success": False,
        "execution_time": execution_time
    }
+
+
 async def catalogues_check(catalog_file = None):
 async def catalogues_check(catalog_file = None):
     """主函数"""
     """主函数"""
     # 获取当前文件所在目录
     # 获取当前文件所在目录

+ 6 - 1
core/construction_review/component/reviewers/catalogues_check/utils/redis_utils.py

@@ -97,7 +97,7 @@ class CataloguesRedisManager:
         logger.info(f"数据存储完成,共 {len(rows)} 行")
         logger.info(f"数据存储完成,共 {len(rows)} 行")
         return len(rows)
         return len(rows)
 
 
-    def read_all(self, task_id: str) -> pd.DataFrame:
+    def read_all(self, task_id: str, flag = None) -> pd.DataFrame:
         """
         """
         读取所有数据
         读取所有数据
 
 
@@ -130,6 +130,11 @@ class CataloguesRedisManager:
                 rows.append(row)
                 rows.append(row)
 
 
         logger.info(f"读取完成,共 {len(rows)} 行")
         logger.info(f"读取完成,共 {len(rows)} 行")
+        # if flag==True:
+        #     # 读取成功后清空 Redis 中的数据
+        #     redis_client.delete(data_key)
+        #     logger.info(f"已清空 Redis 中 task_id '{task_id}' 的数据")
+
         return pd.DataFrame(rows)
         return pd.DataFrame(rows)
 
 
     def read_by_title(self, task_id: str, title: str) -> Optional[Dict[str, Any]]:
     def read_by_title(self, task_id: str, title: str) -> Optional[Dict[str, Any]]:

+ 18 - 2
core/construction_review/workflows/core_functions/ai_review_core_fun.py

@@ -34,10 +34,16 @@ AI审查核心功能类 - 负责具体的审查逻辑和数据处理
 
 
 import asyncio
 import asyncio
 import random
 import random
+import json
+import ast
+import time
+import pandas as pd
 from typing import Dict, Union, List, Any, Optional, Tuple
 from typing import Dict, Union, List, Any, Optional, Tuple
 from dataclasses import dataclass
 from dataclasses import dataclass
 from langchain_core.messages import AIMessage
 from langchain_core.messages import AIMessage
 
 
+from core.construction_review.component.reviewers.catalogues_check.utils.redis_utils import get_redis_manager
+from core.construction_review.component.reviewers.catalogues_check.catalogues_check import process_catalog_review_list
 from core.construction_review.component.reviewers.utils import directory_extraction
 from core.construction_review.component.reviewers.utils import directory_extraction
 from foundation.observability.logger.loggering import server_logger as logger
 from foundation.observability.logger.loggering import server_logger as logger
 from foundation.infrastructure.cache.redis_connection import RedisConnectionFactory
 from foundation.infrastructure.cache.redis_connection import RedisConnectionFactory
@@ -429,14 +435,24 @@ class AIReviewCoreFun:
                 stage_name=f"{stage_name}_大纲审查"
                 stage_name=f"{stage_name}_大纲审查"
             )
             )
             logger.info(f"[outline_check完成] 共发现 {len(outline_result.get('details', {}).get('response', []))} 个缺失项")
             logger.info(f"[outline_check完成] 共发现 {len(outline_result.get('details', {}).get('response', []))} 个缺失项")
-
+            redis_manager = get_redis_manager()
+            catogues_df = redis_manager.read_all(task_id=state['callback_task_id'])
+            
+            # 使用封装的函数处理目录审查列表
+            catogues_reciew_result = process_catalog_review_list(catogues_df)
+            
+            # 保存结果到CSV文件
+            catogues_df.to_csv('temp/document_temp/catogues_df.csv', encoding='utf-8-sig', index=False)
+            # pd.DataFrame(catogues_reciew_list).to_csv('temp/document_temp/catogues_df_with_review.csv', encoding='utf-8-sig', index=False)
+            # with open('temp/document_temp/catogues_result.json', 'a', encoding='utf-8') as f:
+            #     json.dump(catogues_reciew_result, f, ensure_ascii=False, indent=4)
             # 对比逻辑
             # 对比逻辑
             if chapter_code == "catalogue" and func_name == "catalogue_check":
             if chapter_code == "catalogue" and func_name == "catalogue_check":
                     return UnitReviewResult(
                     return UnitReviewResult(
                         unit_index=chunk_index,
                         unit_index=chunk_index,
                         unit_content=chunk,
                         unit_content=chunk,
                         basic_compliance={
                         basic_compliance={
-                            "catalogue_check": catalogue_result
+                            "catalogue_check": catogues_reciew_result
                         },
                         },
                         technical_compliance={},
                         technical_compliance={},
                         rag_enhanced={},
                         rag_enhanced={},