Преглед на файлове

dev: 添加了大纲审查的内存初版,完成主框架接入;

ChenJiSheng преди 1 месец
родител
ревизия
44122e5f46

+ 42 - 8
core/construction_review/component/ai_review_engine.py

@@ -91,6 +91,15 @@ from .reviewers.check_completeness.components.result_analyzer import ResultAnaly
 from .reviewers.check_completeness.utils.file_utils import write_json
 from core.construction_review.component.reviewers.base_reviewer import ReviewResult
 from .reviewers.outline_check import outline_review_results_df
+from .reviewers.check_completeness.utils.redis_csv_utils import (
+    df_store_to_redis,
+    get_redis_connection,
+    main,
+    display_redis_data,
+    store_header_to_redis,
+    read_from_redis_and_save_csv,
+    store_row_to_redis,
+)
 @dataclass
 class ReviewResult:
     """审查结果"""
@@ -158,6 +167,7 @@ class AIReviewEngine(BaseReviewer):
         self.outline_reviewer = OutlineReviewer()
 
         self.milvus = MilvusManager(MilvusConfig())
+        self.redis_client = get_redis_connection()   # 获取Redis连接
 
     def _process_review_result(self, result):
         """
@@ -744,11 +754,20 @@ class AIReviewEngine(BaseReviewer):
             review_results_df = pd.DataFrame(review_results)
             chapter_labels = review_results_df['section_label'].str.split('->').str[0]
             review_results_df['title'] = chapter_labels
-            review_results_df.to_csv(Path('temp') / 'document_temp' / '2_spec_review_results.csv', encoding='utf-8-sig', index=False)
-            csv_file = rf'temp\document_temp\2_spec_review_results.csv'
-            path2 = rf'temp\document_temp\outlines_review_results.csv'
-            data_df = pd.read_csv(csv_file, encoding='utf-8-sig')
-            outline_review_results_df(data=data_df, path=path2)
+            # review_results_df.to_csv(Path('temp') / 'document_temp' / '2_spec_review_results.csv', encoding='utf-8-sig', index=False)
+            # csv_file = rf'temp\document_temp\2_spec_review_results.csv'
+            # path2 = rf'temp\document_temp\outlines_review_results.csv'
+            # data_df = pd.read_csv(csv_file, encoding='utf-8-sig')
+            # data_df = review_results_df
+            outline_review_results = outline_review_results_df(data=review_results_df)
+            
+            logger.info(f"[完整性检查] 准备将大纲审查结果存储到Redis,bind_id: {trace_id_idx}")
+            logger.info(f"[完整性检查] 大纲审查结果行数: {len(outline_review_results) if outline_review_results is not None else 'None'}")
+
+            df_store_to_redis(self.redis_client, data=outline_review_results, bind_id=trace_id_idx)
+            
+            logger.info(f"[完整性检查] 数据已成功存储到Redis,bind_id: {trace_id_idx}")
+            
             df_filtered = review_results_df.drop_duplicates(subset='title', keep='first').reset_index(drop=True)
             unique_chapter_labels = chapter_labels.unique().tolist()
             chapter_classifications = df_filtered['chapter_classification']
@@ -969,9 +988,21 @@ class AIReviewEngine(BaseReviewer):
         missing_items = []
         
         try:
-            # 读取CSV文件
-            df = pd.read_csv(csv_path, encoding='utf-8-sig')
-            logger.info(f"成功读取CSV文件: {csv_path}, 共 {len(df)} 行")
+            # 从Redis读取并保存为新的CSV文件
+            rows_df = read_from_redis_and_save_csv(self.redis_client, bind_id=trace_id_idx)
+            df = rows_df
+            
+            # 检查 df 是否为 None
+            if df is None:
+                logger.error(f"[大纲审查] Redis中不存在ID '{trace_id_idx}' 的数据,无法进行大纲审查")
+                return {
+                    'outline_review_result': {
+                        "response": [],
+                    },
+                    'error': f'Redis中不存在ID \'{trace_id_idx}\' 的数据'
+                }
+            
+            logger.info(f"[大纲审查] 成功从Redis读取数据,共 {len(df)} 行")
             
             # 兼容新旧字段名
             chapter_label_col = 'chapter_label' if 'chapter_label' in df.columns else 'section_label_first'
@@ -986,6 +1017,9 @@ class AIReviewEngine(BaseReviewer):
                 try:
                     if pd.isna(merged_results_str) or merged_results_str == '':
                         merged_results = {}
+                    elif isinstance(merged_results_str, dict):
+                        # 如果已经是字典,直接使用
+                        merged_results = merged_results_str
                     else:
                         # 尝试使用ast.literal_eval解析
                         merged_results = ast.literal_eval(merged_results_str)

+ 243 - 0
core/construction_review/component/reviewers/check_completeness/utils/redis_csv_utils.py

@@ -0,0 +1,243 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Redis CSV processor.

Reads a CSV file, stores it row-by-row into a Redis hash, then reads the
rows back in one pass and saves them as a new CSV file.
"""

import csv
import pandas as pd
import redis
import json
import configparser
import os

from foundation.observability.logger.loggering import server_logger as logger

# Load Redis settings from config.ini (path is relative to this module's
# location, six levels up — TODO confirm this survives packaging/deployment).
config = configparser.ConfigParser()
config_path = os.path.join(os.path.dirname(__file__), '../../../../../../config/config.ini')
config.read(config_path, encoding='utf-8')

# Redis connection settings, with local-dev fallbacks when the [redis]
# section or a key is missing.
REDIS_HOST = config.get('redis', 'REDIS_HOST', fallback='localhost')
REDIS_PORT = config.getint('redis', 'REDIS_PORT', fallback=6379)
REDIS_PASSWORD = config.get('redis', 'REDIS_PASSWORD', fallback='')
REDIS_DB = config.getint('redis', 'REDIS_DB', fallback=0)

# NOTE(review): hard-coded bind id — presumably a sample trace id used only
# by the standalone main() smoke test; confirm it is never used in production.
BIND_ID = '2d5d99c823a6b1a19f770932f3237bf8-1768535328'

# Default CSV file paths used when main() runs this module standalone.
INPUT_CSV = 'outlines_review_results.csv'
OUTPUT_CSV = 'outlines_review_results_redis.csv'
+
+
def get_redis_connection():
    """Create a Redis client from the module-level config and verify it.

    Returns:
        redis.Redis: a connected client with ``decode_responses=True`` so
        hash fields/values come back as ``str`` rather than ``bytes``.

    Raises:
        Exception: whatever ``redis`` raises when the PING fails; re-raised
        after logging so callers see the original error.
    """
    try:
        client = redis.Redis(
            host=REDIS_HOST,
            port=REDIS_PORT,
            password=REDIS_PASSWORD,
            db=REDIS_DB,
            decode_responses=True
        )
        # Fail fast here rather than at the first real command.
        client.ping()
        logger.info(f"[OK] Redis连接成功 (host={REDIS_HOST}, port={REDIS_PORT})")
        return client
    except Exception as e:
        # Bug fix: connection failure was logged at info level, making it
        # easy to miss in production logs.
        logger.error(f"[ERROR] Redis连接失败: {e}")
        raise
+
+
def store_row_to_redis(redis_client, bind_id, row_key, row_data):
    """Serialize one row dict to JSON and store it in the *bind_id* hash."""
    redis_client.hset(bind_id, row_key, json.dumps(row_data, ensure_ascii=False))
+
+
def store_header_to_redis(redis_client, bind_id, header):
    """Store the CSV header list under the fixed "header" field of the hash."""
    serialized = json.dumps(header, ensure_ascii=False)
    redis_client.hset(bind_id, "header", serialized)
+
+
def df_store_to_redis(redis_client, data=None, bind_id=None):
    """Store tabular data row-by-row into a Redis hash keyed by *bind_id*.

    Any existing data under *bind_id* is deleted first. Each row is stored
    as a JSON string under field ``row_N`` (1-based), and the total row
    count under field ``row_count`` so readers can iterate without scanning.

    Args:
        redis_client: connected Redis client (``decode_responses=True``).
        data: optional pandas DataFrame. When None, rows are read from the
            module-level INPUT_CSV file instead.
        bind_id: Redis hash key the rows are stored under.

    Returns:
        list: the row dicts that were stored.
    """
    logger.info(f"\n[Redis存储] 开始存储数据到Redis,bind_id: {bind_id}")

    # Debug: surface which Redis instance we are actually talking to.
    try:
        logger.info(f"[DEBUG] Redis连接信息: host={redis_client.connection_pool.connection_kwargs.get('host')}, "
                   f"port={redis_client.connection_pool.connection_kwargs.get('port')}, "
                   f"db={redis_client.connection_pool.connection_kwargs.get('db')}")
    except Exception as e:
        logger.warning(f"[DEBUG] 无法获取Redis连接信息: {e}")

    if data is None:
        # Standalone/fallback mode: read the rows from the local CSV file.
        logger.info(f"[Redis存储] 从CSV文件读取数据: {INPUT_CSV}")
        df = pd.read_csv(INPUT_CSV, encoding='utf-8-sig')
        rows = df.to_dict('records')
        logger.info(f"[OK] 读取到 {len(rows)} 行数据")
    else:
        logger.info(f"[Redis存储] 使用传入的DataFrame数据,共 {len(data)} 行")
        rows = data.to_dict('records')

    # Drop any stale rows previously stored under the same key.
    redis_client.delete(bind_id)
    logger.info(f"[OK] 清空Redis中ID '{bind_id}' 的旧数据")

    # Store each row as a JSON hash field: row_1 .. row_n (1-based).
    for idx, row in enumerate(rows, start=1):
        store_row_to_redis(redis_client, bind_id, f"row_{idx}", row)

    # Row count lets the reader know exactly how many row_N fields exist.
    redis_client.hset(bind_id, "row_count", len(rows))

    logger.info(f"[OK] 成功将 {len(rows)} 行数据存入Redis")
    logger.info(f"[OK] Redis Key: {bind_id}")
    logger.info(f"[Redis存储] 数据存储完成")

    return rows
+
+
def read_from_redis_and_save_csv(redis_client, bind_id=None, csv_save_path=None):
    """Read all rows stored under *bind_id* from Redis into a DataFrame.

    Fields ``row_1`` .. ``row_{row_count}`` are JSON-decoded into row dicts.
    After a successful read the whole hash is DELETED (one-shot handoff:
    the data can only be consumed once). When *csv_save_path* is given the
    DataFrame is also written there as CSV.

    Args:
        redis_client: connected Redis client.
        bind_id: Redis hash key to read.
        csv_save_path: optional path to persist the data as a CSV file.

    Returns:
        pandas.DataFrame: the rows; an empty DataFrame when *bind_id* does
        not exist in Redis.
    """
    logger.info(f"\n从Redis读取数据 (ID: {bind_id})")

    # Debug: surface which Redis instance we are actually talking to.
    try:
        logger.info(f"[DEBUG] Redis连接信息: host={redis_client.connection_pool.connection_kwargs.get('host')}, "
                   f"port={redis_client.connection_pool.connection_kwargs.get('port')}, "
                   f"db={redis_client.connection_pool.connection_kwargs.get('db')}")
    except Exception as e:
        logger.warning(f"[DEBUG] 无法获取Redis连接信息: {e}")

    # Debug: check the key and list any similarly-named keys to help
    # diagnose bind-id mismatches between writer and reader.
    key_exists = redis_client.exists(bind_id)
    logger.info(f"[DEBUG] Redis键 '{bind_id}' 存在状态: {key_exists}")
    try:
        all_keys = redis_client.keys(f"*{bind_id}*")
        logger.info(f"[DEBUG] Redis中匹配的键: {all_keys}")
    except Exception as e:
        logger.warning(f"[DEBUG] 无法列出Redis键: {e}")

    if not key_exists:
        logger.warning(f"[WARN] Redis中不存在ID '{bind_id}' 的数据,返回空DataFrame")
        return pd.DataFrame()  # empty DataFrame, not None, so len(df) works

    # row_count was written by df_store_to_redis; default to 0 if missing.
    row_count = int(redis_client.hget(bind_id, "row_count") or 0)
    logger.info(f"[OK] 总行数: {row_count}")

    # Read row_1 .. row_{row_count}; skip any missing fields defensively.
    rows = []
    for idx in range(1, row_count + 1):
        row_json = redis_client.hget(bind_id, f"row_{idx}")
        if row_json:
            rows.append(json.loads(row_json))

    logger.info(f"[OK] 成功从Redis读取 {len(rows)} 行数据")

    df_output = pd.DataFrame(rows)
    if csv_save_path:
        # Bug fix: the original ignored csv_save_path and always wrote to the
        # module constant OUTPUT_CSV, and logged "saved" even when no path
        # was requested.
        df_output.to_csv(csv_save_path, index=False, encoding='utf-8-sig')
        logger.info(f"[OK] 数据已保存到: {csv_save_path}")

    # One-shot semantics: remove the hash once it has been consumed.
    redis_client.delete(bind_id)
    logger.info(f"[OK] 已删除Redis中ID '{bind_id}' 的数据")

    return df_output
+
+
def display_redis_data(redis_client, bind_id=None):
    """Log a short summary (field count and names) of the hash at *bind_id*."""
    logger.info(f"\nRedis数据摘要 (ID: {bind_id})")
    logger.info("-" * 50)
    fields = redis_client.hkeys(bind_id)
    logger.info(f"字段数量: {len(fields)}")
    logger.info(f"字段列表: {fields}")
    logger.info("-" * 50)
+
+
def main():
    """Standalone smoke test: CSV -> Redis -> CSV round trip using BIND_ID."""
    logger.info("=" * 60)
    logger.info("Redis CSV 处理器")
    logger.info("=" * 60)

    try:
        redis_client = get_redis_connection()

        # Bug fix: the helpers were called without bind_id, so every Redis
        # command received key None (redis-py rejects NoneType keys).
        # Read INPUT_CSV (data=None) and store the rows under BIND_ID.
        df_store_to_redis(redis_client, bind_id=BIND_ID)

        display_redis_data(redis_client, bind_id=BIND_ID)

        # Read the rows back (this also deletes the key) and save a copy.
        rows = read_from_redis_and_save_csv(
            redis_client, bind_id=BIND_ID, csv_save_path=OUTPUT_CSV
        )
        logger.info(f"{pd.DataFrame(rows)}")
        logger.info("\n" + "=" * 60)
        logger.info("[OK] 处理完成!")
        logger.info("=" * 60)

    except Exception as e:
        # Bug fix: the original called traceback.logger.info_exc(), which
        # does not exist and would raise AttributeError inside the handler.
        import traceback  # local import: only needed on the failure path
        logger.error(f"\n[ERROR] 处理过程中发生错误: {e}")
        logger.error(traceback.format_exc())


if __name__ == "__main__":
    main()

+ 2 - 0
core/construction_review/component/reviewers/outline_check.py

@@ -15,6 +15,8 @@ def parse_review_result(review_result_str):
     """
     try:
         # 尝试解析JSON格式的字符串
+        if isinstance(review_result_str, dict):
+            return review_result_str
         if pd.isna(review_result_str) or review_result_str == '':
             return {}
         return json.loads(review_result_str)

+ 8 - 3
core/construction_review/workflows/ai_review_workflow.py

@@ -498,15 +498,20 @@ class AIReviewWorkflow:
                 logger.info(f"开始执行大纲审查")
 
 
-                outline_review_result = await self.ai_review_engine.outline_check(state["callback_task_id"], state["structured_content"],
-                                                    state, state.get("stage_name", "大纲审查"))
-                # outline_review_result = {} 
+
+
                 check_completeness_result = await self.ai_review_engine.check_completeness(
                     trace_id_idx = state["callback_task_id"],
                     review_content = state["structured_content"]["chunks"],
                     state = state,
                     stage_name = state.get("stage_name", "完整性审查")
                 )
+                # outline_review_result = {} 
+                outline_review_result = await self.ai_review_engine.outline_check(
+                    trace_id_idx = state["callback_task_id"],
+                    outline_content = state["structured_content"],
+                    state = state,
+                    stage_name = state.get("stage_name", "大纲审查"))
                 # with open(r"temp\document_temp\4_check_completeness_result.json", "w", encoding="utf-8") as f:
                 #     json.dump(check_completeness_result, f, ensure_ascii=False, indent=4)