Browse Source

dev:规范目录审查模块的字段命名,并去除中间结果保存的逻辑;

ChenJiSheng 1 month ago
parent
commit
97a9c06148

+ 10 - 10
core/construction_review/component/reviewers/catalogues_check/catalogues_check.py

@@ -399,7 +399,7 @@ def remove_common_elements_between_dataframes(
     merged_df = pd.merge(miss_outline_df, redis_data, on='chapter_label', how='inner', suffixes=('_outline', '_redis'))
     
     # 创建一个字典来存储公共元素(用于去除操作)
-    common_elements_dict = {}
+    common_elements_list = {}
     
     # 创建一个列表来存储所有公共元素
     all_common_elements = []
@@ -424,7 +424,7 @@ def remove_common_elements_between_dataframes(
         common_elements = miss_outline_set & missing_items_set
         
         # 存储公共元素到字典(用于去除操作)
-        common_elements_dict[chapter_label] = list(common_elements)
+        common_elements_list[chapter_label] = list(common_elements)
         
         # 将公共元素添加到总列表中
         all_common_elements.extend(list(common_elements))
@@ -433,14 +433,14 @@ def remove_common_elements_between_dataframes(
     
     # 更新miss_outline_df,去除公共元素
     miss_outline_df['miss_outline'] = miss_outline_df.apply(
-        lambda row: list(set(row['miss_outline']) - set(common_elements_dict.get(row['chapter_label'], [])))
+        lambda row: list(set(row['miss_outline']) - set(common_elements_list.get(row['chapter_label'], [])))
         if isinstance(row['miss_outline'], list) else [],
         axis=1
     )
     
     # 更新redis_data,去除公共元素
     redis_data['missing_items'] = redis_data.apply(
-        lambda row: list(set(row['missing_items']) - set(common_elements_dict.get(row['chapter_label'], [])))
+        lambda row: list(set(row['missing_items']) - set(common_elements_list.get(row['chapter_label'], [])))
         if isinstance(row['missing_items'], list) else [],
         axis=1
     )
@@ -452,7 +452,7 @@ def remove_common_elements_between_dataframes(
 
 def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any]]:
     """
-    处理目录审查列表,从DataFrame中提取missing_items、common_elements_dict和miss_outline并生成审查项
+    处理目录审查列表,从DataFrame中提取missing_items、common_elements_list和miss_outline并生成审查项
     
     Args:
         catogues_df: 包含目录审查数据的DataFrame,需要包含以下列:
@@ -460,7 +460,7 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
             - chapter_label: 章节标签
             - chapter_classification: 章节分类
             - missing_items: 目录缺失项(列表或字符串)- 目录中缺失但大纲可能存在
-            - common_elements_dict: 目录与大纲都缺失的公共元素(列表或字符串)
+            - common_elements_list: 目录与大纲都缺失的公共元素(列表或字符串)
             - miss_outline: 大纲缺失项(列表或字符串)
             - specification_items: 规范项(列表或字符串)
     
@@ -522,8 +522,8 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
                 logger.warning(f"第 {index} 行无法解析missing_items: {missing_items_str}")
                 missing_items_list = []
         
-        # 解析 common_elements_dict 列(目录与大纲都缺失)
-        common_elements_str = row.get('common_elements_dict', '')
+        # 解析 common_elements_list 列(目录与大纲都缺失)
+        common_elements_str = row.get('common_elements_list', '')
         try:
             if pd.isna(common_elements_str) or common_elements_str == '':
                 common_elements_list = []
@@ -537,7 +537,7 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
                 # 尝试使用 json.loads 解析
                 common_elements_list = json.loads(common_elements_str)
             except (json.JSONDecodeError, TypeError):
-                logger.warning(f"第 {index} 行无法解析common_elements_dict: {common_elements_str}")
+                logger.warning(f"第 {index} 行无法解析common_elements_list: {common_elements_str}")
                 common_elements_list = []
         
         # 解析 miss_outline 列(大纲缺失)
@@ -571,7 +571,7 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
                 }
                 catogues_reciew_list.append(catalog_item)
         
-        # 处理 common_elements_dict(目录与大纲都缺失)
+        # 处理 common_elements_list(目录与大纲都缺失)
         if isinstance(common_elements_list, list) and len(common_elements_list) > 0:
             for common_element in common_elements_list:
                 common_item = {

+ 3 - 3
core/construction_review/workflows/ai_review_workflow.py

@@ -345,11 +345,11 @@ class AIReviewWorkflow:
             outline_results = pd.DataFrame(outline_results)
             # outline_results[['title', 'chapter_classification', 'missing_items']].to_csv(r"temp/document_temp/outline_results.csv", encoding='utf-8-sig', index=False)
             
-            # 初始化 miss_outline 和 common_elements_dict 列为空列表的 JSON 字符串
+            # 初始化 miss_outline 和 common_elements_list 列为空列表的 JSON 字符串
             outline_results['miss_outline'] = outline_results.get('miss_outline', '[]')
-            outline_results['common_elements_dict'] = outline_results.get('common_elements_dict', '[]')
+            outline_results['common_elements_list'] = outline_results.get('common_elements_list', '[]')
             # outline_results['chapter_label'] = outline_results['title']
-            outline_results.to_csv(r"temp/document_temp/outline_results11.csv", encoding='utf-8-sig', index=False)
+            # outline_results.to_csv(r"temp/document_temp/outline_results11.csv", encoding='utf-8-sig', index=False)
             
             # 存储到 Redis(使用 callback_task_id 作为任务 ID)
             try: