2 mesi fa · 835e6fc0cd
--- a/core/construction_review/component/ai_review_engine.py
+++ b/core/construction_review/component/ai_review_engine.py
@@ -1023,7 +1023,8 @@ class AIReviewEngine(BaseReviewer):
 
				             path_redis = 'temp/document_temp/redis_data.csv'
			
 
				 
			
 
				             # 去除两个DataFrame中相同chapter_label行的miss_outline列与missing_items列的公共元素
			
 
				-            miss_outline_df, redis_data = remove_common_elements_between_dataframes(miss_outline_df, redis_data)
			
 
				+            miss_outline_df, redis_data, common_elements_list = remove_common_elements_between_dataframes(miss_outline_df, redis_data)
			
 
				+            logger.info(f"[大纲审查] 公共元素列表: {common_elements_list}")
			
 
				             
			
 
				             # 将更新后的数据写回Redis
			
 
				             for index, row in redis_data.iterrows():
			
@@ -1034,6 +1035,7 @@ class AIReviewEngine(BaseReviewer):
 
				                     'chapter_label': chapter_label,
			
 
				                     'chapter_classification': row.get('chapter_classification', ''),
			
 
				                     'missing_items': row.get('missing_items', []),
			
 
				+                    'common_elements_list': common_elements_list,
			
 
				                     'miss_outline': miss_outline_df.loc[miss_outline_df['chapter_label'] == chapter_label, 'miss_outline'].values[0]
			
 
				                                    if len(miss_outline_df.loc[miss_outline_df['chapter_label'] == chapter_label]) > 0 else [],
			
 
				                     'specification_items': row.get('specification_items', []),
			
--- a/core/construction_review/component/reviewers/catalogues_check/catalogues_check.py
+++ b/core/construction_review/component/reviewers/catalogues_check/catalogues_check.py
@@ -383,23 +383,27 @@ class CatalogCheckProcessor:
 
				 def remove_common_elements_between_dataframes(
			
 
				     miss_outline_df: pd.DataFrame,
			
 
				     redis_data: pd.DataFrame
			
 
				-) -> tuple[pd.DataFrame, pd.DataFrame]:
			
 
				+) -> tuple[pd.DataFrame, pd.DataFrame, List]:
			
 
				     """
			
 
				     去除两个DataFrame中相同chapter_label行的miss_outline列与missing_items列的公共元素
			
 
				+    同时返回所有公共元素的列表
			
 
				 
			
 
				     Args:
			
 
				         miss_outline_df: 包含miss_outline列的DataFrame
			
 
				         redis_data: 包含missing_items列的DataFrame
			
 
				 
			
 
				     Returns:
			
 
				-        tuple: (更新后的miss_outline_df, 更新后的redis_data)
			
 
				+        tuple: (更新后的miss_outline_df, 更新后的redis_data, 所有公共元素列表)
			
 
				     """
			
 
				     # 合并两个DataFrame，基于chapter_label
			
 
				     merged_df = pd.merge(miss_outline_df, redis_data, on='chapter_label', how='inner', suffixes=('_outline', '_redis'))
			
 
				     
			
 
				-    # 创建一个字典来存储公共元素
			
 
				+    # 创建一个字典来存储公共元素（用于去除操作）
			
 
				     common_elements_dict = {}
			
 
				     
			
 
				+    # 创建一个列表来存储所有公共元素
			
 
				+    all_common_elements = []
			
 
				+    
			
 
				     # 遍历合并后的DataFrame，计算公共元素
			
 
				     for index, row in merged_df.iterrows():
			
 
				         chapter_label = row['chapter_label']
			
@@ -419,39 +423,44 @@ def remove_common_elements_between_dataframes(
 
				         # 计算公共元素
			
 
				         common_elements = miss_outline_set & missing_items_set
			
 
				         
			
 
				-        # 存储公共元素
			
 
				-        common_elements_dict[chapter_label] = common_elements
			
 
				+        # 存储公共元素到字典（用于去除操作）
			
 
				+        common_elements_dict[chapter_label] = list(common_elements)
			
 
				+        
			
 
				+        # 将公共元素添加到总列表中
			
 
				+        all_common_elements.extend(list(common_elements))
			
 
				         
			
 
				         logger.info(f"[目录审查] 章节: {chapter_label}, 公共元素: {common_elements}")
			
 
				     
			
 
				-    # 同时更新两个DataFrame，去除公共元素
			
 
				+    # 更新miss_outline_df，去除公共元素
			
 
				     miss_outline_df['miss_outline'] = miss_outline_df.apply(
			
 
				-        lambda row: list(set(row['miss_outline']) - common_elements_dict.get(row['chapter_label'], set()))
			
 
				+        lambda row: list(set(row['miss_outline']) - set(common_elements_dict.get(row['chapter_label'], [])))
			
 
				         if isinstance(row['miss_outline'], list) else [],
			
 
				         axis=1
			
 
				     )
			
 
				     
			
 
				+    # 更新redis_data，去除公共元素
			
 
				     redis_data['missing_items'] = redis_data.apply(
			
 
				-        lambda row: list(set(row['missing_items']) - common_elements_dict.get(row['chapter_label'], set()))
			
 
				+        lambda row: list(set(row['missing_items']) - set(common_elements_dict.get(row['chapter_label'], [])))
			
 
				         if isinstance(row['missing_items'], list) else [],
			
 
				         axis=1
			
 
				     )
			
 
				     
			
 
				-    logger.info(f"[目录审查] 已去除公共元素，同时更新了miss_outline_df和redis_data")
			
 
				+    logger.info(f"[目录审查] 已去除公共元素，同时更新了miss_outline_df和redis_data，所有公共元素: {all_common_elements}")
			
 
				     
			
 
				-    return miss_outline_df, redis_data
			
 
				+    return miss_outline_df, redis_data, all_common_elements
			
 
				 
			
 
				 
			
 
				 def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any]]:
			
 
				     """
			
 
				-    处理目录审查列表，从DataFrame中提取missing_items和miss_outline并生成审查项
			
 
				+    处理目录审查列表，从DataFrame中提取missing_items、common_elements_dict和miss_outline并生成审查项
			
 
				     
			
 
				     Args:
			
 
				         catogues_df: 包含目录审查数据的DataFrame，需要包含以下列：
			
 
				             - title: 标题
			
 
				             - chapter_label: 章节标签
			
 
				             - chapter_classification: 章节分类
			
 
				-            - missing_items: 目录缺失项（列表或字符串）
			
 
				+            - missing_items: 目录缺失项（列表或字符串）- 目录中缺失但大纲可能存在
			
 
				+            - common_elements_dict: 目录与大纲都缺失的公共元素（列表或字符串）
			
 
				             - miss_outline: 大纲缺失项（列表或字符串）
			
 
				             - specification_items: 规范项（列表或字符串）
			
 
				     
			
@@ -495,7 +504,7 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
 
				         # 将规范项列表拼接为字符串（用、号连接）
			
 
				         specification_items_text = '、'.join(specification_items_list) if specification_items_list else ''
			
 
				         
			
 
				-        # 解析 missing_items 列（目录缺失）
			
 
				+        # 解析 missing_items 列（目录缺失但大纲存在）
			
 
				         missing_items_str = row.get('missing_items', '')
			
 
				         try:
			
 
				             if pd.isna(missing_items_str) or missing_items_str == '':
			
@@ -513,6 +522,24 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
 
				                 logger.warning(f"第 {index} 行无法解析missing_items: {missing_items_str}")
			
 
				                 missing_items_list = []
			
 
				         
			
 
				+        # 解析 common_elements_dict 列（目录与大纲都缺失）
			
 
				+        common_elements_str = row.get('common_elements_dict', '')
			
 
				+        try:
			
 
				+            if pd.isna(common_elements_str) or common_elements_str == '':
			
 
				+                common_elements_list = []
			
 
				+            elif isinstance(common_elements_str, list):
			
 
				+                common_elements_list = common_elements_str
			
 
				+            else:
			
 
				+                # 尝试使用 ast.literal_eval 解析
			
 
				+                common_elements_list = ast.literal_eval(common_elements_str)
			
 
				+        except (ValueError, SyntaxError):
			
 
				+            try:
			
 
				+                # 尝试使用 json.loads 解析
			
 
				+                common_elements_list = json.loads(common_elements_str)
			
 
				+            except (json.JSONDecodeError, TypeError):
			
 
				+                logger.warning(f"第 {index} 行无法解析common_elements_dict: {common_elements_str}")
			
 
				+                common_elements_list = []
			
 
				+        
			
 
				         # 解析 miss_outline 列（大纲缺失）
			
 
				         miss_outline_str = row.get('miss_outline', '')
			
 
				         try:
			
@@ -531,26 +558,39 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
 
				                 logger.warning(f"第 {index} 行无法解析miss_outline: {miss_outline_str}")
			
 
				                 miss_outline_list = []
			
 
				         
			
 
				-        # 处理 missing_items（目录缺失）
			
 
				+        # 处理 missing_items（目录缺失但大纲存在）
			
 
				         if isinstance(missing_items_list, list) and len(missing_items_list) > 0:
			
 
				             for missing_item in missing_items_list:
			
 
				                 catalog_item = {
			
 
				                     "issue_point": f"{missing_item}缺失",
			
 
				                     "location": title if title else chapter_label,
			
 
				-                    "suggestion": f"目录缺失(missing_items)：在待审查目录中未找到与'{missing_item}'对应的章节；当前章节仅涉及'{title if title else chapter_label}'，未涵盖'{missing_item}'相关内容；整改建议：建议在本章或前序章节中增设'{missing_item}'相关内容，确保与审查规范要求一致。",
			
 
				+                    "suggestion": f"目录缺失：要点目录中缺失'{missing_item}'这个小节，但大纲中存在该小节；当前章节仅涉及'{title if title else chapter_label}'，目录中未体现'{missing_item}'相关内容；整改建议：建议在目录中补充'{missing_item}'相关内容，确保目录与大纲一致。",
			
 
				                     "reason": f"该章节应具备要点：{specification_items_text}" if specification_items_text else "",
			
 
				                     "risk_level": "高风险",
			
 
				                     "reference_source": '《桥梁公司危险性较大工程管理实施细则（2025版）》',
			
 
				                 }
			
 
				                 catogues_reciew_list.append(catalog_item)
			
 
				         
			
 
				+        # 处理 common_elements_dict（目录与大纲都缺失）
			
 
				+        if isinstance(common_elements_list, list) and len(common_elements_list) > 0:
			
 
				+            for common_element in common_elements_list:
			
 
				+                common_item = {
			
 
				+                    "issue_point": f"{common_element}缺失",
			
 
				+                    "location": title if title else chapter_label,
			
 
				+                    "suggestion": f"目录与大纲都缺失：要点目录和大纲中都缺失'{common_element}'这个小节；当前章节仅涉及'{title if title else chapter_label}'，目录和大纲均未涵盖'{common_element}'相关内容；整改建议：建议在本章或前序章节中增设'{common_element}'相关内容，确保目录和大纲都包含该小节。",
			
 
				+                    "reason": f"该章节应具备要点：{specification_items_text}" if specification_items_text else "",
			
 
				+                    "risk_level": "高风险",
			
 
				+                    "reference_source": '《桥梁公司危险性较大工程管理实施细则（2025版）》',
			
 
				+                }
			
 
				+                catogues_reciew_list.append(common_item)
			
 
				+        
			
 
				         # 处理 miss_outline（大纲缺失）
			
 
				         if isinstance(miss_outline_list, list) and len(miss_outline_list) > 0:
			
 
				             for miss_outline in miss_outline_list:
			
 
				                 outline_item = {
			
 
				                     "issue_point": f"{miss_outline}缺失",
			
 
				                     "location": title if title else chapter_label,
			
 
				-                    "suggestion": f"大纲缺失(miss_outline)：在待审查大纲中未找到与'{miss_outline}'对应的章节；当前章节仅涉及'{title if title else chapter_label}'，未涵盖'{miss_outline}'相关内容；整改建议：建议在本章或前序章节中增设'{miss_outline}'相关内容，确保与审查规范要求一致。",
			
 
				+                    "suggestion": f"大纲缺失：大纲中缺失'{miss_outline}'这个小节；当前章节仅涉及'{title if title else chapter_label}'，大纲中未涵盖'{miss_outline}'相关内容；整改建议：建议在大纲中补充'{miss_outline}'相关内容，确保大纲完整性。",
			
 
				                     "reason": f"该章节应具备要点：{specification_items_text}" if specification_items_text else "",
			
 
				                     "risk_level": "高风险",
			
 
				                     "reference_source": '《桥梁公司危险性较大工程管理实施细则（2025版）》',
			
--- a/core/construction_review/workflows/core_functions/ai_review_core_fun.py
+++ b/core/construction_review/workflows/core_functions/ai_review_core_fun.py
@@ -435,6 +435,7 @@ class AIReviewCoreFun:
 
				                 stage_name=f"{stage_name}_大纲审查"
			
 
				             )
			
 
				             logger.info(f"[outline_check完成] 共发现 {len(outline_result.get('details', {}).get('response', []))} 个缺失项")
			
 
				+
			
 
				             redis_manager = get_redis_manager()
			
 
				             catogues_df = redis_manager.read_all(task_id=state['callback_task_id'])
			
 
				             
			
@@ -444,20 +445,22 @@ class AIReviewCoreFun:
 
				             # 保存结果到CSV文件
			
 
				             # catogues_df.to_csv('temp/document_temp/catogues_df.csv', encoding='utf-8-sig', index=False)
			
 
				             # pd.DataFrame(catogues_reciew_list).to_csv('temp/document_temp/catogues_df_with_review.csv', encoding='utf-8-sig', index=False)
			
 
				-            # with open('temp/document_temp/catogues_result.json', 'a', encoding='utf-8') as f:
			
 
				-            #     json.dump(catogues_reciew_result, f, ensure_ascii=False, indent=4)
			
 
				+            with open('temp/document_temp/catogues_result.json', 'a', encoding='utf-8') as f:
			
 
				+                json.dump(catogues_reciew_result, f, ensure_ascii=False, indent=4)
			
 
				+
			
 
				             # 对比逻辑
			
 
				             if chapter_code == "catalogue":
			
 
				-                    return UnitReviewResult(
			
 
				-                        unit_index=chunk_index,
			
 
				-                        unit_content=chunk,
			
 
				-                        basic_compliance={
			
 
				-                            "catalogue_check": catogues_reciew_result
			
 
				-                        },
			
 
				-                        technical_compliance={},
			
 
				-                        rag_enhanced={},
			
 
				-                        overall_risk=self._calculate_single_result_risk(completeness_result),
			
 
				-                    )
			
 
				+
			
 
				+                return UnitReviewResult(
			
 
				+                    unit_index=chunk_index,
			
 
				+                    unit_content=chunk,
			
 
				+                    basic_compliance={
			
 
				+                        "catalogue_check": catogues_reciew_result
			
 
				+                    },
			
 
				+                    technical_compliance={},
			
 
				+                    rag_enhanced={},
			
 
				+                    overall_risk=self._calculate_single_result_risk(completeness_result),
			
 
				+                )
			
 
				             else:
			
 
				                 # 将两个结果都放入 basic_compliance
			
 
				                 return UnitReviewResult(