Sfoglia il codice sorgente

dev:目录审查模块的字段构造;

ChenJiSheng 1 mese fa
parent
commit
835e6fc0cd

+ 3 - 1
core/construction_review/component/ai_review_engine.py

@@ -1023,7 +1023,8 @@ class AIReviewEngine(BaseReviewer):
             path_redis = 'temp/document_temp/redis_data.csv'
 
             # 去除两个DataFrame中相同chapter_label行的miss_outline列与missing_items列的公共元素
-            miss_outline_df, redis_data = remove_common_elements_between_dataframes(miss_outline_df, redis_data)
+            miss_outline_df, redis_data, common_elements_list = remove_common_elements_between_dataframes(miss_outline_df, redis_data)
+            logger.info(f"[大纲审查] 公共元素列表: {common_elements_list}")
             
             # 将更新后的数据写回Redis
             for index, row in redis_data.iterrows():
@@ -1034,6 +1035,7 @@ class AIReviewEngine(BaseReviewer):
                     'chapter_label': chapter_label,
                     'chapter_classification': row.get('chapter_classification', ''),
                     'missing_items': row.get('missing_items', []),
+                    'common_elements_list': common_elements_list,
                     'miss_outline': miss_outline_df.loc[miss_outline_df['chapter_label'] == chapter_label, 'miss_outline'].values[0]
                                    if len(miss_outline_df.loc[miss_outline_df['chapter_label'] == chapter_label]) > 0 else [],
                     'specification_items': row.get('specification_items', []),

+ 56 - 16
core/construction_review/component/reviewers/catalogues_check/catalogues_check.py

@@ -383,23 +383,27 @@ class CatalogCheckProcessor:
 def remove_common_elements_between_dataframes(
     miss_outline_df: pd.DataFrame,
     redis_data: pd.DataFrame
-) -> tuple[pd.DataFrame, pd.DataFrame]:
+) -> tuple[pd.DataFrame, pd.DataFrame, List]:
     """
     去除两个DataFrame中相同chapter_label行的miss_outline列与missing_items列的公共元素
+    同时返回所有公共元素的列表
 
     Args:
         miss_outline_df: 包含miss_outline列的DataFrame
         redis_data: 包含missing_items列的DataFrame
 
     Returns:
-        tuple: (更新后的miss_outline_df, 更新后的redis_data)
+        tuple: (更新后的miss_outline_df, 更新后的redis_data, 所有公共元素列表)
     """
     # 合并两个DataFrame,基于chapter_label
     merged_df = pd.merge(miss_outline_df, redis_data, on='chapter_label', how='inner', suffixes=('_outline', '_redis'))
     
-    # 创建一个字典来存储公共元素
+    # 创建一个字典来存储公共元素(用于去除操作)
     common_elements_dict = {}
     
+    # 创建一个列表来存储所有公共元素
+    all_common_elements = []
+    
     # 遍历合并后的DataFrame,计算公共元素
     for index, row in merged_df.iterrows():
         chapter_label = row['chapter_label']
@@ -419,39 +423,44 @@ def remove_common_elements_between_dataframes(
         # 计算公共元素
         common_elements = miss_outline_set & missing_items_set
         
-        # 存储公共元素
-        common_elements_dict[chapter_label] = common_elements
+        # 存储公共元素到字典(用于去除操作)
+        common_elements_dict[chapter_label] = list(common_elements)
+        
+        # 将公共元素添加到总列表中
+        all_common_elements.extend(list(common_elements))
         
         logger.info(f"[目录审查] 章节: {chapter_label}, 公共元素: {common_elements}")
     
-    # 同时更新两个DataFrame,去除公共元素
+    # 更新miss_outline_df,去除公共元素
     miss_outline_df['miss_outline'] = miss_outline_df.apply(
-        lambda row: list(set(row['miss_outline']) - common_elements_dict.get(row['chapter_label'], set()))
+        lambda row: list(set(row['miss_outline']) - set(common_elements_dict.get(row['chapter_label'], [])))
         if isinstance(row['miss_outline'], list) else [],
         axis=1
     )
     
+    # 更新redis_data,去除公共元素
     redis_data['missing_items'] = redis_data.apply(
-        lambda row: list(set(row['missing_items']) - common_elements_dict.get(row['chapter_label'], set()))
+        lambda row: list(set(row['missing_items']) - set(common_elements_dict.get(row['chapter_label'], [])))
         if isinstance(row['missing_items'], list) else [],
         axis=1
     )
     
-    logger.info(f"[目录审查] 已去除公共元素,同时更新了miss_outline_df和redis_data")
+    logger.info(f"[目录审查] 已去除公共元素,同时更新了miss_outline_df和redis_data,所有公共元素: {all_common_elements}")
     
-    return miss_outline_df, redis_data
+    return miss_outline_df, redis_data, all_common_elements
 
 
 def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any]]:
     """
-    处理目录审查列表,从DataFrame中提取missing_items和miss_outline并生成审查项
+    处理目录审查列表,从DataFrame中提取missing_items、common_elements_dict和miss_outline并生成审查项
     
     Args:
         catogues_df: 包含目录审查数据的DataFrame,需要包含以下列:
             - title: 标题
             - chapter_label: 章节标签
             - chapter_classification: 章节分类
-            - missing_items: 目录缺失项(列表或字符串)
+            - missing_items: 目录缺失项(列表或字符串)- 目录中缺失但大纲可能存在
+            - common_elements_dict: 目录与大纲都缺失的公共元素(列表或字符串)
             - miss_outline: 大纲缺失项(列表或字符串)
             - specification_items: 规范项(列表或字符串)
     
@@ -495,7 +504,7 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
         # 将规范项列表拼接为字符串(用、号连接)
         specification_items_text = '、'.join(specification_items_list) if specification_items_list else ''
         
-        # 解析 missing_items 列(目录缺失)
+        # 解析 missing_items 列(目录缺失但大纲存在)
         missing_items_str = row.get('missing_items', '')
         try:
             if pd.isna(missing_items_str) or missing_items_str == '':
@@ -513,6 +522,24 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
                 logger.warning(f"第 {index} 行无法解析missing_items: {missing_items_str}")
                 missing_items_list = []
         
+        # 解析 common_elements_dict 列(目录与大纲都缺失)
+        common_elements_str = row.get('common_elements_dict', '')
+        try:
+            if pd.isna(common_elements_str) or common_elements_str == '':
+                common_elements_list = []
+            elif isinstance(common_elements_str, list):
+                common_elements_list = common_elements_str
+            else:
+                # 尝试使用 ast.literal_eval 解析
+                common_elements_list = ast.literal_eval(common_elements_str)
+        except (ValueError, SyntaxError):
+            try:
+                # 尝试使用 json.loads 解析
+                common_elements_list = json.loads(common_elements_str)
+            except (json.JSONDecodeError, TypeError):
+                logger.warning(f"第 {index} 行无法解析common_elements_dict: {common_elements_str}")
+                common_elements_list = []
+        
         # 解析 miss_outline 列(大纲缺失)
         miss_outline_str = row.get('miss_outline', '')
         try:
@@ -531,26 +558,39 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
                 logger.warning(f"第 {index} 行无法解析miss_outline: {miss_outline_str}")
                 miss_outline_list = []
         
-        # 处理 missing_items(目录缺失)
+        # 处理 missing_items(目录缺失但大纲存在)
         if isinstance(missing_items_list, list) and len(missing_items_list) > 0:
             for missing_item in missing_items_list:
                 catalog_item = {
                     "issue_point": f"{missing_item}缺失",
                     "location": title if title else chapter_label,
-                    "suggestion": f"目录缺失(missing_items):在待审查目录中未找到与'{missing_item}'对应的章节;当前章节仅涉及'{title if title else chapter_label}',未涵盖'{missing_item}'相关内容;整改建议:建议在本章或前序章节中增设'{missing_item}'相关内容,确保与审查规范要求一致。",
+                    "suggestion": f"目录缺失:要点目录中缺失'{missing_item}'这个小节,但大纲中存在该小节;当前章节仅涉及'{title if title else chapter_label}',目录中未体现'{missing_item}'相关内容;整改建议:建议在目录中补充'{missing_item}'相关内容,确保目录与大纲一致。",
                     "reason": f"该章节应具备要点:{specification_items_text}" if specification_items_text else "",
                     "risk_level": "高风险",
                     "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',
                 }
                 catogues_reciew_list.append(catalog_item)
         
+        # 处理 common_elements_dict(目录与大纲都缺失)
+        if isinstance(common_elements_list, list) and len(common_elements_list) > 0:
+            for common_element in common_elements_list:
+                common_item = {
+                    "issue_point": f"{common_element}缺失",
+                    "location": title if title else chapter_label,
+                    "suggestion": f"目录与大纲都缺失:要点目录和大纲中都缺失'{common_element}'这个小节;当前章节仅涉及'{title if title else chapter_label}',目录和大纲均未涵盖'{common_element}'相关内容;整改建议:建议在本章或前序章节中增设'{common_element}'相关内容,确保目录和大纲都包含该小节。",
+                    "reason": f"该章节应具备要点:{specification_items_text}" if specification_items_text else "",
+                    "risk_level": "高风险",
+                    "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',
+                }
+                catogues_reciew_list.append(common_item)
+        
         # 处理 miss_outline(大纲缺失)
         if isinstance(miss_outline_list, list) and len(miss_outline_list) > 0:
             for miss_outline in miss_outline_list:
                 outline_item = {
                     "issue_point": f"{miss_outline}缺失",
                     "location": title if title else chapter_label,
-                    "suggestion": f"大纲缺失(miss_outline):在待审查大纲中未找到与'{miss_outline}'对应的章节;当前章节仅涉及'{title if title else chapter_label}',未涵盖'{miss_outline}'相关内容;整改建议:建议在本章或前序章节中增设'{miss_outline}'相关内容,确保与审查规范要求一致。",
+                    "suggestion": f"大纲缺失:大纲中缺失'{miss_outline}'这个小节;当前章节仅涉及'{title if title else chapter_label}',大纲中未涵盖'{miss_outline}'相关内容;整改建议:建议在大纲中补充'{miss_outline}'相关内容,确保大纲完整性。",
                     "reason": f"该章节应具备要点:{specification_items_text}" if specification_items_text else "",
                     "risk_level": "高风险",
                     "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',

+ 15 - 12
core/construction_review/workflows/core_functions/ai_review_core_fun.py

@@ -435,6 +435,7 @@ class AIReviewCoreFun:
                 stage_name=f"{stage_name}_大纲审查"
             )
             logger.info(f"[outline_check完成] 共发现 {len(outline_result.get('details', {}).get('response', []))} 个缺失项")
+
             redis_manager = get_redis_manager()
             catogues_df = redis_manager.read_all(task_id=state['callback_task_id'])
             
@@ -444,20 +445,22 @@ class AIReviewCoreFun:
             # 保存结果到CSV文件
             # catogues_df.to_csv('temp/document_temp/catogues_df.csv', encoding='utf-8-sig', index=False)
             # pd.DataFrame(catogues_reciew_list).to_csv('temp/document_temp/catogues_df_with_review.csv', encoding='utf-8-sig', index=False)
-            # with open('temp/document_temp/catogues_result.json', 'a', encoding='utf-8') as f:
-            #     json.dump(catogues_reciew_result, f, ensure_ascii=False, indent=4)
+            with open('temp/document_temp/catogues_result.json', 'a', encoding='utf-8') as f:
+                json.dump(catogues_reciew_result, f, ensure_ascii=False, indent=4)
+
             # 对比逻辑
             if chapter_code == "catalogue":
-                    return UnitReviewResult(
-                        unit_index=chunk_index,
-                        unit_content=chunk,
-                        basic_compliance={
-                            "catalogue_check": catogues_reciew_result
-                        },
-                        technical_compliance={},
-                        rag_enhanced={},
-                        overall_risk=self._calculate_single_result_risk(completeness_result),
-                    )
+
+                return UnitReviewResult(
+                    unit_index=chunk_index,
+                    unit_content=chunk,
+                    basic_compliance={
+                        "catalogue_check": catogues_reciew_result
+                    },
+                    technical_compliance={},
+                    rag_enhanced={},
+                    overall_risk=self._calculate_single_result_risk(completeness_result),
+                )
             else:
                 # 将两个结果都放入 basic_compliance
                 return UnitReviewResult(