Prechádzať zdrojové kódy

dev:目录审查模块的完成结果字段数据的构建;

ChenJiSheng 1 mesiac pred
rodič
commit
fd7bba28ec

+ 2 - 1
core/construction_review/component/ai_review_engine.py

@@ -1035,7 +1035,8 @@ class AIReviewEngine(BaseReviewer):
                     'chapter_classification': row.get('chapter_classification', ''),
                     'missing_items': row.get('missing_items', []),
                     'miss_outline': miss_outline_df.loc[miss_outline_df['chapter_label'] == chapter_label, 'miss_outline'].values[0]
-                                   if len(miss_outline_df.loc[miss_outline_df['chapter_label'] == chapter_label]) > 0 else []
+                                   if len(miss_outline_df.loc[miss_outline_df['chapter_label'] == chapter_label]) > 0 else [],
+                    'specification_items': row.get('specification_items', []),
                 }
                 # 使用 update_row_by_title 方法更新Redis中的数据
                 update_success = redis_manager.update_row_by_title(state['callback_task_id'], chapter_label, update_data)

+ 31 - 5
core/construction_review/component/reviewers/catalogues_check/catalogues_check.py

@@ -274,7 +274,8 @@ class CatalogCheckProcessor:
                     'index': catalog['index'],
                     'title': catalog['title'],
                     'chapter_classification': label,
-                    'missing_items': f"未找到标签 '{label}' 的规范要求"
+                    'missing_items': f"未找到标签 '{label}' 的规范要求",
+                    'specification_items': []
                 })
                 continue
 
@@ -298,7 +299,8 @@ class CatalogCheckProcessor:
                 'index': catalog['index'],
                 'title': catalog['title'],
                 'chapter_classification': label,
-                'missing_items': missing_items
+                'missing_items': missing_items,
+                'specification_items': spec["二级目录"]
             })
 
             logger.info(f"审查结果: {missing_items}")
@@ -371,7 +373,7 @@ class CatalogCheckProcessor:
         logger.info(f"保存审查结果到: {output_file}")
 
         with open(output_file, 'w', encoding='utf-8-sig', newline='') as f:
-            writer = csv.DictWriter(f, fieldnames=['index', 'title', 'chapter_classification', 'missing_items'])
+            writer = csv.DictWriter(f, fieldnames=['index', 'title', 'chapter_classification', 'missing_items', 'specification_items'])
             writer.writeheader()
             writer.writerows(results)
 
@@ -451,6 +453,7 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
             - chapter_classification: 章节分类
             - missing_items: 目录缺失项(列表或字符串)
             - miss_outline: 大纲缺失项(列表或字符串)
+            - specification_items: 规范项(列表或字符串)
     
     Returns:
         List[Dict[str, Any]]: 审查项列表,每个项包含:
@@ -469,6 +472,29 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
         chapter_label = row.get('chapter_label', '')
         chapter_classification = row.get('chapter_classification', '')
         
+        # 解析 specification_items 列(规范项)
+        specification_items_str = row.get('specification_items', '')
+        specification_items_list = []
+        try:
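+            # Check for a real list first: pd.isna() on a list returns an element-wise boolean array (not a single bool), so it cannot be used safely in the `or` test below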
+            if isinstance(specification_items_str, list):
+                specification_items_list = specification_items_str
+            elif pd.isna(specification_items_str) or specification_items_str == '':
+                specification_items_list = []
+            else:
+                # 尝试使用 ast.literal_eval 解析
+                specification_items_list = ast.literal_eval(specification_items_str)
+        except (ValueError, SyntaxError):
+            try:
+                # 尝试使用 json.loads 解析
+                specification_items_list = json.loads(specification_items_str)
+            except (json.JSONDecodeError, TypeError):
+                logger.warning(f"第 {index} 行无法解析specification_items: {specification_items_str}")
+                specification_items_list = []
+        
+        # 将规范项列表拼接为字符串(用、号连接)
+        specification_items_text = '、'.join(specification_items_list) if specification_items_list else ''
+        
         # 解析 missing_items 列(目录缺失)
         missing_items_str = row.get('missing_items', '')
         try:
@@ -512,7 +538,7 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
                     "issue_point": f"{missing_item}缺失",
                     "location": title if title else chapter_label,
                     "suggestion": f"目录缺失(missing_items):在待审查目录中未找到与'{missing_item}'对应的章节;当前章节仅涉及'{title if title else chapter_label}',未涵盖'{missing_item}'相关内容;整改建议:建议在本章或前序章节中增设'{missing_item}'相关内容,确保与审查规范要求一致。",
-                    "reason": "",
+                    "reason": f"该章节应具备要点:{specification_items_text}" if specification_items_text else "",
                     "risk_level": "高风险",
                     "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',
                 }
@@ -525,7 +551,7 @@ def process_catalog_review_list(catogues_df: pd.DataFrame) -> List[Dict[str, Any
                     "issue_point": f"{miss_outline}缺失",
                     "location": title if title else chapter_label,
                     "suggestion": f"大纲缺失(miss_outline):在待审查大纲中未找到与'{miss_outline}'对应的章节;当前章节仅涉及'{title if title else chapter_label}',未涵盖'{miss_outline}'相关内容;整改建议:建议在本章或前序章节中增设'{miss_outline}'相关内容,确保与审查规范要求一致。",
-                    "reason": "",
+                    "reason": f"该章节应具备要点:{specification_items_text}" if specification_items_text else "",
                     "risk_level": "高风险",
                     "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',
                 }

+ 45 - 6
core/construction_review/component/reviewers/catalogues_check/utils/redis_utils.py

@@ -97,7 +97,7 @@ class CataloguesRedisManager:
         logger.info(f"数据存储完成,共 {len(rows)} 行")
         return len(rows)
 
-    def read_all(self, task_id: str, flag = None) -> pd.DataFrame:
+    def read_all(self, task_id: str) -> pd.DataFrame:
         """
         读取所有数据
 
@@ -130,10 +130,6 @@ class CataloguesRedisManager:
                 rows.append(row)
 
         logger.info(f"读取完成,共 {len(rows)} 行")
-        # if flag==True:
-        #     # 读取成功后清空 Redis 中的数据
-        #     redis_client.delete(data_key)
-        #     logger.info(f"已清空 Redis 中 task_id '{task_id}' 的数据")
 
         return pd.DataFrame(rows)
 
@@ -264,6 +260,48 @@ class CataloguesRedisManager:
 
         return True
 
+    def delete_by_id(self, task_id: str) -> bool:
+        """
+        根据任务 ID 删除该 ID 下所有函数写入的 Redis 数据
+
+        此函数会清空指定 task_id 对应的所有 Redis 数据,包括:
+        - catalogues_check:{task_id}:data 下的所有数据
+
+        Args:
+            task_id: 任务 ID
+
+        Returns:
+            bool: 删除是否成功
+        """
+        redis_client = self.get_redis_client()
+        data_key = self._get_data_key(task_id)
+
+        logger.info(f"开始删除 task_id '{task_id}' 下的所有 Redis 数据")
+
+        try:
+            # 检查数据是否存在
+            if not redis_client.exists(data_key):
+                logger.warning(f"task_id '{task_id}' 对应的数据不存在")
+                return False
+
+            # 获取删除前的数据量(用于日志)
+            row_count = int(redis_client.hget(data_key, "_row_count") or 0)
+            field_count = len(redis_client.hkeys(data_key))
+
+            # 删除整个 hash
+            deleted_count = redis_client.delete(data_key)
+
+            if deleted_count > 0:
+                logger.info(f"成功删除 task_id '{task_id}' 的数据,共 {field_count} 个字段({row_count} 行数据)")
+                return True
+            else:
+                logger.warning(f"删除 task_id '{task_id}' 的数据失败")
+                return False
+
+        except Exception as e:
+            logger.error(f"删除 task_id '{task_id}' 的数据时发生错误: {e}")
+            return False
+
     def get_row_count(self, task_id: str) -> int:
         """
         获取指定任务的行数
@@ -337,7 +375,8 @@ class CataloguesRedisManager:
                 catalogues_data_list.append({
                     'chapter_label': chapter_label,
                     'chapter_classification': redis_row.get('chapter_classification', ''),
-                    'missing_items': missing_items
+                    'missing_items': missing_items,
+                    'specification_items': redis_row.get('specification_items', ''),
                 })
                 logger.info(f"[Redis] 从Redis查询到章节: {chapter_label}, 缺失项: {missing_items}")
             else:

+ 1 - 1
core/construction_review/workflows/ai_review_workflow.py

@@ -326,7 +326,7 @@ class AIReviewWorkflow:
             
             outline_results['miss_outline'] = None
             # outline_results['chapter_label'] = outline_results['title']
-            # outline_results.to_csv(r"temp/document_temp/outline_results.csv", encoding='utf-8-sig', index=False)
+            outline_results.to_csv(r"temp/document_temp/outline_results11.csv", encoding='utf-8-sig', index=False)
             
             # 存储到 Redis(使用 callback_task_id 作为任务 ID)
             try:

+ 2 - 2
core/construction_review/workflows/core_functions/ai_review_core_fun.py

@@ -442,12 +442,12 @@ class AIReviewCoreFun:
             catogues_reciew_result = process_catalog_review_list(catogues_df)
             
             # 保存结果到CSV文件
-            catogues_df.to_csv('temp/document_temp/catogues_df.csv', encoding='utf-8-sig', index=False)
+            # catogues_df.to_csv('temp/document_temp/catogues_df.csv', encoding='utf-8-sig', index=False)
             # pd.DataFrame(catogues_reciew_list).to_csv('temp/document_temp/catogues_df_with_review.csv', encoding='utf-8-sig', index=False)
             # with open('temp/document_temp/catogues_result.json', 'a', encoding='utf-8') as f:
             #     json.dump(catogues_reciew_result, f, ensure_ascii=False, indent=4)
             # 对比逻辑
-            if chapter_code == "catalogue" and func_name == "catalogue_check":
+            if chapter_code == "catalogue":
                     return UnitReviewResult(
                         unit_index=chunk_index,
                         unit_content=chunk,