|
@@ -762,25 +762,13 @@ class AIReviewEngine(BaseReviewer):
|
|
|
review_results_df = pd.DataFrame(review_results)
|
|
review_results_df = pd.DataFrame(review_results)
|
|
|
chapter_labels = review_results_df['section_label'].str.split('->').str[0]
|
|
chapter_labels = review_results_df['section_label'].str.split('->').str[0]
|
|
|
review_results_df['title'] = chapter_labels
|
|
review_results_df['title'] = chapter_labels
|
|
|
- # review_results_df.to_csv(Path('temp') / 'document_temp' / '2_spec_review_results.csv', encoding='utf-8-sig', index=False)
|
|
|
|
|
- # csv_file = rf'temp\document_temp\2_spec_review_results.csv'
|
|
|
|
|
- # path2 = rf'temp\document_temp\outlines_review_results.csv'
|
|
|
|
|
- # data_df = pd.read_csv(csv_file, encoding='utf-8-sig')
|
|
|
|
|
- # data_df = review_results_df
|
|
|
|
|
- outline_review_results = outline_review_results_df(data=review_results_df)
|
|
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
|
|
+ # 将审查结果存储到Redis,供 outline_check 使用
|
|
|
logger.info(f"[完整性检查] 准备将大纲审查结果存储到Redis,bind_id: {trace_id_idx}")
|
|
logger.info(f"[完整性检查] 准备将大纲审查结果存储到Redis,bind_id: {trace_id_idx}")
|
|
|
- logger.info(f"[完整性检查] 大纲审查结果行数: {len(outline_review_results) if outline_review_results is not None else 'None'}")
|
|
|
|
|
- outline_review_result = await self.outline_check(
|
|
|
|
|
- trace_id_idx = state["callback_task_id"],
|
|
|
|
|
- outline_content = outline_review_results,
|
|
|
|
|
- state = state,
|
|
|
|
|
- stage_name = state.get("stage_name", "大纲审查")
|
|
|
|
|
- )
|
|
|
|
|
- # df_store_to_redis(self.redis_client, data=outline_review_results, bind_id=trace_id_idx)
|
|
|
|
|
-
|
|
|
|
|
|
|
+ from .reviewers.check_completeness.utils.redis_csv_utils import df_store_to_redis
|
|
|
|
|
+ df_store_to_redis(self.redis_client, data=review_results_df, bind_id=trace_id_idx)
|
|
|
logger.info(f"[完整性检查] 数据已成功存储到Redis,bind_id: {trace_id_idx}")
|
|
logger.info(f"[完整性检查] 数据已成功存储到Redis,bind_id: {trace_id_idx}")
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
df_filtered = review_results_df.drop_duplicates(subset='title', keep='first').reset_index(drop=True)
|
|
df_filtered = review_results_df.drop_duplicates(subset='title', keep='first').reset_index(drop=True)
|
|
|
unique_chapter_labels = chapter_labels.unique().tolist()
|
|
unique_chapter_labels = chapter_labels.unique().tolist()
|
|
|
chapter_classifications = df_filtered['chapter_classification']
|
|
chapter_classifications = df_filtered['chapter_classification']
|
|
@@ -1006,10 +994,17 @@ class AIReviewEngine(BaseReviewer):
|
|
|
metadata = {}
|
|
metadata = {}
|
|
|
try:
|
|
try:
|
|
|
# 从Redis读取并保存为新的CSV文件
|
|
# 从Redis读取并保存为新的CSV文件
|
|
|
- # rows_df = read_from_redis_and_save_csv(self.redis_client, bind_id=trace_id_idx)
|
|
|
|
|
- df = outline_content
|
|
|
|
|
|
|
+ # 如果传入的 outline_content 为 None,尝试从 Redis 获取数据
|
|
|
|
|
+ if outline_content is None:
|
|
|
|
|
+ logger.info(f"[大纲审查] outline_content 为 None,尝试从 Redis 获取数据 (bind_id: {trace_id_idx})")
|
|
|
|
|
+ from .reviewers.check_completeness.utils.redis_csv_utils import read_from_redis_and_save_csv
|
|
|
|
|
+ df = read_from_redis_and_save_csv(self.redis_client, bind_id=trace_id_idx)
|
|
|
|
|
+ else:
|
|
|
|
|
+ df = outline_content
|
|
|
|
|
+
|
|
|
# df = merge_results_by_classification(rows_df)
|
|
# df = merge_results_by_classification(rows_df)
|
|
|
- df.to_csv(csv_path, encoding='utf-8-sig', index=False)
|
|
|
|
|
|
|
+ if df is not None:
|
|
|
|
|
+ df.to_csv(csv_path, encoding='utf-8-sig', index=False)
|
|
|
# 检查 df 是否为 None
|
|
# 检查 df 是否为 None
|
|
|
if df is None:
|
|
if df is None:
|
|
|
logger.error(f"[大纲审查] Redis中不存在ID '{trace_id_idx}' 的数据,无法进行大纲审查")
|
|
logger.error(f"[大纲审查] Redis中不存在ID '{trace_id_idx}' 的数据,无法进行大纲审查")
|
|
@@ -1021,16 +1016,32 @@ class AIReviewEngine(BaseReviewer):
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
logger.info(f"[大纲审查] 成功从Redis读取数据,共 {len(df)} 行")
|
|
logger.info(f"[大纲审查] 成功从Redis读取数据,共 {len(df)} 行")
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
|
|
+ # 检查 df 是否为空
|
|
|
|
|
+ if df.empty or len(df) == 0:
|
|
|
|
|
+ logger.warning(f"[大纲审查] DataFrame为空,无法进行大纲审查")
|
|
|
|
|
+ return {
|
|
|
|
|
+ 'outline_review_result': {
|
|
|
|
|
+ "response": [],
|
|
|
|
|
+ },
|
|
|
|
|
+ 'success': False,
|
|
|
|
|
+ 'execution_time': time.time() - start_time
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
# 兼容新旧字段名
|
|
# 兼容新旧字段名
|
|
|
- chapter_label_col = 'chapter_label' if 'chapter_label' in df.columns else 'section_label_first'
|
|
|
|
|
- review_results_col = 'review_results_summary' if 'review_results_summary' in df.columns else 'merged_review_results'
|
|
|
|
|
-
|
|
|
|
|
|
|
+ chapter_label_col = 'chapter_label' if 'chapter_label' in df.columns else ('section_label_first' if 'section_label_first' in df.columns else 'section_label')
|
|
|
|
|
+ review_results_col = 'review_results_summary' if 'review_results_summary' in df.columns else ('merged_review_results' if 'merged_review_results' in df.columns else 'review_result')
|
|
|
|
|
+
|
|
|
|
|
+ logger.info(f"[大纲审查] 使用 chapter_label_col={chapter_label_col}, review_results_col={review_results_col}")
|
|
|
|
|
+ logger.info(f"[大纲审查] DataFrame 列: {list(df.columns)}")
|
|
|
|
|
+
|
|
|
# 遍历每一行
|
|
# 遍历每一行
|
|
|
for index, row in df.iterrows():
|
|
for index, row in df.iterrows():
|
|
|
chapter_label = row.get(chapter_label_col, '')
|
|
chapter_label = row.get(chapter_label_col, '')
|
|
|
merged_results_str = row.get(review_results_col, '')
|
|
merged_results_str = row.get(review_results_col, '')
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
|
|
+ logger.info(f"第 {index} 行: chapter_label={chapter_label}, {review_results_col}={merged_results_str}")
|
|
|
|
|
+
|
|
|
# 解析review_results_summary字典字符串
|
|
# 解析review_results_summary字典字符串
|
|
|
try:
|
|
try:
|
|
|
if pd.isna(merged_results_str) or merged_results_str == '':
|
|
if pd.isna(merged_results_str) or merged_results_str == '':
|
|
@@ -1048,21 +1059,39 @@ class AIReviewEngine(BaseReviewer):
|
|
|
except (json.JSONDecodeError, TypeError):
|
|
except (json.JSONDecodeError, TypeError):
|
|
|
logger.warning(f"第 {index} 行无法解析review_results_summary: {merged_results_str}")
|
|
logger.warning(f"第 {index} 行无法解析review_results_summary: {merged_results_str}")
|
|
|
merged_results = {}
|
|
merged_results = {}
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
# 检查字典中的每个字段
|
|
# 检查字典中的每个字段
|
|
|
if isinstance(merged_results, dict):
|
|
if isinstance(merged_results, dict):
|
|
|
- logger.info(f"开始大纲审查")
|
|
|
|
|
-
|
|
|
|
|
|
|
+ # 检查是否有错误信息
|
|
|
|
|
+ if 'error' in merged_results:
|
|
|
|
|
+ logger.warning(f"第 {index} 行审查结果包含错误: {merged_results.get('error')}")
|
|
|
|
|
+ continue # 跳过错误行,不进行大纲审查
|
|
|
|
|
+
|
|
|
|
|
+ logger.info(f"第 {index} 行 merged_results 键: {list(merged_results.keys())}")
|
|
|
|
|
+
|
|
|
# 获取chapter_label列表
|
|
# 获取chapter_label列表
|
|
|
chapter_labels_list = row.get(chapter_label_col, [])
|
|
chapter_labels_list = row.get(chapter_label_col, [])
|
|
|
if not isinstance(chapter_labels_list, list):
|
|
if not isinstance(chapter_labels_list, list):
|
|
|
chapter_labels_list = [str(chapter_labels_list)]
|
|
chapter_labels_list = [str(chapter_labels_list)]
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
# 获取review_results_summary字典的所有键,用于reason字段
|
|
# 获取review_results_summary字典的所有键,用于reason字段
|
|
|
merged_results_keys = list(merged_results.keys())
|
|
merged_results_keys = list(merged_results.keys())
|
|
|
|
|
+ # 排除error等非审查字段
|
|
|
|
|
+ merged_results_keys = [k for k in merged_results_keys if k not in ['error', 'chunk_id', 'page', 'section_label', 'chapter_classification', 'chapter_code', 'title']]
|
|
|
|
|
+
|
|
|
|
|
+ logger.info(f"第 {index} 行过滤后的有效字段: {merged_results_keys}")
|
|
|
|
|
+
|
|
|
|
|
+ if not merged_results_keys:
|
|
|
|
|
+ logger.info(f"第 {index} 行没有有效的审查字段,跳过大纲审查")
|
|
|
|
|
+ continue
|
|
|
|
|
+
|
|
|
merged_results_keys_str = "、".join(merged_results_keys)
|
|
merged_results_keys_str = "、".join(merged_results_keys)
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
|
|
+ # 只处理有效的审查字段
|
|
|
for field_name, field_value in merged_results.items():
|
|
for field_name, field_value in merged_results.items():
|
|
|
|
|
+ # 跳过非审查字段
|
|
|
|
|
+ if field_name in ['error', 'chunk_id', 'page']:
|
|
|
|
|
+ continue
|
|
|
# 检查列表是否为空
|
|
# 检查列表是否为空
|
|
|
if isinstance(field_value, list) and len(field_value) == 0:
|
|
if isinstance(field_value, list) and len(field_value) == 0:
|
|
|
# 为chapter_label列表中的每个值创建单独的缺失项
|
|
# 为chapter_label列表中的每个值创建单独的缺失项
|
|
@@ -1074,8 +1103,8 @@ class AIReviewEngine(BaseReviewer):
|
|
|
"suggestion": f"在待审查目录中未找到与'{field_name}'对应的章节;当前章节仅涉及'{chapter_label}',未涵盖'{field_name}'相关内容;整改建议:建议在本章或前序章节中增设'{field_name}'相关内容,确保与审查规范要求一致。",
|
|
"suggestion": f"在待审查目录中未找到与'{field_name}'对应的章节;当前章节仅涉及'{chapter_label}',未涵盖'{field_name}'相关内容;整改建议:建议在本章或前序章节中增设'{field_name}'相关内容,确保与审查规范要求一致。",
|
|
|
"reason": f"本章应包含{merged_results_keys_str}等{len(merged_results_keys)}个方面。",
|
|
"reason": f"本章应包含{merged_results_keys_str}等{len(merged_results_keys)}个方面。",
|
|
|
"risk_level": "高风险",
|
|
"risk_level": "高风险",
|
|
|
- "review_references": '',
|
|
|
|
|
- "reference_source": '',
|
|
|
|
|
|
|
+ # "review_references": '',
|
|
|
|
|
+ "reference_source": '《桥梁公司危险性较大工程管理实施细则(2025版)》',
|
|
|
|
|
|
|
|
}
|
|
}
|
|
|
missing_items.append(missing_item)
|
|
missing_items.append(missing_item)
|
|
@@ -1093,7 +1122,7 @@ class AIReviewEngine(BaseReviewer):
|
|
|
execution_time = time.time() - start_time
|
|
execution_time = time.time() - start_time
|
|
|
return {
|
|
return {
|
|
|
"details": {
|
|
"details": {
|
|
|
- "name": "outline_check",
|
|
|
|
|
|
|
+ "name": "completeness_check",
|
|
|
"response": [],
|
|
"response": [],
|
|
|
"review_location_label": "",
|
|
"review_location_label": "",
|
|
|
"chapter_code": "catalogue",
|
|
"chapter_code": "catalogue",
|
|
@@ -1107,7 +1136,7 @@ class AIReviewEngine(BaseReviewer):
|
|
|
execution_time = time.time() - start_time
|
|
execution_time = time.time() - start_time
|
|
|
return {
|
|
return {
|
|
|
"details": {
|
|
"details": {
|
|
|
- "name": "outline_check",
|
|
|
|
|
|
|
+ "name": "completeness_check",
|
|
|
"response": [],
|
|
"response": [],
|
|
|
"review_location_label": "",
|
|
"review_location_label": "",
|
|
|
"chapter_code": "catalogue",
|
|
"chapter_code": "catalogue",
|
|
@@ -1117,11 +1146,19 @@ class AIReviewEngine(BaseReviewer):
|
|
|
"execution_time": execution_time
|
|
"execution_time": execution_time
|
|
|
}
|
|
}
|
|
|
logger.info(f"大纲审查完成,耗时 {execution_time:.2f} 秒")
|
|
logger.info(f"大纲审查完成,耗时 {execution_time:.2f} 秒")
|
|
|
|
|
+
|
|
|
|
|
+ # 获取 review_location_label,使用兼容的字段名
|
|
|
|
|
+ review_location_col = 'chapter_label' if 'chapter_label' in df.columns else 'section_label_first'
|
|
|
|
|
+ if review_location_col not in df.columns or len(df) == 0:
|
|
|
|
|
+ review_location_label = "未知位置"
|
|
|
|
|
+ else:
|
|
|
|
|
+ review_location_label = df[review_location_col].to_list()[-1]
|
|
|
|
|
+
|
|
|
outcheck_result = {
|
|
outcheck_result = {
|
|
|
"details": {
|
|
"details": {
|
|
|
"name": "completeness_check",
|
|
"name": "completeness_check",
|
|
|
"response": missing_items,
|
|
"response": missing_items,
|
|
|
- "review_location_label": df['chapter_label'].to_list()[-1],
|
|
|
|
|
|
|
+ "review_location_label": review_location_label,
|
|
|
"chapter_code": 'catalogue',
|
|
"chapter_code": 'catalogue',
|
|
|
"original_content": ""
|
|
"original_content": ""
|
|
|
},
|
|
},
|