|
@@ -54,7 +54,9 @@ import time
|
|
|
from dataclasses import dataclass
|
|
from dataclasses import dataclass
|
|
|
from enum import Enum
|
|
from enum import Enum
|
|
|
from typing import Any, Dict, List, Optional, Sequence
|
|
from typing import Any, Dict, List, Optional, Sequence
|
|
|
-
|
|
|
|
|
|
|
+import pandas as pd
|
|
|
|
|
+import json
|
|
|
|
|
+import ast # 用于安全解析字符串为Python对象
|
|
|
import pandas as pd
|
|
import pandas as pd
|
|
|
|
|
|
|
|
from core.base.task_models import TaskFileInfo
|
|
from core.base.task_models import TaskFileInfo
|
|
@@ -88,7 +90,7 @@ from .reviewers.check_completeness.components.result_saver import ResultSaver
|
|
|
from .reviewers.check_completeness.components.result_analyzer import ResultAnalyzer
|
|
from .reviewers.check_completeness.components.result_analyzer import ResultAnalyzer
|
|
|
from .reviewers.check_completeness.utils.file_utils import write_json
|
|
from .reviewers.check_completeness.utils.file_utils import write_json
|
|
|
from core.construction_review.component.reviewers.base_reviewer import ReviewResult
|
|
from core.construction_review.component.reviewers.base_reviewer import ReviewResult
|
|
|
-
|
|
|
|
|
|
|
+from .reviewers.outline_check import outline_review_results_df
|
|
|
@dataclass
|
|
@dataclass
|
|
|
class ReviewResult:
|
|
class ReviewResult:
|
|
|
"""审查结果"""
|
|
"""审查结果"""
|
|
@@ -668,7 +670,6 @@ class AIReviewEngine(BaseReviewer):
|
|
|
# json.dump(review_content, f, ensure_ascii=False, indent=4)
|
|
# json.dump(review_content, f, ensure_ascii=False, indent=4)
|
|
|
name = "completeness_check"
|
|
name = "completeness_check"
|
|
|
start_time = time.time()
|
|
start_time = time.time()
|
|
|
-
|
|
|
|
|
try:
|
|
try:
|
|
|
# 验证review_content格式
|
|
# 验证review_content格式
|
|
|
if not isinstance(review_content, list):
|
|
if not isinstance(review_content, list):
|
|
@@ -741,10 +742,17 @@ class AIReviewEngine(BaseReviewer):
|
|
|
|
|
|
|
|
review_results = await review_pipeline.review(documents, specification)
|
|
review_results = await review_pipeline.review(documents, specification)
|
|
|
review_results_df = pd.DataFrame(review_results)
|
|
review_results_df = pd.DataFrame(review_results)
|
|
|
- df_section_label = review_results_df['section_label'].str.split('->').str[0]
|
|
|
|
|
- review_results_df['title'] = df_section_label
|
|
|
|
|
|
|
+ chapter_labels = review_results_df['section_label'].str.split('->').str[0]
|
|
|
|
|
+ review_results_df['title'] = chapter_labels
|
|
|
review_results_df.to_csv(Path('temp') / 'document_temp' / '2_spec_review_results.csv', encoding='utf-8-sig', index=False)
|
|
review_results_df.to_csv(Path('temp') / 'document_temp' / '2_spec_review_results.csv', encoding='utf-8-sig', index=False)
|
|
|
- review_results_flag = review_results_df["chapter_classification"].unique().tolist()
|
|
|
|
|
|
|
+ csv_file = rf'temp\document_temp\2_spec_review_results.csv'
|
|
|
|
|
+ path2 = rf'temp\document_temp\outlines_review_results.csv'
|
|
|
|
|
+ data_df = pd.read_csv(csv_file, encoding='utf-8-sig')
|
|
|
|
|
+ outline_review_results_df(data=data_df, path=path2)
|
|
|
|
|
+ df_filtered = review_results_df.drop_duplicates(subset='title', keep='first').reset_index(drop=True)
|
|
|
|
|
+ unique_chapter_labels = chapter_labels.unique().tolist()
|
|
|
|
|
+ chapter_classifications = df_filtered['chapter_classification']
|
|
|
|
|
+ review_results_flag = chapter_classifications.unique().tolist()
|
|
|
|
|
|
|
|
# with open(r'temp\document_temp\1_spec_review_results.json', 'w', encoding='utf-8') as f:
|
|
# with open(r'temp\document_temp\1_spec_review_results.json', 'w', encoding='utf-8') as f:
|
|
|
# json.dump(review_results, f, ensure_ascii=False, indent=4)
|
|
# json.dump(review_results, f, ensure_ascii=False, indent=4)
|
|
@@ -762,7 +770,7 @@ class AIReviewEngine(BaseReviewer):
|
|
|
# logger.info(f" 规范覆盖汇总结果已保存至: {spec_summary_csv_path}")
|
|
# logger.info(f" 规范覆盖汇总结果已保存至: {spec_summary_csv_path}")
|
|
|
summary_rows = pd.DataFrame(summary_rows)
|
|
summary_rows = pd.DataFrame(summary_rows)
|
|
|
summary_rows = summary_rows[summary_rows['标签'].isin(review_results_flag)]
|
|
summary_rows = summary_rows[summary_rows['标签'].isin(review_results_flag)]
|
|
|
- # summary_rows.to_csv(str(spec_summary_csv_path), encoding='utf-8-sig', index=False)
|
|
|
|
|
|
|
+ summary_rows.to_csv(str(spec_summary_csv_path), encoding='utf-8-sig', index=False)
|
|
|
summary_rows = summary_rows.to_dict('records')
|
|
summary_rows = summary_rows.to_dict('records')
|
|
|
# 生成缺失要点 JSON 列表,便于前端消费
|
|
# 生成缺失要点 JSON 列表,便于前端消费
|
|
|
|
|
|
|
@@ -954,74 +962,80 @@ class AIReviewEngine(BaseReviewer):
|
|
|
"""
|
|
"""
|
|
|
logger.info(f"开始大纲审查,trace_id: {trace_id_idx}")
|
|
logger.info(f"开始大纲审查,trace_id: {trace_id_idx}")
|
|
|
|
|
|
|
|
- # 1. 获取整体大纲(1级大纲目录)
|
|
|
|
|
- overall_outline = ""
|
|
|
|
|
-
|
|
|
|
|
- # 添加调试信息
|
|
|
|
|
- logger.debug(f"outline_content结构: {list(outline_content.keys()) if outline_content else 'None'}")
|
|
|
|
|
- outline_data = outline_content.get('outline', {})
|
|
|
|
|
- logger.debug(f"outline_data结构: {list(outline_data.keys()) if outline_data else 'None'}")
|
|
|
|
|
- chapters = outline_data.get('chapters', [])
|
|
|
|
|
- logger.info(f"chapters数量: {len(chapters)}")
|
|
|
|
|
-
|
|
|
|
|
- for i, chapter in enumerate(chapters):
|
|
|
|
|
- chapter_title = chapter.get('title', 'N/A')
|
|
|
|
|
- chapter_page = chapter.get('page', 'N/A')
|
|
|
|
|
- logger.info(f"章节{i+1}: {chapter_title} (页码: {chapter_page})")
|
|
|
|
|
- overall_outline += f"{chapter_title} (页码: {chapter_page})\n"
|
|
|
|
|
-
|
|
|
|
|
- logger.info(f"生成的overall_outline长度: {len(overall_outline)}")
|
|
|
|
|
- if overall_outline:
|
|
|
|
|
- logger.info(f"overall_outline内容: {overall_outline[:200]}...")
|
|
|
|
|
-
|
|
|
|
|
- # 2. 获取大纲各章节及其子目录的详细信息
|
|
|
|
|
- detailed_outline = []
|
|
|
|
|
-
|
|
|
|
|
- for chapter in chapters:
|
|
|
|
|
- # 将每个章节作为整体项,包含标题、页码和子目录
|
|
|
|
|
- chapter_content = f"\n{chapter['title']} (页码: {chapter['page']})\n"
|
|
|
|
|
-
|
|
|
|
|
- # 添加子目录(如果有)
|
|
|
|
|
- subsections = chapter.get('subsections', [])
|
|
|
|
|
- if subsections:
|
|
|
|
|
- chapter_content += "包含子目录:\n"
|
|
|
|
|
- for subsection in subsections:
|
|
|
|
|
- indent = " " * (subsection['level'] - 1)
|
|
|
|
|
- chapter_content += f"{indent}- {subsection['title']} (页码: {subsection['page']})\n"
|
|
|
|
|
-
|
|
|
|
|
- # 将完整章节内容作为一个项添加到列表
|
|
|
|
|
- detailed_outline.append(chapter_content)
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
- logger.info(f"提取整体大纲完成{overall_outline}")
|
|
|
|
|
- logger.info(f"提取详细大纲完成{detailed_outline}")
|
|
|
|
|
-
|
|
|
|
|
- # 准备审查数据
|
|
|
|
|
- review_data = {
|
|
|
|
|
- 'outline_content': outline_content,
|
|
|
|
|
- 'overall_outline': overall_outline,
|
|
|
|
|
- 'detailed_outline': detailed_outline,
|
|
|
|
|
- 'state': state,
|
|
|
|
|
- 'stage_name': stage_name
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- # 调用outline_reviewer进行审查
|
|
|
|
|
|
|
+ # CSV文件路径
|
|
|
|
|
+ csv_path = Path('temp') / 'document_temp' / 'outlines_review_results.csv'
|
|
|
|
|
+
|
|
|
|
|
+ # 存储所有缺失项
|
|
|
|
|
+ missing_items = []
|
|
|
|
|
+
|
|
|
try:
|
|
try:
|
|
|
- outline_review_result = await self.outline_reviewer.outline_review(review_data, trace_id_idx, state,stage_name)
|
|
|
|
|
|
|
+ # 读取CSV文件
|
|
|
|
|
+ df = pd.read_csv(csv_path, encoding='utf-8-sig')
|
|
|
|
|
+ logger.info(f"成功读取CSV文件: {csv_path}, 共 {len(df)} 行")
|
|
|
|
|
+
|
|
|
|
|
+ # 兼容新旧字段名
|
|
|
|
|
+ chapter_label_col = 'chapter_label' if 'chapter_label' in df.columns else 'section_label_first'
|
|
|
|
|
+ review_results_col = 'review_results_summary' if 'review_results_summary' in df.columns else 'merged_review_results'
|
|
|
|
|
+
|
|
|
|
|
+ # 遍历每一行
|
|
|
|
|
+ for index, row in df.iterrows():
|
|
|
|
|
+ chapter_label = row.get(chapter_label_col, '')
|
|
|
|
|
+ merged_results_str = row.get(review_results_col, '')
|
|
|
|
|
+
|
|
|
|
|
+ # 解析review_results_summary字典字符串
|
|
|
|
|
+ try:
|
|
|
|
|
+ if pd.isna(merged_results_str) or merged_results_str == '':
|
|
|
|
|
+ merged_results = {}
|
|
|
|
|
+ else:
|
|
|
|
|
+ # 尝试使用ast.literal_eval解析
|
|
|
|
|
+ merged_results = ast.literal_eval(merged_results_str)
|
|
|
|
|
+ except (ValueError, SyntaxError):
|
|
|
|
|
+ try:
|
|
|
|
|
+ # 尝试使用json.loads解析
|
|
|
|
|
+ merged_results = json.loads(merged_results_str)
|
|
|
|
|
+ except (json.JSONDecodeError, TypeError):
|
|
|
|
|
+ logger.warning(f"第 {index} 行无法解析review_results_summary: {merged_results_str}")
|
|
|
|
|
+ merged_results = {}
|
|
|
|
|
+
|
|
|
|
|
+ # 检查字典中的每个字段
|
|
|
|
|
+ if isinstance(merged_results, dict):
|
|
|
|
|
+ for field_name, field_value in merged_results.items():
|
|
|
|
|
+ # 检查列表是否为空
|
|
|
|
|
+ if isinstance(field_value, list) and len(field_value) == 0:
|
|
|
|
|
+ # 生成缺失项
|
|
|
|
|
+ missing_item = {
|
|
|
|
|
+ "check_item_code": "catalogue_completeness_check",
|
|
|
|
|
+ "check_result": {
|
|
|
|
|
+ "issue_point": f"{field_name}缺失",
|
|
|
|
|
+ "location": "",
|
|
|
|
|
+ "suggestion": "",
|
|
|
|
|
+ "reason": "",
|
|
|
|
|
+ "risk_level": ""
|
|
|
|
|
+ }
|
|
|
|
|
+ }
|
|
|
|
|
+ missing_items.append(missing_item)
|
|
|
|
|
+ logger.info(f"发现缺失项: 章节[{chapter_label}] 字段[{field_name}]")
|
|
|
|
|
+
|
|
|
|
|
+ logger.info(f"大纲审查完成,共发现 {len(missing_items)} 个缺失项")
|
|
|
|
|
+
|
|
|
|
|
+ except FileNotFoundError:
|
|
|
|
|
+ logger.error(f"CSV文件不存在: {csv_path}")
|
|
|
|
|
+ return {
|
|
|
|
|
+ 'outline_review_result': [],
|
|
|
|
|
+ 'error': f'CSV文件不存在: {csv_path}'
|
|
|
|
|
+ }
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
- logger.warning(f"大纲审查失败,但返回提取结果: {str(e)}")
|
|
|
|
|
- outline_review_result = None
|
|
|
|
|
-
|
|
|
|
|
- # 确保目录存在
|
|
|
|
|
- # import os
|
|
|
|
|
- # os.makedirs("temp/outline_result_temp", exist_ok=True)
|
|
|
|
|
-
|
|
|
|
|
- # # with open("temp/outline_result_temp/outline_result.json","w",encoding="utf-8") as f:
|
|
|
|
|
- # # json.dump(outline_review_result,f,ensure_ascii=False,indent=4)
|
|
|
|
|
- # 返回提取的大纲结果和审查结果
|
|
|
|
|
|
|
+ logger.error(f"大纲审查失败: {str(e)}", exc_info=True)
|
|
|
|
|
+ return {
|
|
|
|
|
+ 'outline_review_result': [],
|
|
|
|
|
+ 'error': f'大纲审查失败: {str(e)}'
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
return {
|
|
return {
|
|
|
- 'outline_review_result': outline_review_result
|
|
|
|
|
|
|
+ 'outline_review_result':
|
|
|
|
|
+ {
|
|
|
|
|
+ "response": missing_items,
|
|
|
|
|
+ }
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
async def reference_basis_reviewer(self, review_data: Dict[str, Any], trace_id: str,
|
|
async def reference_basis_reviewer(self, review_data: Dict[str, Any], trace_id: str,
|