|
|
@@ -76,10 +76,15 @@ import json
|
|
|
|
|
|
|
|
|
|
|
|
-from .check_completeness.components.data_loader import CSVDataLoader
|
|
|
-from .check_completeness.components.prompt_builder import PromptBuilder
|
|
|
-from .check_completeness.components.llm_client import LLMClient
|
|
|
-from .check_completeness.components.result_processor import ResultProcessor
|
|
|
+from .reviewers.check_completeness.components.data_loader import CSVDataLoader
|
|
|
+from .reviewers.check_completeness.components.prompt_builder import PromptBuilder
|
|
|
+from .reviewers.check_completeness.components.llm_client import LLMClient
|
|
|
+from .reviewers.check_completeness.components.result_processor import ResultProcessor
|
|
|
+from .reviewers.check_completeness.components.review_pipeline import ReviewPipeline
|
|
|
+from .reviewers.check_completeness.components.result_saver import ResultSaver
|
|
|
+from .reviewers.check_completeness.components.result_analyzer import ResultAnalyzer
|
|
|
+from .reviewers.check_completeness.utils.file_utils import write_json
|
|
|
+from core.construction_review.component.reviewers.base_reviewer import ReviewResult
|
|
|
|
|
|
@dataclass
|
|
|
class ReviewResult:
|
|
|
@@ -727,7 +732,7 @@ class AIReviewEngine(BaseReviewer):
|
|
|
# return await self.review("semantic_logic_check", trace_id, reviewer_type, prompt_name, review_content, review_references,
|
|
|
# None, review_location_label, state, stage_name)
|
|
|
|
|
|
- async def check_completeness(self, trace_id_idx: str, review_content: Dict[str, Any], review_references: str,
|
|
|
+ async def check_completeness(self, trace_id_idx: str, review_content: List[Dict[str, Any]], review_references: str,
|
|
|
review_location_label: str, state: str, stage_name: str) -> Dict[str, Any]:
|
|
|
"""
|
|
|
完整性检查
|
|
|
@@ -744,22 +749,27 @@ class AIReviewEngine(BaseReviewer):
|
|
|
Dict[str, Any]: 完整性检查结果
|
|
|
"""
|
|
|
|
|
|
-
|
|
|
+ # with open(r'temp\structured_content.json', 'w', encoding='utf-8') as f:
|
|
|
+ # json.dump(review_content, f, ensure_ascii=False, indent=4)
|
|
|
name = "completeness_check"
|
|
|
start_time = time.time()
|
|
|
|
|
|
try:
|
|
|
# 验证review_content格式
|
|
|
- if not isinstance(review_content, dict):
|
|
|
+ if not isinstance(review_content, list):
|
|
|
raise ValueError(f"review_content必须是字典类型,当前类型: {type(review_content)}")
|
|
|
|
|
|
- # 获取文档块信息
|
|
|
- doc = review_content
|
|
|
- chunk_id = doc.get('chunk_id', 'unknown')
|
|
|
- chapter_classification = doc.get('chapter_classification', '')
|
|
|
- content = doc.get('content', '')
|
|
|
+ # # 获取文档块信息
|
|
|
+ # doc = review_content
|
|
|
+ # chunk_id = doc.get('chunk_id', 'unknown')
|
|
|
+ # chapter_classification = doc.get('chapter_classification', '')
|
|
|
+ # content = doc.get('content', '')
|
|
|
+ doc = 'doc'
|
|
|
+ chunk_id = 'chunk_id'
|
|
|
+ chapter_classification = 'chunk_id'
|
|
|
+ content = 'chunk_id'
|
|
|
|
|
|
- logger.info(f"开始执行 {name} 审查,trace_id: {trace_id_idx}, chunk_id: {chunk_id}, chapter_classification: {chapter_classification}")
|
|
|
+ logger.debug(f"开始执行 {name} 审查,trace_id: {trace_id_idx}, chunk_id: {chunk_id}, chapter_classification: {chapter_classification}")
|
|
|
|
|
|
# 检查必要字段
|
|
|
if not chapter_classification:
|
|
|
@@ -768,102 +778,150 @@ class AIReviewEngine(BaseReviewer):
|
|
|
if not content:
|
|
|
raise ValueError(f"文档块 {chunk_id} 缺少content字段")
|
|
|
|
|
|
- # 导入check_completeness组件
|
|
|
- check_completeness_dir = Path(__file__).parent / "check_completeness"
|
|
|
- if str(check_completeness_dir) not in sys.path:
|
|
|
- sys.path.insert(0, str(check_completeness_dir))
|
|
|
|
|
|
- # 初始化组件路径
|
|
|
- base_dir = check_completeness_dir
|
|
|
+ # 配置文件路径
|
|
|
+ # base_dir = Path(__file__).parent
|
|
|
+ base_dir = Path(r'core\construction_review\component\reviewers\check_completeness')
|
|
|
csv_path = base_dir / 'config' / 'Construction_Plan_Content_Specification.csv'
|
|
|
+ json_path = base_dir / 'data' / '文档切分预处理结果.json'
|
|
|
prompt_config_path = base_dir / 'config' / 'prompt.yaml'
|
|
|
api_config_path = base_dir / 'config' / 'llm_api.yaml'
|
|
|
|
|
|
- # 加载规范文件
|
|
|
+ logger.debug("=" * 60)
|
|
|
+ logger.debug("文件要点审查模块")
|
|
|
+ logger.debug("=" * 60)
|
|
|
+
|
|
|
+ # 1. 加载数据
|
|
|
+ logger.debug("\n[1/5] 加载规范文件...")
|
|
|
data_loader = CSVDataLoader()
|
|
|
specification = data_loader.load_specification(str(csv_path))
|
|
|
+ logger.debug(f" 加载完成,共 {len(specification)} 个标签类别")
|
|
|
|
|
|
- # 获取对应的规范要求
|
|
|
- requirements = specification.get(chapter_classification, [])
|
|
|
- if not requirements:
|
|
|
- raise ValueError(f"未找到标签 {chapter_classification} 对应的规范要求")
|
|
|
-
|
|
|
- logger.info(f"找到 {len(requirements)} 个规范要求项")
|
|
|
+ logger.debug("\n[2/5] 加载文档数据...")
|
|
|
+ documents = review_content
|
|
|
+ logger.debug(f" 加载完成,共 {len(documents)} 个文档块")
|
|
|
|
|
|
- # 初始化组件
|
|
|
+ # 2. 初始化组件
|
|
|
+ logger.debug("\n[3/5] 初始化组件...")
|
|
|
prompt_builder = PromptBuilder(str(prompt_config_path))
|
|
|
llm_client = LLMClient(str(api_config_path))
|
|
|
result_processor = ResultProcessor()
|
|
|
|
|
|
- # 构建提示词
|
|
|
- prompt = prompt_builder.build_prompt(content, requirements)
|
|
|
+ # 获取并发数配置
|
|
|
+ api_config = llm_client.config
|
|
|
+ concurrent_workers = api_config.get('keywords', {}).get('concurrent_workers', 20)
|
|
|
|
|
|
- # 调用LLM
|
|
|
- logger.info(f"调用LLM进行审查,使用模型: {llm_client.model_type}")
|
|
|
- llm_response = await llm_client.call_llm(prompt)
|
|
|
+ review_pipeline = ReviewPipeline(
|
|
|
+ prompt_builder=prompt_builder,
|
|
|
+ llm_client=llm_client,
|
|
|
+ result_processor=result_processor,
|
|
|
+ max_concurrent=concurrent_workers
|
|
|
+ )
|
|
|
+ logger.debug(" 组件初始化完成")
|
|
|
|
|
|
- # 处理结果
|
|
|
- review_result = result_processor.parse_result(llm_response, requirements)
|
|
|
+ start_time = time.time()
|
|
|
+ # 3. 执行审查
|
|
|
+ logger.debug("\n[4/5] 开始执行审查...")
|
|
|
+ logger.debug(f" 使用模型: {llm_client.model_type}")
|
|
|
+ logger.debug(f" 最大并发数: {concurrent_workers}")
|
|
|
|
|
|
- # 构建details字段,包含审查结果
|
|
|
- details = {
|
|
|
- 'chunk_id': chunk_id,
|
|
|
- 'name': 'completeness_check',
|
|
|
- 'chapter_classification': chapter_classification,
|
|
|
- 'section_label': doc.get('section_label', ''),
|
|
|
- 'requirements_count': len(requirements),
|
|
|
- 'checked_items': len(review_result),
|
|
|
- 'response': review_result[0] if review_result else {},
|
|
|
- }
|
|
|
+ review_results = await review_pipeline.review(documents, specification)
|
|
|
|
|
|
- execution_time = time.time() - start_time
|
|
|
+ # 统计结果
|
|
|
+ success_count = sum(1 for r in review_results if isinstance(r.get('review_result', {}), dict) and 'error' not in r.get('review_result', {}))
|
|
|
+ error_count = len(review_results) - success_count
|
|
|
+ logger.debug(f"\n 审查完成: 成功 {success_count} 个, 失败 {error_count} 个")
|
|
|
+
|
|
|
+
|
|
|
+ # 4. 使用结果解析处理组件,生成规范覆盖汇总表
|
|
|
+ logger.debug("\n[5/5] 生成规范要点覆盖汇总表...")
|
|
|
+ analyzer = ResultAnalyzer(str(csv_path))
|
|
|
+ processed_results = analyzer.process_results(review_results)
|
|
|
+ spec_summary_csv_path = base_dir / 'output' / 'spec_review_summary.csv'
|
|
|
+ summary_rows = analyzer.build_spec_summary(processed_results)
|
|
|
+ logger.debug(f" 规范覆盖汇总结果已保存至: {spec_summary_csv_path}")
|
|
|
+
|
|
|
+ # 生成缺失要点 JSON 列表,便于前端消费
|
|
|
+ missing_issue_json_path = Path(r'temp\document_temp') / 'spec_review_missing_issues.json'
|
|
|
+ missing_issue_list = analyzer.build_missing_issue_list(summary_rows)
|
|
|
+ write_json(missing_issue_list, str(missing_issue_json_path))
|
|
|
+ logger.debug(f" 缺失要点 JSON 已保存至: {missing_issue_json_path}")
|
|
|
+ cost_time = time.time() - start_time
|
|
|
|
|
|
- # 创建ReviewResult对象
|
|
|
- from core.construction_review.component.reviewers.base_reviewer import ReviewResult
|
|
|
- result = ReviewResult(
|
|
|
- success=True,
|
|
|
- details=details,
|
|
|
- error_message=None,
|
|
|
- execution_time=execution_time
|
|
|
- )
|
|
|
- with open('temp/completeness_check_result_1.json','w',encoding='utf-8') as f:
|
|
|
- json.dump({"details":result.details,"success":result.success,"error_message":result.error_message,"execution_time":result.execution_time},f,ensure_ascii=False,indent=4)
|
|
|
- # 将审查结果转换为字典格式,添加到issues中
|
|
|
- review_result_data = {
|
|
|
- 'name': name,
|
|
|
- 'success': result.success,
|
|
|
- 'details': result.details,
|
|
|
- 'error_message': result.error_message,
|
|
|
- 'execution_time': result.execution_time,
|
|
|
- 'timestamp': time.time()
|
|
|
- }
|
|
|
-
|
|
|
- # 推送审查完成信息
|
|
|
- state_dict = None
|
|
|
- if state:
|
|
|
- if isinstance(state, dict):
|
|
|
- state_dict = state
|
|
|
- elif isinstance(state, str):
|
|
|
- try:
|
|
|
- state_dict = json.loads(state)
|
|
|
- except (json.JSONDecodeError, AttributeError):
|
|
|
- pass
|
|
|
-
|
|
|
- if state_dict and state_dict.get("progress_manager"):
|
|
|
- asyncio.create_task(
|
|
|
- state_dict["progress_manager"].update_stage_progress(
|
|
|
- callback_task_id=state_dict.get("callback_task_id"),
|
|
|
- stage_name=stage_name,
|
|
|
- current=None,
|
|
|
- status="processing",
|
|
|
- message=f"{name} 要点审查完成 (chunk_id: {chunk_id}), 耗时: {result.execution_time:.2f}s",
|
|
|
- issues=[review_result_data],
|
|
|
- event_type="processing"
|
|
|
- )
|
|
|
+ # 构建details字段,包含审查结果
|
|
|
+ # details = {
|
|
|
+ # 'chunk_id': chunk_id,
|
|
|
+ # 'name': 'completeness_check',
|
|
|
+ # 'chapter_classification': chapter_classification,
|
|
|
+ # 'section_label': doc.get('section_label', ''),
|
|
|
+ # 'requirements_count': len(requirements),
|
|
|
+ # 'checked_items': len(review_result),
|
|
|
+ # 'response': review_result[0] if review_result else {},
|
|
|
+ # }
|
|
|
+ result_list = []
|
|
|
+ for index, missing_issue in enumerate(missing_issue_list):
|
|
|
+ details = {
|
|
|
+ 'chunk_id': f'chunk_id_{index}',
|
|
|
+ 'name': 'completeness_check',
|
|
|
+ 'chapter_classification': 'chapter_classification',
|
|
|
+ 'section_label': 'section_label',
|
|
|
+ 'requirements_count': 'requirements_count',
|
|
|
+ 'checked_items': len(missing_issue),
|
|
|
+ 'response': missing_issue if missing_issue else {},
|
|
|
+ }
|
|
|
+
|
|
|
+ # 创建ReviewResult对象
|
|
|
+ from core.construction_review.component.reviewers.base_reviewer import ReviewResult
|
|
|
+ result = ReviewResult(
|
|
|
+ success=True,
|
|
|
+ details=details,
|
|
|
+ error_message=None,
|
|
|
+ execution_time=cost_time
|
|
|
)
|
|
|
- logger.info(f"{name} 审查完成 (chunk_id: {chunk_id}), 耗时: {result.execution_time:.2f}s")
|
|
|
+
|
|
|
+ result_dict = {"details":result.details,"success":result.success,"error_message":result.error_message,"execution_time":result.execution_time}
|
|
|
+ result_list.append(result_dict)
|
|
|
+ with open('temp/completeness_check_result_1.json','w',encoding='utf-8') as f:
|
|
|
+ json.dump(result_dict, f, ensure_ascii=False,indent=4)
|
|
|
+
|
|
|
+ # 将审查结果转换为字典格式,添加到issues中
|
|
|
+ review_result_data = {
|
|
|
+ 'name': name,
|
|
|
+ 'success': result.success,
|
|
|
+ 'details': result.details,
|
|
|
+ 'error_message': result.error_message,
|
|
|
+ 'execution_time': result.execution_time,
|
|
|
+ 'timestamp': time.time()
|
|
|
+ }
|
|
|
+
|
|
|
+ # 推送审查完成信息
|
|
|
+ state_dict = None
|
|
|
+ if state:
|
|
|
+ if isinstance(state, dict):
|
|
|
+ state_dict = state
|
|
|
+ elif isinstance(state, str):
|
|
|
+ try:
|
|
|
+ state_dict = json.loads(state)
|
|
|
+ except (json.JSONDecodeError, AttributeError):
|
|
|
+ pass
|
|
|
+
|
|
|
+ if state_dict and state_dict.get("progress_manager"):
|
|
|
+ asyncio.create_task(
|
|
|
+ state_dict["progress_manager"].update_stage_progress(
|
|
|
+ callback_task_id=state_dict.get("callback_task_id"),
|
|
|
+ stage_name=stage_name,
|
|
|
+ current=None,
|
|
|
+ status="processing",
|
|
|
+ message=f"{name} 要点审查完成 (chunk_id: {chunk_id}_{index}), 耗时: {result.execution_time:.2f}s",
|
|
|
+ issues=[review_result_data],
|
|
|
+ event_type="processing"
|
|
|
+ )
|
|
|
+ )
|
|
|
+ logger.debug(f"{name} 审查完成 (chunk_id: {chunk_id}_{index}), 耗时: {result.execution_time:.2f}s")
|
|
|
|
|
|
- return result
|
|
|
+ return {
|
|
|
+ 'completeness_review_result': result_list
|
|
|
+ }
|
|
|
|
|
|
except Exception as e:
|
|
|
execution_time = time.time() - start_time
|
|
|
@@ -874,7 +932,7 @@ class AIReviewEngine(BaseReviewer):
|
|
|
return ReviewResult(
|
|
|
success=False,
|
|
|
details={
|
|
|
- 'chunk_id': review_content.get('chunk_id', 'unknown') if isinstance(review_content, dict) else 'unknown',
|
|
|
+ 'chunk_id': review_content[0].get('chunk_id', 'unknown') if isinstance(review_content[0], dict) else 'unknown',
|
|
|
'error': str(e)
|
|
|
},
|
|
|
error_message=error_msg,
|