|
@@ -5,7 +5,7 @@
|
|
|
@Project : lq-agent-api
|
|
@Project : lq-agent-api
|
|
|
@File : construction_review/ai_review_engine.py
|
|
@File : construction_review/ai_review_engine.py
|
|
|
@IDE : VsCode
|
|
@IDE : VsCode
|
|
|
-@Author : 王旭明
|
|
|
|
|
|
|
+@Author : wandaan
|
|
|
@Date : 2025-12-01 11:07:12
|
|
@Date : 2025-12-01 11:07:12
|
|
|
@Description: AI审查引擎核心组件,负责执行各类文档审查任务,支持并发处理和多种审查模式
|
|
@Description: AI审查引擎核心组件,负责执行各类文档审查任务,支持并发处理和多种审查模式
|
|
|
|
|
|
|
@@ -149,7 +149,6 @@ class AIReviewEngine(BaseReviewer):
|
|
|
|
|
|
|
|
self.max_concurrent_reviews = max_concurrent_reviews
|
|
self.max_concurrent_reviews = max_concurrent_reviews
|
|
|
self.semaphore = asyncio.Semaphore(max_concurrent_reviews)
|
|
self.semaphore = asyncio.Semaphore(max_concurrent_reviews)
|
|
|
- self.milvus_collection = config_handler.get('milvus', 'MILVUS_COLLECTION', 'default')
|
|
|
|
|
|
|
|
|
|
# [新增] 数据库连接池
|
|
# [新增] 数据库连接池
|
|
|
self.db_pool = db_pool
|
|
self.db_pool = db_pool
|
|
@@ -699,41 +698,29 @@ class AIReviewEngine(BaseReviewer):
|
|
|
model_client=getattr(self, 'model_client', None)
|
|
model_client=getattr(self, 'model_client', None)
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- # 从state获取outline和原始chunks(如果有)
|
|
|
|
|
|
|
+ # 从state获取outline(仅用于目录审查)
|
|
|
outline = None
|
|
outline = None
|
|
|
- all_chunks = []
|
|
|
|
|
if state and isinstance(state, dict):
|
|
if state and isinstance(state, dict):
|
|
|
- structured = state.get('structured_content', {})
|
|
|
|
|
- outline = structured.get('outline')
|
|
|
|
|
- all_chunks = structured.get('chunks', [])
|
|
|
|
|
|
|
+ outline = state.get('structured_content', {}).get('outline')
|
|
|
|
|
|
|
|
- # 从传入的chunks中提取chapter_code和章节信息
|
|
|
|
|
|
|
+ # 提取chunk信息(与其他审查一致,使用单个chunk)
|
|
|
chapter_code = "all"
|
|
chapter_code = "all"
|
|
|
chapter_name = ""
|
|
chapter_name = ""
|
|
|
- if review_content and isinstance(review_content, list):
|
|
|
|
|
- first_chunk = review_content[0]
|
|
|
|
|
- if isinstance(first_chunk, dict):
|
|
|
|
|
- chapter_code = first_chunk.get('chapter_classification', 'all')
|
|
|
|
|
- chapter_name = first_chunk.get('chapter', '') or first_chunk.get('section_label', '')
|
|
|
|
|
- # 只保留章级标题("->" 之前的部分),避免二级标题污染 location
|
|
|
|
|
- if chapter_name and '->' in chapter_name:
|
|
|
|
|
- chapter_name = chapter_name.split('->')[0]
|
|
|
|
|
-
|
|
|
|
|
- # 获取该章节的所有原始chunks用于完整性审查(包含所有分类代码)
|
|
|
|
|
- chapter_chunks = review_content
|
|
|
|
|
- if chapter_code != "all" and all_chunks:
|
|
|
|
|
- # 从state中获取该章节的所有原始chunks
|
|
|
|
|
- chapter_chunks = [
|
|
|
|
|
- c for c in all_chunks
|
|
|
|
|
- if c.get('chapter_classification') == chapter_code
|
|
|
|
|
- ]
|
|
|
|
|
- logger.info(f"[{name}] 章节 '{chapter_code}' 从state获取 {len(chapter_chunks)} 个原始chunks进行完整性审查")
|
|
|
|
|
-
|
|
|
|
|
- # 执行检查(传入当前章节分类,只检查该章节下的三级分类)
|
|
|
|
|
|
|
+ secondary_code = ""
|
|
|
|
|
+ chunk = review_content[0] if (review_content and isinstance(review_content, list)) else {}
|
|
|
|
|
+ if isinstance(chunk, dict):
|
|
|
|
|
+ chapter_code = chunk.get('chapter_classification', 'all')
|
|
|
|
|
+ secondary_code = chunk.get('secondary_category_code', '')
|
|
|
|
|
+ chapter_name = chunk.get('chapter', '') or chunk.get('section_label', '')
|
|
|
|
|
+ if chapter_name and '->' in chapter_name:
|
|
|
|
|
+ chapter_name = chapter_name.split('->')[0]
|
|
|
|
|
+
|
|
|
|
|
+ # 执行检查(传入单个chunk + 二级分类,只检查该二级下的标准项)
|
|
|
result = await checker.check(
|
|
result = await checker.check(
|
|
|
- chunks=chapter_chunks,
|
|
|
|
|
|
|
+ chunks=[chunk] if isinstance(chunk, dict) and chunk else review_content,
|
|
|
outline=outline,
|
|
outline=outline,
|
|
|
- chapter_classification=chapter_code if chapter_code != "all" else None
|
|
|
|
|
|
|
+ chapter_classification=chapter_code if chapter_code != "all" else None,
|
|
|
|
|
+ secondary_classification=secondary_code if secondary_code else None,
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
# 转换为字典
|
|
# 转换为字典
|
|
@@ -779,21 +766,28 @@ class AIReviewEngine(BaseReviewer):
|
|
|
response_item["second_seq"] = rec['second_seq']
|
|
response_item["second_seq"] = rec['second_seq']
|
|
|
if 'third_seq' in rec:
|
|
if 'third_seq' in rec:
|
|
|
response_item["third_seq"] = rec['third_seq']
|
|
response_item["third_seq"] = rec['third_seq']
|
|
|
- # 继续添加其他字段
|
|
|
|
|
|
|
+ # 继续添加其他字段(方案B增强:含 evidence / confidence)
|
|
|
|
|
+ check_result_data = {
|
|
|
|
|
+ "issue_point": issue_point,
|
|
|
|
|
+ "location": location,
|
|
|
|
|
+ "suggestion": rec.get('suggestion', ''),
|
|
|
|
|
+ "reason": rec.get('reason', ''),
|
|
|
|
|
+ "risk_level": risk_level,
|
|
|
|
|
+ }
|
|
|
|
|
+ # 方案B特有字段
|
|
|
|
|
+ if rec.get('evidence'):
|
|
|
|
|
+ check_result_data["evidence"] = rec['evidence']
|
|
|
|
|
+ if rec.get('confidence'):
|
|
|
|
|
+ check_result_data["confidence"] = rec['confidence']
|
|
|
|
|
+
|
|
|
response_item.update({
|
|
response_item.update({
|
|
|
"check_item_code": f"{chapter_code if chapter_code != 'all' else 'unknown'}_completeness_check",
|
|
"check_item_code": f"{chapter_code if chapter_code != 'all' else 'unknown'}_completeness_check",
|
|
|
- "check_result": {
|
|
|
|
|
- "issue_point": issue_point,
|
|
|
|
|
- "location": location,
|
|
|
|
|
- "suggestion": rec.get('suggestion', ''),
|
|
|
|
|
- "reason": rec.get('reason', ''),
|
|
|
|
|
- "risk_level": risk_level
|
|
|
|
|
- },
|
|
|
|
|
|
|
+ "check_result": check_result_data,
|
|
|
"exist_issue": True,
|
|
"exist_issue": True,
|
|
|
"risk_info": {"risk_level": risk_level_en}
|
|
"risk_info": {"risk_level": risk_level_en}
|
|
|
})
|
|
})
|
|
|
response_items.append(response_item)
|
|
response_items.append(response_item)
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
# 如果没有缺失项,显示完整度
|
|
# 如果没有缺失项,显示完整度
|
|
|
if not response_items:
|
|
if not response_items:
|
|
|
completeness_rate = result_dict.get('tertiary_completeness', {}).get('completeness_rate', '0%')
|
|
completeness_rate = result_dict.get('tertiary_completeness', {}).get('completeness_rate', '0%')
|
|
@@ -812,27 +806,38 @@ class AIReviewEngine(BaseReviewer):
|
|
|
"exist_issue": False,
|
|
"exist_issue": False,
|
|
|
"risk_info": {"risk_level": "low"}
|
|
"risk_info": {"risk_level": "low"}
|
|
|
})
|
|
})
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
execution_time = time.time() - start_time
|
|
execution_time = time.time() - start_time
|
|
|
-
|
|
|
|
|
- # 构建与原有格式兼容的结果
|
|
|
|
|
|
|
+
|
|
|
|
|
+ # 统计LLM调用信息
|
|
|
|
|
+ llm_calls = result_dict.get('direct_llm_call_count', 0)
|
|
|
|
|
+ direct_items_count = len(result_dict.get('direct_check_items', []))
|
|
|
|
|
+
|
|
|
|
|
+ # 构建与原有格式兼容的结果(方案B增强)
|
|
|
check_result = {
|
|
check_result = {
|
|
|
"details": {
|
|
"details": {
|
|
|
"name": "completeness_check",
|
|
"name": "completeness_check",
|
|
|
"response": response_items,
|
|
"response": response_items,
|
|
|
- "review_location_label": "三级完整性审查",
|
|
|
|
|
|
|
+ "review_location_label": "三级完整性审查(LLM直接解释)",
|
|
|
"chapter_code": chapter_code,
|
|
"chapter_code": chapter_code,
|
|
|
- "original_content": f"标准三级分类: {result_dict.get('tertiary_completeness', {}).get('total', 0)}个, "
|
|
|
|
|
- f"有内容: {result_dict.get('tertiary_completeness', {}).get('present', 0)}个, "
|
|
|
|
|
- f"缺失: {result_dict.get('tertiary_completeness', {}).get('missing', 0)}个",
|
|
|
|
|
- # 保留完整的轻量级审查结果供前端使用
|
|
|
|
|
- "lightweight_result": result_dict
|
|
|
|
|
|
|
+ "original_content": (
|
|
|
|
|
+ f"标准三级分类: {result_dict.get('tertiary_completeness', {}).get('total', 0)}个, "
|
|
|
|
|
+ f"有内容: {result_dict.get('tertiary_completeness', {}).get('present', 0)}个, "
|
|
|
|
|
+ f"缺失: {result_dict.get('tertiary_completeness', {}).get('missing', 0)}个"
|
|
|
|
|
+ ),
|
|
|
|
|
+ # 保留完整的审查结果供前端使用
|
|
|
|
|
+ "lightweight_result": result_dict,
|
|
|
|
|
+ # 方案B特有:LLM逐项判断详情
|
|
|
|
|
+ "direct_check_items": result_dict.get('direct_check_items', []),
|
|
|
|
|
+ "direct_llm_call_count": llm_calls,
|
|
|
|
|
+ "review_method": "direct_llm",
|
|
|
},
|
|
},
|
|
|
"success": True,
|
|
"success": True,
|
|
|
"execution_time": execution_time
|
|
"execution_time": execution_time
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
- logger.info(f"[{name}] 审查完成,耗时: {execution_time:.2f}s, "
|
|
|
|
|
|
|
+
|
|
|
|
|
+ logger.info(f"[{name}] 审查完成(方案B), 耗时: {execution_time:.2f}s, "
|
|
|
|
|
+ f"LLM调用: {llm_calls}次, 直接检查项: {direct_items_count}项, "
|
|
|
f"三级完整率: {result_dict.get('tertiary_completeness', {}).get('completeness_rate', 'N/A')}")
|
|
f"三级完整率: {result_dict.get('tertiary_completeness', {}).get('completeness_rate', 'N/A')}")
|
|
|
|
|
|
|
|
return check_result, trace_id_idx
|
|
return check_result, trace_id_idx
|
|
@@ -1075,7 +1080,7 @@ class AIReviewEngine(BaseReviewer):
|
|
|
combined_content = review_content
|
|
combined_content = review_content
|
|
|
|
|
|
|
|
return await self.review("non_parameter_compliance_check", trace_id, reviewer_type, prompt_name, combined_content, review_references,
|
|
return await self.review("non_parameter_compliance_check", trace_id, reviewer_type, prompt_name, combined_content, review_references,
|
|
|
- reference_source, state, stage_name, timeout=45, function_name="completeness_review_classify")
|
|
|
|
|
|
|
+ reference_source, state, stage_name, timeout=45, function_name="non_parameter_compliance_check")
|
|
|
|
|
|
|
|
async def check_parameter_compliance(self, trace_id_idx: str, review_content: str, review_references: str,
|
|
async def check_parameter_compliance(self, trace_id_idx: str, review_content: str, review_references: str,
|
|
|
reference_source: str, state: str, stage_name: str,
|
|
reference_source: str, state: str, stage_name: str,
|
|
@@ -1108,7 +1113,7 @@ class AIReviewEngine(BaseReviewer):
|
|
|
combined_content = review_content
|
|
combined_content = review_content
|
|
|
|
|
|
|
|
return await self.review("parameter_compliance_check", trace_id, reviewer_type, prompt_name, combined_content, review_references,
|
|
return await self.review("parameter_compliance_check", trace_id, reviewer_type, prompt_name, combined_content, review_references,
|
|
|
- reference_source, state, stage_name, timeout=45, function_name="completeness_review_classify")
|
|
|
|
|
|
|
+ reference_source, state, stage_name, timeout=45, function_name="parameter_compliance_check")
|
|
|
|
|
|
|
|
async def reference_basis_reviewer(self, review_data: Dict[str, Any], trace_id: str,
|
|
async def reference_basis_reviewer(self, review_data: Dict[str, Any], trace_id: str,
|
|
|
state: dict = None, stage_name: str = None) -> Dict[str, Any]:
|
|
state: dict = None, stage_name: str = None) -> Dict[str, Any]:
|
|
@@ -1249,114 +1254,6 @@ class AIReviewEngine(BaseReviewer):
|
|
|
"error_message": error_msg
|
|
"error_message": error_msg
|
|
|
}
|
|
}
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
- async def timeliness_content_reviewer(self, review_data: Dict[str, Any], trace_id: str,
|
|
|
|
|
- state: dict = None, stage_name: str = None) -> Dict[str, Any]:
|
|
|
|
|
- """
|
|
|
|
|
- 执行三级分类内容时效性审查:检查tertiary_classification_details中引用的规范是否过时
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- review_data: 待审查数据,包含tertiary_classification_details
|
|
|
|
|
- trace_id: 追踪ID
|
|
|
|
|
- state: 状态字典
|
|
|
|
|
- stage_name: 阶段名称
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- 审查结果字典,包含内容时效性审查结果
|
|
|
|
|
- """
|
|
|
|
|
- start_time = time.time()
|
|
|
|
|
- try:
|
|
|
|
|
- logger.info(f"开始三级分类内容时效性审查,trace_id: {trace_id}")
|
|
|
|
|
-
|
|
|
|
|
- # 提取三级分类详情
|
|
|
|
|
- tertiary_details = review_data.get('tertiary_classification_details', [])
|
|
|
|
|
- max_concurrent = review_data.get('max_concurrent', 4)
|
|
|
|
|
-
|
|
|
|
|
- if not tertiary_details:
|
|
|
|
|
- logger.warning("三级分类详情为空,将跳过内容时效性审查")
|
|
|
|
|
- return {
|
|
|
|
|
- "timeliness_content_review_results": {
|
|
|
|
|
- "review_results": [],
|
|
|
|
|
- "total_items": 0,
|
|
|
|
|
- "issue_items": 0,
|
|
|
|
|
- "execution_time": time.time() - start_time,
|
|
|
|
|
- "error_message": None,
|
|
|
|
|
- "message": "未找到三级分类详情,跳过内容时效性审查"
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- logger.info(f"提取到 {len(tertiary_details)} 个三级分类详情")
|
|
|
|
|
-
|
|
|
|
|
- # 调用内容时效性审查
|
|
|
|
|
- try:
|
|
|
|
|
- # 使用信号量控制并发
|
|
|
|
|
- async with self.semaphore:
|
|
|
|
|
- # 从state中获取progress_manager和callback_task_id
|
|
|
|
|
- progress_manager = state.get('progress_manager') if state else None
|
|
|
|
|
- callback_task_id = state.get('callback_task_id') if state else None
|
|
|
|
|
-
|
|
|
|
|
- # 调用内容时效性审查器(使用新的统一入口)
|
|
|
|
|
- from core.construction_review.component.reviewers.timeliness_reviewer import TimelinessReviewService
|
|
|
|
|
- async with TimelinessReviewService(max_concurrent=max_concurrent, db_pool=self.db_pool) as reviewer:
|
|
|
|
|
- # 从 tertiary_details 提取内容
|
|
|
|
|
- contents = []
|
|
|
|
|
- for detail in tertiary_details:
|
|
|
|
|
- content = detail.get("content", "") if isinstance(detail, dict) else str(detail)
|
|
|
|
|
- if content:
|
|
|
|
|
- contents.append(content)
|
|
|
|
|
- full_content = "\n".join(contents)
|
|
|
|
|
-
|
|
|
|
|
- timeliness_content_results = await reviewer.review_from_content(
|
|
|
|
|
- content=full_content,
|
|
|
|
|
- chapter_code="content",
|
|
|
|
|
- collection_name="first_bfp_collection_status"
|
|
|
|
|
- )
|
|
|
|
|
-
|
|
|
|
|
- logger.info(f"内容时效性审查完成,发现问题数量: {len(timeliness_content_results)}")
|
|
|
|
|
-
|
|
|
|
|
- # 统计审查结果
|
|
|
|
|
- total_items = len(timeliness_content_results)
|
|
|
|
|
- issue_items = sum(1 for item in timeliness_content_results if item.get('exist_issue', False))
|
|
|
|
|
-
|
|
|
|
|
- logger.info(f"审查统计 - 总规范引用: {total_items}, 问题项: {issue_items}")
|
|
|
|
|
-
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- logger.error(f"内容时效性审查失败: {str(e)}")
|
|
|
|
|
- return {
|
|
|
|
|
- "timeliness_content_review_results": {
|
|
|
|
|
- "review_results": [],
|
|
|
|
|
- "total_items": 0,
|
|
|
|
|
- "issue_items": 0,
|
|
|
|
|
- "execution_time": time.time() - start_time,
|
|
|
|
|
- "error_message": f"内容时效性审查失败: {str(e)}"
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- # 返回完整结果
|
|
|
|
|
- return {
|
|
|
|
|
- "timeliness_content_review_results": {
|
|
|
|
|
- "review_results": timeliness_content_results,
|
|
|
|
|
- "total_items": total_items,
|
|
|
|
|
- "issue_items": issue_items,
|
|
|
|
|
- "execution_time": time.time() - start_time,
|
|
|
|
|
- "error_message": None
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- except Exception as e:
|
|
|
|
|
- execution_time = time.time() - start_time
|
|
|
|
|
- error_msg = f"内容时效性审查失败: {str(e)}"
|
|
|
|
|
- logger.error(error_msg, exc_info=True)
|
|
|
|
|
-
|
|
|
|
|
- return {
|
|
|
|
|
- "timeliness_content_review_results": {
|
|
|
|
|
- "review_results": [],
|
|
|
|
|
- "total_items": 0,
|
|
|
|
|
- "issue_items": 0,
|
|
|
|
|
- "execution_time": execution_time,
|
|
|
|
|
- "error_message": error_msg
|
|
|
|
|
- }
|
|
|
|
|
- }
|
|
|
|
|
|
|
|
|
|
async def timeliness_basis_reviewer(self, review_data: Dict[str, Any], trace_id: str,
|
|
async def timeliness_basis_reviewer(self, review_data: Dict[str, Any], trace_id: str,
|
|
|
state: dict = None, stage_name: str = None) -> Dict[str, Any]:
|
|
state: dict = None, stage_name: str = None) -> Dict[str, Any]:
|