|
|
@@ -42,7 +42,7 @@ class OutlineReviewer:
|
|
|
# self.review_location_label = review_location_label
|
|
|
self.reviewer_type = "outline"
|
|
|
|
|
|
- async def outline_review(self, review_data: Dict[str, Any], trace_id: str, stage_name: str = None, state: dict = None) -> Dict[str, Any]:
|
|
|
+ async def outline_review(self, review_data: Dict[str, Any], trace_id: str, state: dict = None,stage_name: str = None) -> Dict[str, Any]:
|
|
|
"""
|
|
|
执行两阶段大纲审查:1.整体大纲完整性审查 2.详细大纲逐项审查
|
|
|
|
|
|
@@ -79,13 +79,13 @@ class OutlineReviewer:
|
|
|
# 阶段1:整体大纲完整性审查(仅在有数据时执行)
|
|
|
if overall_outline and overall_outline.strip():
|
|
|
logger.info("启动阶段1:整体大纲完整性审查...")
|
|
|
- overall_task = self._overall_completeness_review(overall_outline, trace_id)
|
|
|
+ overall_task = self._overall_completeness_review(overall_outline, trace_id, state, stage_name)
|
|
|
tasks.append(("overall", overall_task))
|
|
|
|
|
|
# 阶段2:详细大纲逐项审查
|
|
|
if detailed_outline:
|
|
|
logger.info("启动阶段2:详细大纲逐项审查...")
|
|
|
- detailed_task = self._detailed_item_review(detailed_outline, trace_id)
|
|
|
+ detailed_task = self._detailed_item_review(detailed_outline, trace_id, state, stage_name)
|
|
|
tasks.append(("detailed", detailed_task))
|
|
|
|
|
|
# 处理空数据情况
|
|
|
@@ -138,9 +138,9 @@ class OutlineReviewer:
|
|
|
# 降级为串行处理
|
|
|
logger.warning("降级为串行执行两阶段审查")
|
|
|
if overall_outline and overall_outline.strip():
|
|
|
- overall_review_result = await self._overall_completeness_review(overall_outline, trace_id)
|
|
|
+ overall_review_result = await self._overall_completeness_review(overall_outline, trace_id, state, stage_name)
|
|
|
if detailed_outline:
|
|
|
- detailed_review_results = await self._detailed_item_review(detailed_outline, trace_id)
|
|
|
+ detailed_review_results = await self._detailed_item_review(detailed_outline, trace_id, state, stage_name)
|
|
|
|
|
|
# 返回完整结果
|
|
|
return {
|
|
|
@@ -164,13 +164,15 @@ class OutlineReviewer:
|
|
|
"stage2_detailed_review": []
|
|
|
}
|
|
|
|
|
|
- async def _overall_completeness_review(self, overall_outline: str, trace_id: str) -> Dict[str, Any]:
|
|
|
+ async def _overall_completeness_review(self, overall_outline: str, trace_id: str, state: dict = None, stage_name: str = None) -> Dict[str, Any]:
|
|
|
"""
|
|
|
阶段1:整体大纲完整性审查 - 检查十大类章节是否有缺失
|
|
|
|
|
|
Args:
|
|
|
overall_outline: 整体大纲内容
|
|
|
trace_id: 追踪ID
|
|
|
+ state: 状态字典
|
|
|
+ stage_name: 阶段名称
|
|
|
|
|
|
Returns:
|
|
|
整体大纲审查结果
|
|
|
@@ -210,9 +212,27 @@ class OutlineReviewer:
|
|
|
|
|
|
response_text = model_response
|
|
|
overall_completeness_result = self._extract_json_from_text(response_text)
|
|
|
+ filtered_issues = [r for r in overall_completeness_result if self._is_non_compliant_item(r)]
|
|
|
+ message=f"整体大纲完整性审查完成,发现 {len(filtered_issues)} 个问题",
|
|
|
if not overall_completeness_result:
|
|
|
- overall_completeness_result.append("整体大纲一级目录检查已通过,未发现缺失项")
|
|
|
-
|
|
|
+ message = "整体大纲一级目录检查已通过,未发现缺失项"
|
|
|
+
|
|
|
+ if state and state.get("progress_manager"):
|
|
|
+ # 使用try-catch确保SSE推送失败不会影响主流程
|
|
|
+ try:
|
|
|
+ await state["progress_manager"].update_stage_progress(
|
|
|
+ callback_task_id=state["callback_task_id"],
|
|
|
+ stage_name=f"{stage_name} - 阶段1:整体大纲审查",
|
|
|
+ current=None, # 明确不更新current,保持主流程进度
|
|
|
+ status="completed",
|
|
|
+ message=message,
|
|
|
+ issues=filtered_issues,
|
|
|
+ event_type="processing" # 使用专门的事件类型
|
|
|
+ )
|
|
|
+ logger.info("SSE推送成功: 整体大纲完整性审查完成")
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"SSE推送失败: 整体大纲完整性审查, 错误: {str(e)}")
|
|
|
+ # 不抛出异常,避免影响主流程
|
|
|
|
|
|
return {
|
|
|
"success": True,
|
|
|
@@ -220,7 +240,7 @@ class OutlineReviewer:
|
|
|
"parsed_result": overall_completeness_result
|
|
|
}
|
|
|
|
|
|
- async def _detailed_item_review(self, detailed_outline: list, trace_id: str) -> list:
|
|
|
+ async def _detailed_item_review(self, detailed_outline: list, trace_id: str,state, stage_name) -> list:
|
|
|
"""
|
|
|
阶段2:详细大纲并发审查 - 对detailed_outline中的所有项目进行并发审查
|
|
|
|
|
|
@@ -249,7 +269,7 @@ class OutlineReviewer:
|
|
|
tasks = []
|
|
|
|
|
|
for i, outline_item in valid_items:
|
|
|
- task = self._concurrent_single_review(i, outline_item, trace_id, semaphore)
|
|
|
+ task = self._concurrent_single_review(i, outline_item, trace_id, semaphore, state, stage_name)
|
|
|
tasks.append(task)
|
|
|
|
|
|
# 等待所有任务完成
|
|
|
@@ -259,7 +279,7 @@ class OutlineReviewer:
|
|
|
except Exception as e:
|
|
|
logger.error(f"并发审查失败: {str(e)}")
|
|
|
# 如果并发失败,降级为串行处理
|
|
|
- return await self._fallback_sequential_review(valid_items, trace_id)
|
|
|
+ return await self._fallback_sequential_review(valid_items, trace_id,state, stage_name)
|
|
|
|
|
|
# 处理结果
|
|
|
detailed_review_results = []
|
|
|
@@ -288,7 +308,7 @@ class OutlineReviewer:
|
|
|
|
|
|
return detailed_review_results
|
|
|
|
|
|
- async def _concurrent_single_review(self, item_index: int, outline_item: str, trace_id: str, semaphore: asyncio.Semaphore) -> Dict[str, Any]:
|
|
|
+ async def _concurrent_single_review(self, item_index: int, outline_item: str, trace_id: str, semaphore: asyncio.Semaphore, state, stage_name) -> Dict[str, Any]:
|
|
|
"""
|
|
|
单个项目的并发审查
|
|
|
|
|
|
@@ -297,6 +317,8 @@ class OutlineReviewer:
|
|
|
outline_item: 大纲项目内容
|
|
|
trace_id: 追踪ID
|
|
|
semaphore: 并发控制信号量
|
|
|
+ state: 状态字典
|
|
|
+ stage_name: 阶段名称
|
|
|
|
|
|
Returns:
|
|
|
单项审查结果
|
|
|
@@ -304,7 +326,7 @@ class OutlineReviewer:
|
|
|
async with semaphore:
|
|
|
try:
|
|
|
logger.info(f"开始审查第{item_index+1}项: {outline_item[:50]}...")
|
|
|
- result = await self._single_item_review(outline_item, trace_id, item_index)
|
|
|
+ result = await self._single_item_review(outline_item, trace_id, item_index, state, stage_name)
|
|
|
logger.info(f"完成审查第{item_index+1}项,成功: {result.get('success', False)}")
|
|
|
return result
|
|
|
except Exception as e:
|
|
|
@@ -316,7 +338,7 @@ class OutlineReviewer:
|
|
|
"parsed_result": None
|
|
|
}
|
|
|
|
|
|
- async def _fallback_sequential_review(self, valid_items: list, trace_id: str) -> list:
|
|
|
+ async def _fallback_sequential_review(self, valid_items: list, trace_id: str, state, stage_name) -> list:
|
|
|
"""
|
|
|
降级串行审查(当并发失败时使用)
|
|
|
|
|
|
@@ -333,7 +355,7 @@ class OutlineReviewer:
|
|
|
for i, outline_item in valid_items:
|
|
|
try:
|
|
|
logger.info(f"串行审查第{i+1}项: {outline_item[:50]}...")
|
|
|
- item_review_result = await self._single_item_review(outline_item, trace_id, i)
|
|
|
+ item_review_result = await self._single_item_review(outline_item, trace_id, i, state, stage_name)
|
|
|
|
|
|
detailed_review_results.append({
|
|
|
"item_index": i,
|
|
|
@@ -355,7 +377,7 @@ class OutlineReviewer:
|
|
|
|
|
|
return detailed_review_results
|
|
|
|
|
|
- async def _single_item_review(self, outline_item: str, trace_id: str, item_index: int) -> Dict[str, Any]:
|
|
|
+ async def _single_item_review(self, outline_item: str, trace_id: str, item_index: int, state: dict = None, stage_name: str = None) -> Dict[str, Any]:
|
|
|
"""
|
|
|
单项大纲审查 - 调用原有逻辑
|
|
|
|
|
|
@@ -363,6 +385,8 @@ class OutlineReviewer:
|
|
|
outline_item: 单个大纲项目
|
|
|
trace_id: 追踪ID
|
|
|
item_index: 项目索引
|
|
|
+ state: 状态字典
|
|
|
+ stage_name: 阶段名称
|
|
|
|
|
|
Returns:
|
|
|
单项审查结果
|
|
|
@@ -404,6 +428,34 @@ class OutlineReviewer:
|
|
|
response_text = model_response
|
|
|
parsed_result = self._extract_json_from_text(response_text)
|
|
|
|
|
|
+ # 发送单项审查完成进度
|
|
|
+ logger.info(f"state参数检查: state存在={state is not None}")
|
|
|
+ if state:
|
|
|
+ logger.info(f"state keys: {list(state.keys())}")
|
|
|
+ logger.info(f"progress_manager存在: {'progress_manager' in state}")
|
|
|
+ if state and state.get("progress_manager"):
|
|
|
+ # 过滤并计算问题数量(与过滤逻辑保持一致)
|
|
|
+ filtered_result = [r for r in (parsed_result or []) if self._is_non_compliant_item(r)] if parsed_result else []
|
|
|
+ message = f"第{item_index+1}项{category}审查完成,发现 {len(filtered_result)} 个问题",
|
|
|
+ if not filtered_result:
|
|
|
+ message = f"第{item_index+1}项{category}审查完成,无问题"
|
|
|
+ # 使用try-catch确保SSE推送失败不会影响主流程
|
|
|
+ try:
|
|
|
+ await state["progress_manager"].update_stage_progress(
|
|
|
+ callback_task_id=state["callback_task_id"],
|
|
|
+ stage_name=f"{stage_name} - 阶段2:详细大纲审查",
|
|
|
+ current=item_index + 1, # 显示当前审查项目索引
|
|
|
+ status="processing",
|
|
|
+ message=message,
|
|
|
+ issues=filtered_result,
|
|
|
+ event_type="processing" # 使用专门的事件类型
|
|
|
+ )
|
|
|
+ logger.info(f"SSE推送成功: 第{item_index+1}项{category}审查完成")
|
|
|
+ logger.info(f"发送单项审查完成进度: 第{item_index+1}项{category}审查完成")
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"SSE推送失败: 第{item_index+1}项{category}, 错误: {str(e)}")
|
|
|
+ # 不抛出异常,避免影响主流程
|
|
|
+
|
|
|
return {
|
|
|
"success": True,
|
|
|
"category": category,
|
|
|
@@ -520,7 +572,7 @@ class OutlineReviewer:
|
|
|
|
|
|
def _extract_json_from_text(self, text: str):
|
|
|
"""
|
|
|
- 从文本中提取JSON内容
|
|
|
+ 从文本中提取JSON内容,复用inter_tool中的格式化方法
|
|
|
|
|
|
Args:
|
|
|
text: 包含JSON的文本
|
|
|
@@ -531,87 +583,70 @@ class OutlineReviewer:
|
|
|
if not text:
|
|
|
return None
|
|
|
|
|
|
- # 方法1: 尝试找到JSON代码块
|
|
|
- json_block_pattern = r'```(?:json)?\s*(\[.*?\])\s*```'
|
|
|
- match = re.search(json_block_pattern, text, re.DOTALL)
|
|
|
- if match:
|
|
|
- json_text = match.group(1)
|
|
|
- try:
|
|
|
- json_data = json.loads(json_text)
|
|
|
+ # 复用 inter_tool.py 中的 _extract_json_data 方法
|
|
|
+ try:
|
|
|
+ from core.construction_review.component.reviewers.utils.inter_tool import InterTool
|
|
|
+ inter_tool = InterTool()
|
|
|
+
|
|
|
+ json_data = inter_tool._extract_json_data(text)
|
|
|
+ if json_data:
|
|
|
+ # 确保返回的是列表格式,保持与原来方法的兼容性
|
|
|
if isinstance(json_data, list):
|
|
|
# 过滤掉"✔ 符合要求"的结果
|
|
|
filtered_data = self._filter_compliant_items(json_data)
|
|
|
return filtered_data
|
|
|
- except json.JSONDecodeError:
|
|
|
- pass
|
|
|
-
|
|
|
- # 方法2: 尝试找到第一个 [ 到最后一个 ] 之间的内容
|
|
|
- bracket_positions = []
|
|
|
- for i, char in enumerate(text):
|
|
|
- if char == '[':
|
|
|
- bracket_positions.append(('start', i))
|
|
|
- elif char == ']':
|
|
|
- bracket_positions.append(('end', i))
|
|
|
-
|
|
|
- if bracket_positions:
|
|
|
- # 找到最后一个 ] 的位置
|
|
|
- last_end = None
|
|
|
- for pos_type, pos in reversed(bracket_positions):
|
|
|
- if pos_type == 'end':
|
|
|
- last_end = pos
|
|
|
- break
|
|
|
-
|
|
|
- if last_end:
|
|
|
- # 从最后一个 ] 往前找匹配的 [
|
|
|
- depth = 0
|
|
|
- start_pos = None
|
|
|
- for pos_type, pos in reversed(bracket_positions):
|
|
|
- if pos > last_end:
|
|
|
- continue
|
|
|
- if pos_type == 'end':
|
|
|
- depth += 1
|
|
|
- elif pos_type == 'start':
|
|
|
- depth -= 1
|
|
|
- if depth == 0:
|
|
|
- start_pos = pos
|
|
|
- break
|
|
|
-
|
|
|
- if start_pos is not None:
|
|
|
- json_text = text[start_pos:last_end + 1]
|
|
|
- try:
|
|
|
- json_data = json.loads(json_text)
|
|
|
- if isinstance(json_data, list):
|
|
|
- # 过滤掉"✔ 符合要求"的结果
|
|
|
- filtered_data = self._filter_compliant_items(json_data)
|
|
|
- return filtered_data
|
|
|
- except json.JSONDecodeError:
|
|
|
- pass
|
|
|
-
|
|
|
- # 方法3: 尝试直接匹配简单的JSON数组
|
|
|
- json_array_pattern = r'(\[[\s\S]*?\])'
|
|
|
- matches = re.finditer(json_array_pattern, text)
|
|
|
- for match in matches:
|
|
|
- json_text = match.group(1)
|
|
|
- try:
|
|
|
- json_data = json.loads(json_text)
|
|
|
- if isinstance(json_data, list):
|
|
|
- # 过滤掉"✔ 符合要求"的结果
|
|
|
- filtered_data = self._filter_compliant_items(json_data)
|
|
|
+ elif isinstance(json_data, dict):
|
|
|
+ # 如果是字典,转换为列表
|
|
|
+ filtered_data = self._filter_compliant_items([json_data])
|
|
|
return filtered_data
|
|
|
- except json.JSONDecodeError:
|
|
|
- continue
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.warning(f"使用inter_tool提取JSON失败,回退到原有方法: {str(e)}")
|
|
|
+
|
|
|
+ # 回退到原有的简单提取方法
|
|
|
+ try:
|
|
|
+ # 方法1: 尝试找到JSON代码块
|
|
|
+ json_block_pattern = r'```(?:json)?\s*(\[.*?\])\s*```'
|
|
|
+ match = re.search(json_block_pattern, text, re.DOTALL)
|
|
|
+ if match:
|
|
|
+ json_text = match.group(1)
|
|
|
+ try:
|
|
|
+ json_data = json.loads(json_text)
|
|
|
+ if isinstance(json_data, list):
|
|
|
+ # 过滤掉"✔ 符合要求"的结果
|
|
|
+ filtered_data = self._filter_compliant_items(json_data)
|
|
|
+ return filtered_data
|
|
|
+ except json.JSONDecodeError:
|
|
|
+ pass
|
|
|
+
|
|
|
+ # 方法2: 尝试直接匹配简单的JSON数组
|
|
|
+ json_array_pattern = r'(\[[\s\S]*?\])'
|
|
|
+ matches = re.finditer(json_array_pattern, text)
|
|
|
+ for match in matches:
|
|
|
+ json_text = match.group(1)
|
|
|
+ try:
|
|
|
+ json_data = json.loads(json_text)
|
|
|
+ if isinstance(json_data, list):
|
|
|
+ # 过滤掉"✔ 符合要求"的结果
|
|
|
+ filtered_data = self._filter_compliant_items(json_data)
|
|
|
+ return filtered_data
|
|
|
+ except json.JSONDecodeError:
|
|
|
+ continue
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ logger.error(f"JSON提取完全失败: {str(e)}")
|
|
|
|
|
|
return None
|
|
|
|
|
|
def _filter_compliant_items(self, json_data: list) -> list:
|
|
|
"""
|
|
|
- 过滤掉"✔ 符合要求"的结果,只保留有问题的问题
|
|
|
+ 从issue_point字段判断,过滤掉符合要求的结果,只保留有问题的项目
|
|
|
|
|
|
Args:
|
|
|
json_data: 原始JSON数据列表
|
|
|
|
|
|
Returns:
|
|
|
- 过滤后的JSON数据列表
|
|
|
+ 过滤后的JSON数据列表,只包含"✘ 不符合要求"、"⚠ 部分符合"、"⚠ 部分缺失"等有问题的项目
|
|
|
"""
|
|
|
if not isinstance(json_data, list):
|
|
|
return json_data
|
|
|
@@ -619,23 +654,35 @@ class OutlineReviewer:
|
|
|
filtered_items = []
|
|
|
for item in json_data:
|
|
|
if isinstance(item, dict):
|
|
|
- # 检查是否符合要求的字段
|
|
|
- status_field = item.get('是否符合要求') or item.get('完整性状态') or item.get('状态')
|
|
|
-
|
|
|
- # 如果状态字段表示完全符合要求,则过滤掉
|
|
|
- if status_field:
|
|
|
- status_str = str(status_field).strip()
|
|
|
- # 精确匹配完全符合要求的情况,避免误过滤部分符合的项目
|
|
|
- if (status_str == '✔ 符合要求' or
|
|
|
- status_str == '完整包含' or
|
|
|
- status_str.startswith('[完整]') or
|
|
|
- status_str == '✔ 完整包含' or
|
|
|
- (status_str.startswith('✔') and '符合要求' in status_str and '部分' not in status_str)):
|
|
|
- logger.info(f"过滤符合要求的项目: {item.get('项目', item.get('章节类别', 'N/A'))}")
|
|
|
- continue # 跳过符合要求的项目
|
|
|
-
|
|
|
- # 保留有问题或不符合要求的项目
|
|
|
- filtered_items.append(item)
|
|
|
+ # 检查issue_point字段是否包含"✘ 不符合要求"
|
|
|
+ issue_point = item.get('issue_point', '')
|
|
|
+
|
|
|
+ if issue_point:
|
|
|
+ issue_point_str = str(issue_point).strip()
|
|
|
+ # 保留包含"✘ 不符合要求"或"⚠ 部分符合"的项目
|
|
|
+ if ('✘ 不符合要求' in issue_point_str or
|
|
|
+ '⚠ 部分符合' in issue_point_str or
|
|
|
+ '⚠ 部分缺失' in issue_point_str):
|
|
|
+ filtered_items.append(item)
|
|
|
+ logger.info(f"保留有问题项目: {issue_point_str}")
|
|
|
+ else:
|
|
|
+ logger.info(f"过滤符合要求的项目: {issue_point_str}")
|
|
|
+ else:
|
|
|
+ # 如果没有issue_point字段,检查旧格式的状态字段
|
|
|
+ status_field = item.get('是否符合要求') or item.get('完整性状态') or item.get('状态')
|
|
|
+ if status_field:
|
|
|
+ status_str = str(status_field).strip()
|
|
|
+ # 对于旧格式,保留包含"不符合要求"或"缺失"的项目
|
|
|
+ if ('不符合要求' in status_str or '缺失' in status_str or
|
|
|
+ status_str.startswith('✘') or '完全缺失' in status_str):
|
|
|
+ filtered_items.append(item)
|
|
|
+ logger.info(f"保留旧格式不符合要求的项目: {item.get('项目', 'N/A')} - {status_str}")
|
|
|
+ else:
|
|
|
+ logger.info(f"过滤旧格式符合要求的项目: {item.get('项目', 'N/A')} - {status_str}")
|
|
|
+ else:
|
|
|
+ # 既没有issue_point也没有状态字段,直接保留
|
|
|
+ filtered_items.append(item)
|
|
|
+ logger.warning(f"项目缺少状态标识字段,直接保留: {item}")
|
|
|
else:
|
|
|
# 非字典类型的项目直接保留
|
|
|
filtered_items.append(item)
|
|
|
@@ -643,4 +690,37 @@ class OutlineReviewer:
|
|
|
logger.info(f"过滤结果: 原始项目数 {len(json_data)}, 过滤后项目数 {len(filtered_items)}")
|
|
|
return filtered_items
|
|
|
|
|
|
+ def _is_non_compliant_item(self, item: dict) -> bool:
|
|
|
+ """
|
|
|
+ 判断项目是否有问题(与_filter_compliant_items逻辑保持一致)
|
|
|
+
|
|
|
+ Args:
|
|
|
+ item: 要判断的项目字典
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ bool: True表示有问题需要保留,False表示符合要求需要过滤
|
|
|
+ """
|
|
|
+ if not isinstance(item, dict):
|
|
|
+ return False
|
|
|
+
|
|
|
+ # 检查issue_point字段是否包含"✘ 不符合要求"
|
|
|
+ issue_point = item.get('issue_point', '')
|
|
|
+
|
|
|
+ if issue_point:
|
|
|
+ issue_point_str = str(issue_point).strip()
|
|
|
+ # 保留包含"✘ 不符合要求"或"⚠ 部分符合"的项目
|
|
|
+ return ('✘ 不符合要求' in issue_point_str or
|
|
|
+ '⚠ 部分符合' in issue_point_str or
|
|
|
+ '⚠ 部分缺失' in issue_point_str)
|
|
|
+ else:
|
|
|
+ # 如果没有issue_point字段,检查旧格式的状态字段
|
|
|
+ status_field = item.get('是否符合要求') or item.get('完整性状态') or item.get('状态')
|
|
|
+ if status_field:
|
|
|
+ status_str = str(status_field).strip()
|
|
|
+ # 对于旧格式,保留包含"不符合要求"或"缺失"的项目
|
|
|
+ return ('不符合要求' in status_str or '缺失' in status_str or
|
|
|
+ status_str.startswith('✘') or '完全缺失' in status_str)
|
|
|
+ else:
|
|
|
+ # 既没有issue_point也没有状态字段,默认为不符合要求(保守处理)
|
|
|
+ return True
|
|
|
|