|
|
@@ -40,9 +40,8 @@
|
|
|
|
|
|
import asyncio
|
|
|
import json
|
|
|
-import time
|
|
|
import random
|
|
|
-import uuid
|
|
|
+import re
|
|
|
from dataclasses import dataclass, asdict
|
|
|
from typing import Optional, Callable, Dict, Any, TypedDict, Annotated, List
|
|
|
from langgraph.graph import StateGraph, END
|
|
|
@@ -86,6 +85,7 @@ class AIReviewState(TypedDict):
|
|
|
messages: Annotated[List[BaseMessage], add_messages]
|
|
|
|
|
|
|
|
|
+
|
|
|
class AIReviewWorkflow:
|
|
|
"""基于LangGraph的AI审查工作流"""
|
|
|
|
|
|
@@ -110,58 +110,27 @@ class AIReviewWorkflow:
|
|
|
self.structured_content = structured_content
|
|
|
self.progress_manager = progress_manager
|
|
|
self.ai_review_engine = AIReviewEngine()
|
|
|
+
|
|
|
+ # 初始化核心功能和工具类
|
|
|
+ self.core_fun = AIReviewCoreFun(self.ai_review_engine, max_review_units, review_mode)
|
|
|
+ self.inter_tool = InterTool()
|
|
|
+
|
|
|
self.max_review_units = max_review_units
|
|
|
self.review_mode = review_mode
|
|
|
self.graph = self._build_workflow()
|
|
|
|
|
|
- def _filter_review_units(self, chunks: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
|
- """
|
|
|
- 根据配置筛选要审查的单元
|
|
|
|
|
|
- Args:
|
|
|
- chunks: 所有审查单元
|
|
|
+ def _build_workflow(self) -> StateGraph:
|
|
|
+ """
|
|
|
+ 构建AI审查的LangGraph工作流图
|
|
|
|
|
|
Returns:
|
|
|
- List[Dict[str, Any]]: 筛选后的审查单元
|
|
|
+ StateGraph: 配置完成的LangGraph工作流图实例
|
|
|
|
|
|
Note:
|
|
|
- 使用预设的起始索引进行切片,确保不会跳过前面的重要内容
|
|
|
+ 创建包含开始、初始化进度、AI审查、完成和错误处理节点的完整工作流
|
|
|
+ 设置节点间的转换关系和条件边,支持错误处理流程
|
|
|
"""
|
|
|
- if self.max_review_units is None or self.review_mode == "all":
|
|
|
- return chunks
|
|
|
-
|
|
|
- # 验证原始chunks不为空
|
|
|
- if not chunks:
|
|
|
- logger.warning("没有可用的审查单元")
|
|
|
- return []
|
|
|
-
|
|
|
- # 安全的切片操作,考虑边界情况
|
|
|
- # 使用预设的起始索引进行切片,避免跳过前面的内容
|
|
|
- start_index = min(DEFAULT_SLICE_START_INDEX, len(chunks) - 1) # 确保start_index不超过数组边界
|
|
|
- chunks = chunks[start_index:]
|
|
|
-
|
|
|
- # 再次验证切片后的结果
|
|
|
- if not chunks:
|
|
|
- logger.warning(f"从索引{start_index}切片后没有可用的审查单元")
|
|
|
- return []
|
|
|
-
|
|
|
- total_chunks = len(chunks)
|
|
|
- actual_review_count = min(self.max_review_units, total_chunks)
|
|
|
-
|
|
|
- logger.info(f"审查模式: {self.review_mode}, 总单元数: {total_chunks}, 实际审查数: {actual_review_count}")
|
|
|
-
|
|
|
- if self.review_mode == "first":
|
|
|
- # 取前N个
|
|
|
- return chunks[:actual_review_count]
|
|
|
- elif self.review_mode == "random":
|
|
|
- # 随机取N个
|
|
|
- return random.sample(chunks, actual_review_count)
|
|
|
- else:
|
|
|
- # 默认取前N个
|
|
|
- return chunks[:actual_review_count]
|
|
|
-
|
|
|
- def _build_workflow(self) -> StateGraph:
|
|
|
- """构建AI审查的LangGraph工作流图"""
|
|
|
workflow = StateGraph(AIReviewState)
|
|
|
workflow.add_node("start", self._start_node)
|
|
|
workflow.add_node("initialize_progress", self._initialize_progress_node)
|
|
|
@@ -178,7 +147,7 @@ class AIReviewWorkflow:
|
|
|
# 添加条件边(错误处理)
|
|
|
workflow.add_conditional_edges(
|
|
|
"ai_review",
|
|
|
- self._check_ai_review_result,
|
|
|
+ self.inter_tool._check_ai_review_result,
|
|
|
{
|
|
|
"success": "complete",
|
|
|
"error": "error_handler"
|
|
|
@@ -286,16 +255,16 @@ class AIReviewWorkflow:
|
|
|
state["current_stage"] = "ai_review"
|
|
|
|
|
|
# 1. 准备审查单元数据
|
|
|
- review_chunks, total_units, total_all_units = await self._prepare_review_units(state)
|
|
|
+ review_chunks, total_units, total_all_units = await self.core_fun._prepare_review_units(state)
|
|
|
|
|
|
# 2. 发送开始审查进度
|
|
|
- await self._send_start_review_progress(state, total_units)
|
|
|
+ await self.core_fun._send_start_review_progress(state, total_units)
|
|
|
|
|
|
# 3. 执行并发审查
|
|
|
- successful_results = await self._execute_concurrent_reviews(review_chunks, total_units, state)
|
|
|
+ successful_results = await self.core_fun._execute_concurrent_reviews(review_chunks, total_units, state)
|
|
|
|
|
|
# 4. 汇总结果
|
|
|
- summary = self._aggregate_results(successful_results)
|
|
|
+ summary = self.inter_tool._aggregate_results(successful_results)
|
|
|
|
|
|
review_results = {
|
|
|
'total_all_units': total_all_units, # 原始总单元数
|
|
|
@@ -322,7 +291,19 @@ class AIReviewWorkflow:
|
|
|
return state
|
|
|
|
|
|
async def _complete_node(self, state: AIReviewState) -> AIReviewState:
|
|
|
- """完成节点"""
|
|
|
+ """
|
|
|
+ 完成节点 - 工作流结束处理
|
|
|
+
|
|
|
+ Args:
|
|
|
+ state: AI审查工作流状态
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ AIReviewState: 更新后的工作流状态,标记为已完成
|
|
|
+
|
|
|
+ Note:
|
|
|
+ 设置最终状态为completed,更新进度管理器的完成状态
|
|
|
+ 发送工作流完成的消息日志
|
|
|
+ """
|
|
|
logger.info(f"AI审查完成: {state['file_id']}")
|
|
|
|
|
|
state["current_stage"] = "complete"
|
|
|
@@ -345,7 +326,19 @@ class AIReviewWorkflow:
|
|
|
return state
|
|
|
|
|
|
async def _error_handler_node(self, state: AIReviewState) -> AIReviewState:
|
|
|
- """错误处理节点"""
|
|
|
+ """
|
|
|
+ 错误处理节点 - 处理工作流中的异常情况
|
|
|
+
|
|
|
+ Args:
|
|
|
+ state: AI审查工作流状态,包含错误信息
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ AIReviewState: 更新后的工作流状态,标记为失败
|
|
|
+
|
|
|
+ Note:
|
|
|
+ 设置状态为failed,通过进度管理器发送错误通知
|
|
|
+ 记录详细的错误信息到日志中
|
|
|
+ """
|
|
|
logger.error(f"AI审查错误处理: {state['file_id']}, 错误: {state['error_message']}")
|
|
|
|
|
|
state["status"] = "failed"
|
|
|
@@ -381,6 +374,22 @@ class AIReviewWorkflow:
|
|
|
return {}
|
|
|
|
|
|
|
|
|
+class AIReviewCoreFun:
|
|
|
+ """AI审查核心功能类 - 负责具体的审查逻辑和数据处理"""
|
|
|
+
|
|
|
+ def __init__(self, ai_review_engine, max_review_units: int = None, review_mode: str = "all"):
|
|
|
+ """
|
|
|
+ 初始化AI审查核心功能类
|
|
|
+
|
|
|
+ Args:
|
|
|
+ ai_review_engine: AI审查引擎实例
|
|
|
+ max_review_units: 最大审查单元数量(None表示审查所有)
|
|
|
+ review_mode: 审查模式 ("all"=全部, "first"=前N个, "random"=随机N个)
|
|
|
+ """
|
|
|
+ self.ai_review_engine = ai_review_engine
|
|
|
+ self.max_review_units = max_review_units
|
|
|
+ self.review_mode = review_mode
|
|
|
+
|
|
|
|
|
|
async def _execute_concurrent_reviews(self, review_chunks: List[Dict[str, Any]],
|
|
|
total_units: int, state: AIReviewState) -> List[ReviewResult]:
|
|
|
@@ -396,39 +405,51 @@ class AIReviewWorkflow:
|
|
|
List[ReviewResult]: 审查结果列表
|
|
|
"""
|
|
|
try:
|
|
|
- # 实现异步并发
|
|
|
- review_tasks = [
|
|
|
- asyncio.create_task(self._review_single_unit(content, i, total_units, state))
|
|
|
- for i, content in enumerate(review_chunks)
|
|
|
+ # 简化方案:并发执行,每个单元完成时立即推送消息
|
|
|
+ semaphore = asyncio.Semaphore(3) # 允许3个并发审查
|
|
|
+
|
|
|
+ async def process_unit_and_notify(unit_index, unit_content):
|
|
|
+ """处理单个单元,完成后立即推送通知"""
|
|
|
+ async with semaphore:
|
|
|
+ # 执行单个单元审查
|
|
|
+ result = await self._review_single_unit(unit_content, unit_index, total_units, state)
|
|
|
+
|
|
|
+ # 审查完成后立即推送通知
|
|
|
+ if result.overall_risk != "error":
|
|
|
+ section_label = unit_content.get('section_label', f'第{unit_index + 1}部分')
|
|
|
+ logger.info(f"section_label: {section_label}")
|
|
|
+ # 格式化issues以获取问题数量
|
|
|
+ issues = self._format_review_results_to_issues(
|
|
|
+ state["callback_task_id"],
|
|
|
+ unit_index,
|
|
|
+ f"第{unit_content.get('page', '')}页:{section_label}",
|
|
|
+ unit_content,
|
|
|
+ result.basic_compliance,
|
|
|
+ result.technical_compliance
|
|
|
+ )
|
|
|
+
|
|
|
+ # # 正确统计:只统计真正存在的问题数量
|
|
|
+ # issues_count = sum(
|
|
|
+ # 1 for issue in issues
|
|
|
+ # for issue_data in issue.values()
|
|
|
+ # for review_item in issue_data.get("review_lists", [])
|
|
|
+ # if review_item.get("exist_issue", False)
|
|
|
+ # )
|
|
|
+ current = int(((unit_index + 1) / total_units) * 100)
|
|
|
+
|
|
|
+ # 立即发送单元审查详情(包含unit_review和processing_flag事件)
|
|
|
+ await self._send_unit_review_progress(state, unit_index, total_units, section_label, issues, current)
|
|
|
+
|
|
|
+ return result
|
|
|
+
|
|
|
+ # 创建并发任务
|
|
|
+ tasks = [
|
|
|
+ asyncio.create_task(process_unit_and_notify(i, unit_content))
|
|
|
+ for i, unit_content in enumerate(review_chunks)
|
|
|
]
|
|
|
|
|
|
- # 等待所有审查完成
|
|
|
- all_results = await asyncio.gather(*review_tasks)
|
|
|
-
|
|
|
- # 处理进度更新和问题详情推送
|
|
|
- for i, result in enumerate(all_results):
|
|
|
- if result.overall_risk != "error":
|
|
|
- unit_content = review_chunks[i]
|
|
|
- section_label = unit_content.get('section_label', f'第{i + 1}部分')
|
|
|
-
|
|
|
- # 格式化issues以获取问题数量
|
|
|
- issues = self._format_review_results_to_issues(
|
|
|
- state["callback_task_id"],
|
|
|
- i,
|
|
|
- f"第{unit_content.get('page', '')}页:{section_label}",
|
|
|
- unit_content,
|
|
|
- result.basic_compliance,
|
|
|
- result.technical_compliance
|
|
|
- )
|
|
|
-
|
|
|
- issues_count = sum(len(issue.get("review_lists", [])) for issue in issues)
|
|
|
- current = int(((i + 1) / total_units) * 100)
|
|
|
-
|
|
|
- # 发送进度更新
|
|
|
- #await self._send_unit_complete_progress(state, i, total_units, section_label, issues_count)
|
|
|
-
|
|
|
- # 发送问题详情
|
|
|
- #await self._send_unit_review_details(state, i, total_units, section_label, issues, current)
|
|
|
+ # 等待所有任务完成
|
|
|
+ all_results = await asyncio.gather(*tasks)
|
|
|
|
|
|
# 过滤成功结果
|
|
|
successful_results = [result for result in all_results if result.overall_risk != "error"]
|
|
|
@@ -477,6 +498,7 @@ class AIReviewWorkflow:
|
|
|
ReviewResult: 单元审查结果
|
|
|
"""
|
|
|
try:
|
|
|
+
|
|
|
# 构建Trace ID
|
|
|
trace_id_idx = f"({state['callback_task_id']}-{unit_index})"
|
|
|
|
|
|
@@ -487,15 +509,15 @@ class AIReviewWorkflow:
|
|
|
|
|
|
# 设置review_location_label到AIReviewEngine
|
|
|
self.ai_review_engine.set_review_location_label(review_location_label)
|
|
|
-
|
|
|
+ logger.info(f"review_location_label: {review_location_label}")
|
|
|
stage_name = f"AI审查:{section_label}"
|
|
|
|
|
|
# 方法内部进度计算(基于当前处理的单元)
|
|
|
current_progress = int((unit_index / total_units) * 100)
|
|
|
|
|
|
# 构建进度消息
|
|
|
- message = f"正在处理第 {unit_index + 1}/{total_units} 个单元: {section_label}"
|
|
|
- logger.info(f"开始处理单元: {unit_index + 1}/{total_units} - {section_label}")
|
|
|
+ #message = f"正在处理第 {unit_index + 1}/{total_units} 个单元: {section_label}"
|
|
|
+ #logger.info(f"开始处理单元: {unit_index + 1}/{total_units} - {section_label}")
|
|
|
|
|
|
# 并发执行各种原子化审查方法,添加超时控制
|
|
|
review_tasks = [
|
|
|
@@ -511,8 +533,6 @@ class AIReviewWorkflow:
|
|
|
|
|
|
# 等待所有审查完成
|
|
|
review_results = await asyncio.gather(*review_tasks, return_exceptions=True)
|
|
|
- await self._send_unit_review_details(state, i, total_units, section_label, issues, current)
|
|
|
-
|
|
|
# 处理异常结果
|
|
|
basic_result = review_results[0] if not isinstance(review_results[0], Exception) else {"error": str(review_results[0])}
|
|
|
technical_result = review_results[1] if len(review_results) > 1 and not isinstance(review_results[1], Exception) else {"error": str(review_results[1]) if len(review_results) > 1 else "No result"}
|
|
|
@@ -521,17 +541,11 @@ class AIReviewWorkflow:
|
|
|
rag_result = {"error": "RAG check disabled"}
|
|
|
|
|
|
# 计算总体风险等级
|
|
|
- overall_risk = self._calculate_overall_risk(basic_result, technical_result, rag_result)
|
|
|
-
|
|
|
- # 格式化审查结果
|
|
|
- issues = self._format_review_results_to_issues(
|
|
|
- state["callback_task_id"],
|
|
|
- unit_index,
|
|
|
- review_location_label,
|
|
|
- unit_content,
|
|
|
- basic_result,
|
|
|
- technical_result
|
|
|
- )
|
|
|
+ inter_tool = InterTool()
|
|
|
+ overall_risk = inter_tool._calculate_overall_risk(basic_result, technical_result, rag_result)
|
|
|
+
|
|
|
+ # 格式化审查结果(注意:实际进度推送现在在 _execute_concurrent_reviews 中按顺序处理)
|
|
|
+ # 这里保存格式化结果到ReviewResult中,供后续推送使用
|
|
|
|
|
|
return ReviewResult(
|
|
|
unit_index=unit_index,
|
|
|
@@ -584,7 +598,7 @@ class AIReviewWorkflow:
|
|
|
except Exception as e:
|
|
|
logger.warning(f"发送开始进度更新失败: {str(e)}")
|
|
|
|
|
|
- async def _send_unit_review_details(self, state: AIReviewState, unit_index: int,
|
|
|
+ async def _send_unit_review_progress(self, state: AIReviewState, unit_index: int,
|
|
|
total_units: int, section_label: str,
|
|
|
issues: List[Dict], current: int) -> None:
|
|
|
"""
|
|
|
@@ -601,27 +615,27 @@ class AIReviewWorkflow:
|
|
|
try:
|
|
|
if isinstance(issues, list) and issues and state["progress_manager"]:
|
|
|
stage_name = f"AI审查:{section_label}"
|
|
|
+ # 正确统计:只统计真正存在的问题数量
|
|
|
issues_count = sum(
|
|
|
- len(issue.get(issue_id, {}).get("review_lists", []))
|
|
|
- for issue in issues
|
|
|
- for issue_id in issue.keys()
|
|
|
- )
|
|
|
- await self._send_unit_complete_progress(state, unit_index, total_units, section_label, issues_count)
|
|
|
-
|
|
|
- asyncio.create_task(
|
|
|
- state["progress_manager"].update_stage_progress(
|
|
|
- callback_task_id=state["callback_task_id"],
|
|
|
- stage_name=stage_name,
|
|
|
- current=current,
|
|
|
- status="unit_review_update",
|
|
|
- message=f"发现{issues_count}个问题: {section_label}",
|
|
|
- issues=issues,
|
|
|
- user_id=state.get("user_id", ""),
|
|
|
- overall_task_status="processing",
|
|
|
- event_type="unit_review"
|
|
|
- )
|
|
|
+ 1 for issue in issues
|
|
|
+ for issue_data in issue.values()
|
|
|
+ for review_item in issue_data.get("review_lists", [])
|
|
|
+ if review_item.get("exist_issue", False)
|
|
|
)
|
|
|
+ await self._send_unit_overall_progress(state, unit_index, total_units, section_label, issues_count)
|
|
|
|
|
|
+ # 同步等待进度消息推送完成,确保与主流程同步
|
|
|
+ await state["progress_manager"].update_stage_progress(
|
|
|
+ callback_task_id=state["callback_task_id"],
|
|
|
+ stage_name=stage_name,
|
|
|
+ current=current,
|
|
|
+ status="unit_review_update",
|
|
|
+ message=f"发现{issues_count}个问题: {section_label}",
|
|
|
+ issues=issues,
|
|
|
+ user_id=state.get("user_id", ""),
|
|
|
+ overall_task_status="processing",
|
|
|
+ event_type="unit_review"
|
|
|
+ )
|
|
|
|
|
|
# 清空当前issues
|
|
|
await state["progress_manager"].update_stage_progress(
|
|
|
@@ -631,7 +645,7 @@ class AIReviewWorkflow:
|
|
|
except Exception as e:
|
|
|
logger.warning(f"发送单元审查详情失败: {str(e)}")
|
|
|
|
|
|
- async def _send_unit_complete_progress(self, state: AIReviewState, unit_index: int,
|
|
|
+ async def _send_unit_overall_progress(self, state: AIReviewState, unit_index: int,
|
|
|
total_units: int, section_label: str,
|
|
|
issues_count: int) -> None:
|
|
|
"""
|
|
|
@@ -672,21 +686,29 @@ class AIReviewWorkflow:
|
|
|
logger.warning(f"发送单元完成进度更新失败: {str(e)}")
|
|
|
|
|
|
def _format_review_results_to_issues(self,callback_task_id: str, unit_index: int, review_location_label: str,
|
|
|
- unit_content: Dict[str, Any], basic_result: Dict[str, Any],
|
|
|
- technical_result: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
|
+ unit_content: Dict[str, Any], basic_result: Dict[str, Any],
|
|
|
+ technical_result: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|
|
"""
|
|
|
将审查结果格式化为issues结构
|
|
|
|
|
|
Args:
|
|
|
- callback_task_id: 回调任务ID
|
|
|
- unit_index: 单元索引
|
|
|
- review_location_label: 审查位置标签
|
|
|
- unit_content: 单元内容
|
|
|
- basic_result: 基础合规性审查结果
|
|
|
- technical_result: 技术性审查结果
|
|
|
+ callback_task_id: 回调任务ID,用于生成唯一issue_id
|
|
|
+ unit_index: 单元索引,用于生成唯一issue_id
|
|
|
+ review_location_label: 审查位置标签,如"第3页:第一章"
|
|
|
+ unit_content: 单元内容,包含原始文本等信息
|
|
|
+ basic_result: 基础合规性审查结果,包含各项检查结果
|
|
|
+ technical_result: 技术性审查结果,包含技术标准检查结果
|
|
|
|
|
|
Returns:
|
|
|
- List[Dict]: 格式化后的issues列表,包含issue_id、metadata、risk_summary、review_lists等字段
|
|
|
+ List[Dict]: 格式化后的issues列表,每个issue包含:
|
|
|
+ - issue_id: 唯一标识符,格式为"{callback_task_id}-{risk_level}-{unit_index}"
|
|
|
+ - metadata: 元数据,包含审查位置和原始内容
|
|
|
+ - risk_summary: 风险摘要,包含最高风险等级和问题数量统计
|
|
|
+ - review_lists: 详细审查问题列表
|
|
|
+
|
|
|
+ Note:
|
|
|
+ 自动跳过overall_score字段,提取所有检查项的详细结果
|
|
|
+ 支持风险等级统计和最高风险等级确定
|
|
|
"""
|
|
|
issues = []
|
|
|
review_lists = []
|
|
|
@@ -709,8 +731,6 @@ class AIReviewWorkflow:
|
|
|
logger.info(f"跳过分数字段: {check_key}")
|
|
|
continue
|
|
|
|
|
|
- #logger.info(f"检查项 {check_key} 的结果: {check_result}")
|
|
|
-
|
|
|
if check_result and "details" in check_result and "response" in check_result["details"]:
|
|
|
response = check_result["details"]["response"]
|
|
|
check_name = check_result["details"].get("name", check_key)
|
|
|
@@ -735,16 +755,16 @@ class AIReviewWorkflow:
|
|
|
|
|
|
# 如果有审查结果,创建issue
|
|
|
if review_lists:
|
|
|
- issue_id = f"{callback_task_id}-{max_risk_level or '0'}-{unit_index}"
|
|
|
+ issue_id = f"{callback_task_id}-{max_risk_level if max_risk_level else '0'}-{unit_index}"
|
|
|
issue = {
|
|
|
- issue_id: {
|
|
|
+ issue_id: {
|
|
|
"metadata": {
|
|
|
"review_location_label": review_location_label,
|
|
|
"original_content": unit_content.get('content', '')
|
|
|
},
|
|
|
"risk_summary": {
|
|
|
- "max_risk_level": max_risk_level or '0',
|
|
|
- "risk_count": risk_count or 0
|
|
|
+ "max_risk_level": max_risk_level if max_risk_level else '0',
|
|
|
+ "risk_count": risk_count if risk_count else 0
|
|
|
},
|
|
|
"review_lists": review_lists
|
|
|
}
|
|
|
@@ -755,7 +775,7 @@ class AIReviewWorkflow:
|
|
|
|
|
|
def _parse_ai_review_response(self,response: str, check_name: str) -> List[Dict[str, Any]]:
|
|
|
"""
|
|
|
- 解析AI审查的Markdown格式响应
|
|
|
+ 解析AI审查的JSON格式响应
|
|
|
|
|
|
Args:
|
|
|
response: AI审查响应内容
|
|
|
@@ -766,68 +786,192 @@ class AIReviewWorkflow:
|
|
|
|
|
|
Note:
|
|
|
支持识别"无明显问题"等关键词,自动设置风险等级
|
|
|
+ 解析新的英文键名JSON格式: issue_point, location, suggestion, reason, risk_level
|
|
|
"""
|
|
|
review_lists = []
|
|
|
|
|
|
try:
|
|
|
if "无明显问题" in response or "无问题" in response or "符合要求" in response:
|
|
|
review_lists.append({
|
|
|
- "check_item": check_name,
|
|
|
+ "check_item": check_name,
|
|
|
"check_result": "无明显问题",
|
|
|
"exist_issue": False,
|
|
|
"risk_info": {"risk_level": "low"}
|
|
|
})
|
|
|
return review_lists
|
|
|
|
|
|
- lines = response.split('\n')
|
|
|
-
|
|
|
- # 初始化默认风险等级
|
|
|
- risk_level = "medium" # 默认中等风险
|
|
|
-
|
|
|
- # 先扫描一遍提取风险等级
|
|
|
- for line in lines:
|
|
|
- line = line.strip()
|
|
|
- if line.startswith('**风险等级**:'):
|
|
|
- extracted_risk = line[7:].strip().lower()
|
|
|
- if "高" in extracted_risk:
|
|
|
- risk_level = "high"
|
|
|
- elif "中" in extracted_risk:
|
|
|
- risk_level = "medium"
|
|
|
- else:
|
|
|
- risk_level = "low"
|
|
|
- break
|
|
|
-
|
|
|
- # 创建简化的结果对象,直接存储完整响应
|
|
|
- review_lists.append({
|
|
|
- "check_item": check_name,
|
|
|
- "check_result": response, # 直接存储完整的AI响应
|
|
|
- "exist_issue": True,
|
|
|
- "risk_info": {"risk_level": risk_level}
|
|
|
- })
|
|
|
+ # 尝试解析JSON格式响应
|
|
|
+
|
|
|
+ # 首先检查响应是否直接被{}包裹
|
|
|
+ response_stripped = response.strip()
|
|
|
+ json_data = None
|
|
|
+
|
|
|
+ if (response_stripped.startswith('{') and response_stripped.endswith('}')) or \
|
|
|
+ (response_stripped.startswith('{\n') and response_stripped.endswith('\n}')):
|
|
|
+ # 响应被{}包裹,直接解析
|
|
|
+ try:
|
|
|
+ json_data = json.loads(response_stripped)
|
|
|
+ except json.JSONDecodeError:
|
|
|
+ logger.warning(f"直接JSON解析失败,尝试查找JSON代码块")
|
|
|
+
|
|
|
+ # 如果直接解析失败,查找JSON代码块
|
|
|
+ if json_data is None:
|
|
|
+ json_pattern = r'```json\s*(.*?)\s*```'
|
|
|
+ json_matches = re.findall(json_pattern, response, re.DOTALL)
|
|
|
+ else:
|
|
|
+ json_matches = [response_stripped] # 使用整个响应作为JSON
|
|
|
+
|
|
|
+ if json_matches:
|
|
|
+ # 解析找到的JSON
|
|
|
+ for json_str in json_matches:
|
|
|
+ try:
|
|
|
+ # 如果是直接解析的JSON,不需要重新解析
|
|
|
+ if json_str == response_stripped and json_data is not None:
|
|
|
+ issue_data = json_data
|
|
|
+ else:
|
|
|
+ # 清理JSON字符串
|
|
|
+ json_str = json_str.strip()
|
|
|
+ if not json_str:
|
|
|
+ continue
|
|
|
+
|
|
|
+ # 解析JSON
|
|
|
+ issue_data = json.loads(json_str)
|
|
|
+
|
|
|
+ risk_level = issue_data.get("risk_level", "")
|
|
|
+
|
|
|
+ # 确定风险等级
|
|
|
+ if not risk_level:
|
|
|
+ risk_level = "medium" # 默认中等风险
|
|
|
+ else:
|
|
|
+ risk_level = risk_level.lower()
|
|
|
+ if "高" in risk_level or "high" in risk_level:
|
|
|
+ risk_level = "high"
|
|
|
+ elif "中" in risk_level or "medium" in risk_level:
|
|
|
+ risk_level = "medium"
|
|
|
+ else:
|
|
|
+ risk_level = "low"
|
|
|
+
|
|
|
+ # 直接存储JSON字段
|
|
|
+ review_lists.append({
|
|
|
+ "check_item": check_name,
|
|
|
+ "check_result": issue_data, # 直接存储原始JSON数据
|
|
|
+ "exist_issue": True,
|
|
|
+ "risk_info": {"risk_level": risk_level}
|
|
|
+ })
|
|
|
+
|
|
|
+ except json.JSONDecodeError as je:
|
|
|
+ logger.warning(f"JSON解析失败: {str(je)}, JSON内容: {json_str[:100]}...")
|
|
|
+ # 如果JSON解析失败,回退到文本解析
|
|
|
+ continue
|
|
|
+
|
|
|
+ # 如果没有成功解析到JSON,使用旧的文本解析方法
|
|
|
+ if not review_lists:
|
|
|
+ lines = response.split('\n')
|
|
|
+ risk_level = "medium" # 默认中等风险
|
|
|
+
|
|
|
+ # 扫描提取风险等级(支持中英文)
|
|
|
+ for line in lines:
|
|
|
+ line = line.strip()
|
|
|
+ if line.startswith('**风险等级**:') or line.startswith('**risk_level**:'):
|
|
|
+ if ':' in line:
|
|
|
+ extracted_risk = line.split(':', 1)[1].strip().lower()
|
|
|
+ elif ':' in line:
|
|
|
+ extracted_risk = line.split(':', 1)[1].strip().lower()
|
|
|
+
|
|
|
+ if "高" in extracted_risk or "high" in extracted_risk:
|
|
|
+ risk_level = "high"
|
|
|
+ elif "中" in extracted_risk or "medium" in extracted_risk:
|
|
|
+ risk_level = "medium"
|
|
|
+ else:
|
|
|
+ risk_level = "low"
|
|
|
+ break
|
|
|
+
|
|
|
+ review_lists.append({
|
|
|
+ "check_item": check_name,
|
|
|
+ "check_result": response, # 存储完整响应作为备选
|
|
|
+ "exist_issue": True,
|
|
|
+ "risk_info": {"risk_level": risk_level}
|
|
|
+ })
|
|
|
|
|
|
except Exception as e:
|
|
|
logger.error(f"解析AI审查响应失败: {str(e)}")
|
|
|
review_lists.append({
|
|
|
"check_item": check_name,
|
|
|
"check_result": response,
|
|
|
- "exist_issue": True,
|
|
|
+ "exist_issue": True,
|
|
|
"risk_info": {"risk_level": "low"}
|
|
|
})
|
|
|
|
|
|
return review_lists
|
|
|
-
|
|
|
+
|
|
|
+ def _filter_review_units(self, chunks: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
|
+ """
|
|
|
+ 根据配置筛选要审查的单元
|
|
|
+
|
|
|
+ Args:
|
|
|
+ chunks: 所有审查单元
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ List[Dict[str, Any]]: 筛选后的审查单元
|
|
|
+
|
|
|
+ Note:
|
|
|
+ 使用预设的起始索引进行切片,确保不会跳过前面的重要内容
|
|
|
+ """
|
|
|
+ if self.max_review_units is None or self.review_mode == "all":
|
|
|
+ return chunks
|
|
|
+
|
|
|
+ # 验证原始chunks不为空
|
|
|
+ if not chunks:
|
|
|
+ logger.warning("没有可用的审查单元")
|
|
|
+ return []
|
|
|
+
|
|
|
+ # 安全的切片操作,考虑边界情况
|
|
|
+ # 使用预设的起始索引进行切片,避免跳过前面的内容
|
|
|
+ start_index = min(DEFAULT_SLICE_START_INDEX, len(chunks) - 1) # 确保start_index不超过数组边界
|
|
|
+ chunks = chunks[start_index:]
|
|
|
+
|
|
|
+ # 再次验证切片后的结果
|
|
|
+ if not chunks:
|
|
|
+ logger.warning(f"从索引{start_index}切片后没有可用的审查单元")
|
|
|
+ return []
|
|
|
+
|
|
|
+ total_chunks = len(chunks)
|
|
|
+ actual_review_count = min(self.max_review_units, total_chunks)
|
|
|
+
|
|
|
+ logger.info(f"审查模式: {self.review_mode}, 总单元数: {total_chunks}, 实际审查数: {actual_review_count}")
|
|
|
+
|
|
|
+ if self.review_mode == "first":
|
|
|
+ # 取前N个
|
|
|
+ return chunks[:actual_review_count]
|
|
|
+ elif self.review_mode == "random":
|
|
|
+ # 随机取N个
|
|
|
+ return random.sample(chunks, actual_review_count)
|
|
|
+ else:
|
|
|
+ # 默认取前N个
|
|
|
+ return chunks[:actual_review_count]
|
|
|
+
|
|
|
+
|
|
|
+class InterTool:
|
|
|
+ """AI审查工具类 - 负责辅助计算和结果处理"""
|
|
|
|
|
|
def _calculate_overall_risk(self, basic_result: Dict, technical_result: Dict, rag_result: Dict) -> str:
|
|
|
"""
|
|
|
计算总体风险等级
|
|
|
|
|
|
Args:
|
|
|
- basic_result: 基础合规性审查结果
|
|
|
- technical_result: 技术性审查结果
|
|
|
- rag_result: RAG增强审查结果
|
|
|
+ basic_result: 基础合规性审查结果,包含overall_score字段
|
|
|
+ technical_result: 技术性审查结果,包含overall_score字段
|
|
|
+ rag_result: RAG增强审查结果(当前未使用)
|
|
|
|
|
|
Returns:
|
|
|
str: 风险等级 ("low", "medium", "high")
|
|
|
+
|
|
|
+ Note:
|
|
|
+ 风险等级计算逻辑:
|
|
|
+ - low: 基础和技术审查都达到90分以上
|
|
|
+ - medium: 基础和技术审查都达到70分以上
|
|
|
+ - high: 其他情况
|
|
|
+ 异常情况下返回默认风险等级medium
|
|
|
"""
|
|
|
try:
|
|
|
# 基于各种审查结果计算风险等级
|
|
|
@@ -850,10 +994,17 @@ class AIReviewWorkflow:
|
|
|
汇总审查结果
|
|
|
|
|
|
Args:
|
|
|
- successful_results: 成功的审查结果列表
|
|
|
+ successful_results: 成功的审查结果列表,每个结果包含风险等级和得分
|
|
|
|
|
|
Returns:
|
|
|
- Dict[str, Any]: 汇总后的统计信息,包括风险统计、平均得分等
|
|
|
+ Dict[str, Any]: 汇总后的统计信息,包含以下字段:
|
|
|
+ - risk_stats: 各风险等级的数量统计 {"low": 0, "medium": 0, "high": 0}
|
|
|
+ - avg_basic_score: 基础合规性平均得分
|
|
|
+ - avg_technical_score: 技术性审查平均得分
|
|
|
+ - total_reviewed: 成功审查的总数量
|
|
|
+
|
|
|
+ Note:
|
|
|
+ 当输入为空时返回空字典,异常时记录错误并返回空字典
|
|
|
"""
|
|
|
try:
|
|
|
if not successful_results:
|
|
|
@@ -882,7 +1033,19 @@ class AIReviewWorkflow:
|
|
|
return {}
|
|
|
|
|
|
def _check_ai_review_result(self, state: AIReviewState) -> str:
|
|
|
- """检查AI审查结果"""
|
|
|
+ """
|
|
|
+ 检查AI审查结果状态
|
|
|
+
|
|
|
+ Args:
|
|
|
+ state: AI审查工作流状态
|
|
|
+
|
|
|
+ Returns:
|
|
|
+ str: 状态标识 ("success" 或 "error")
|
|
|
+
|
|
|
+ Note:
|
|
|
+ 根据状态中是否存在错误信息来判断审查是否成功
|
|
|
+ """
|
|
|
if state.get("error_message"):
|
|
|
return "error"
|
|
|
return "success"
|
|
|
+
|