|
@@ -29,7 +29,6 @@ AI审查核心功能类 - 负责具体的审查逻辑和数据处理
|
|
|
├── _execute_technical_review() # 执行技术性审查(参数/非参数合规性检查)
|
|
├── _execute_technical_review() # 执行技术性审查(参数/非参数合规性检查)
|
|
|
├── _group_chunks_by_chapter() # 按章节代码对chunks进行分组
|
|
├── _group_chunks_by_chapter() # 按章节代码对chunks进行分组
|
|
|
├── _extract_issues_from_result() # 从审查结果中提取issues列表
|
|
├── _extract_issues_from_result() # 从审查结果中提取issues列表
|
|
|
-├── _format_chunk_results_to_issues() # 格式化单个块的审查结果为issues列表
|
|
|
|
|
└── _dummy_review_task() # 空任务(方法不存在时使用)
|
|
└── _dummy_review_task() # 空任务(方法不存在时使用)
|
|
|
'''
|
|
'''
|
|
|
|
|
|
|
@@ -59,6 +58,7 @@ class UnitReviewResult():
|
|
|
technical_compliance: Dict[str, Any]
|
|
technical_compliance: Dict[str, Any]
|
|
|
rag_enhanced: Dict[str, Any]
|
|
rag_enhanced: Dict[str, Any]
|
|
|
overall_risk: str
|
|
overall_risk: str
|
|
|
|
|
+ is_sse_push: bool = True # 是否成功执行并推送SSE,默认为True
|
|
|
|
|
|
|
|
|
|
|
|
|
class AIReviewCoreFun:
|
|
class AIReviewCoreFun:
|
|
@@ -139,7 +139,9 @@ class AIReviewCoreFun:
|
|
|
chunk_results = await self._execute_chunk_methods(
|
|
chunk_results = await self._execute_chunk_methods(
|
|
|
chapter_code, chunk, global_chunk_index, func_names, state
|
|
chapter_code, chunk, global_chunk_index, func_names, state
|
|
|
)
|
|
)
|
|
|
-
|
|
|
|
|
|
|
+ if not chunk_results.get('is_sse_push', False):
|
|
|
|
|
+ logger.info(f"跳过当前未成功审查块 {chunk_index} 处理完成")
|
|
|
|
|
+ continue # 跳过未成功执行的块
|
|
|
# 格式化当前块的结果为issues
|
|
# 格式化当前块的结果为issues
|
|
|
chunk_page = chunk.get('page', '')
|
|
chunk_page = chunk.get('page', '')
|
|
|
review_location_label = f"第{chunk_page}页:{chunk_label}"
|
|
review_location_label = f"第{chunk_page}页:{chunk_label}"
|
|
@@ -203,43 +205,6 @@ class AIReviewCoreFun:
|
|
|
|
|
|
|
|
return issues
|
|
return issues
|
|
|
|
|
|
|
|
- def _format_chunk_results_to_issues(
|
|
|
|
|
- self,
|
|
|
|
|
- state: AIReviewState,
|
|
|
|
|
- chunk_index: int,
|
|
|
|
|
- chunk: Dict[str, Any],
|
|
|
|
|
- chapter_code: str,
|
|
|
|
|
- chunk_results: Dict[str, Any]
|
|
|
|
|
- ) -> List[Dict]:
|
|
|
|
|
- """
|
|
|
|
|
- 格式化单个块的所有审查结果为issues列表
|
|
|
|
|
-
|
|
|
|
|
- Args:
|
|
|
|
|
- state: AI审查状态
|
|
|
|
|
- chunk_index: 块索引
|
|
|
|
|
- chunk: 块内容
|
|
|
|
|
- chapter_code: 章节代码
|
|
|
|
|
- chunk_results: 块审查结果字典 {func_name: result}
|
|
|
|
|
-
|
|
|
|
|
- Returns:
|
|
|
|
|
- List[Dict]: issues列表
|
|
|
|
|
- """
|
|
|
|
|
- issues = []
|
|
|
|
|
-
|
|
|
|
|
- for func_name, result in chunk_results.items():
|
|
|
|
|
- if result is None:
|
|
|
|
|
- continue
|
|
|
|
|
-
|
|
|
|
|
- # 处理错误结果
|
|
|
|
|
- if isinstance(result, dict) and "error" in result:
|
|
|
|
|
- logger.warning(f"审查方法 {func_name} 返回错误: {result['error']}")
|
|
|
|
|
- continue
|
|
|
|
|
-
|
|
|
|
|
- # 提取issues
|
|
|
|
|
- extracted = self._extract_issues_from_result(result)
|
|
|
|
|
- issues.extend(extracted)
|
|
|
|
|
-
|
|
|
|
|
- return issues
|
|
|
|
|
|
|
|
|
|
def _group_chunks_by_chapter(self, chunks: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
|
|
def _group_chunks_by_chapter(self, chunks: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
|
|
|
"""
|
|
"""
|
|
@@ -280,18 +245,21 @@ class AIReviewCoreFun:
|
|
|
semaphore = asyncio.Semaphore(5) # 单个块内限制并发数为5
|
|
semaphore = asyncio.Semaphore(5) # 单个块内限制并发数为5
|
|
|
rag_enhanced_content = None # 初始化变量,避免作用域错误
|
|
rag_enhanced_content = None # 初始化变量,避免作用域错误
|
|
|
basis_content = None # 初始化变量,避免作用域错误
|
|
basis_content = None # 初始化变量,避免作用域错误
|
|
|
-
|
|
|
|
|
- if 'check_parameter_compliance' in func_names or 'check_non_parameter_compliance' in func_names:
|
|
|
|
|
|
|
+ is_complete_field = chunk.get('is_complete_field', False)
|
|
|
|
|
+ logger.info(f"检查is_complete_field值是否正常: {is_complete_field}")
|
|
|
|
|
+ # 只有非完整性审查的chunk才执行RAG检索(注意括号位置,确保运算符优先级正确)
|
|
|
|
|
+ if ('check_parameter_compliance' in func_names or 'check_non_parameter_compliance' in func_names) and not is_complete_field:
|
|
|
logger.debug("开始执行RAG检索增强")
|
|
logger.debug("开始执行RAG检索增强")
|
|
|
rag_enhanced_content = self.ai_review_engine.rag_enhanced_check(chunk.get('content', ''))
|
|
rag_enhanced_content = self.ai_review_engine.rag_enhanced_check(chunk.get('content', ''))
|
|
|
|
|
|
|
|
- if 'reference_basis_reviewer' in func_names or 'timeliness_basis_reviewer' in func_names:
|
|
|
|
|
|
|
+ if ('reference_basis_reviewer' in func_names or 'timeliness_basis_reviewer' in func_names) and not is_complete_field:
|
|
|
logger.debug("开始执行编制依据/时效性预处理")
|
|
logger.debug("开始执行编制依据/时效性预处理")
|
|
|
# 预处理编制依据/时效性审查所需内容
|
|
# 预处理编制依据/时效性审查所需内容
|
|
|
basis_content = await directory_extraction.extract_basis_with_langchain_qwen(
|
|
basis_content = await directory_extraction.extract_basis_with_langchain_qwen(
|
|
|
state['progress_manager'],
|
|
state['progress_manager'],
|
|
|
state["callback_task_id"],
|
|
state["callback_task_id"],
|
|
|
- chunk.get('content', '')
|
|
|
|
|
|
|
+ chunk.get('content', ''),
|
|
|
|
|
+ chapter_code
|
|
|
)
|
|
)
|
|
|
async def execute_with_semaphore(func_name):
|
|
async def execute_with_semaphore(func_name):
|
|
|
async with semaphore:
|
|
async with semaphore:
|
|
@@ -320,6 +288,7 @@ class AIReviewCoreFun:
|
|
|
merged_basic = {}
|
|
merged_basic = {}
|
|
|
merged_technical = {}
|
|
merged_technical = {}
|
|
|
merged_rag = {}
|
|
merged_rag = {}
|
|
|
|
|
+ has_success = False # 标记是否有成功执行的任务
|
|
|
|
|
|
|
|
for result in completed_results:
|
|
for result in completed_results:
|
|
|
if isinstance(result, Exception):
|
|
if isinstance(result, Exception):
|
|
@@ -329,6 +298,9 @@ class AIReviewCoreFun:
|
|
|
if result and len(result) == 2:
|
|
if result and len(result) == 2:
|
|
|
func_name, review_result = result
|
|
func_name, review_result = result
|
|
|
if isinstance(review_result, UnitReviewResult):
|
|
if isinstance(review_result, UnitReviewResult):
|
|
|
|
|
+ # 检查是否有成功的任务
|
|
|
|
|
+ if review_result.is_sse_push:
|
|
|
|
|
+ has_success = True
|
|
|
# 合并 basic_compliance
|
|
# 合并 basic_compliance
|
|
|
merged_basic.update(review_result.basic_compliance)
|
|
merged_basic.update(review_result.basic_compliance)
|
|
|
# 合并 technical_compliance
|
|
# 合并 technical_compliance
|
|
@@ -339,7 +311,8 @@ class AIReviewCoreFun:
|
|
|
return {
|
|
return {
|
|
|
'basic_compliance': merged_basic,
|
|
'basic_compliance': merged_basic,
|
|
|
'technical_compliance': merged_technical,
|
|
'technical_compliance': merged_technical,
|
|
|
- 'rag_enhanced': merged_rag
|
|
|
|
|
|
|
+ 'rag_enhanced': merged_rag,
|
|
|
|
|
+ 'is_sse_push': has_success # 添加 is_sse_push 字段
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
async def _execute_single_review(self, chapter_code: str, chunk: Dict[str, Any], chunk_index: int, func_name: str, state: AIReviewState,rag_enhanced_content :dict = None, basis_content: dict = None) -> UnitReviewResult:
|
|
async def _execute_single_review(self, chapter_code: str, chunk: Dict[str, Any], chunk_index: int, func_name: str, state: AIReviewState,rag_enhanced_content :dict = None, basis_content: dict = None) -> UnitReviewResult:
|
|
@@ -356,6 +329,7 @@ class AIReviewCoreFun:
|
|
|
Returns:
|
|
Returns:
|
|
|
UnitReviewResult: 单个审查方法的UnitReviewResult对象,包含 basic_compliance 或 technical_compliance
|
|
UnitReviewResult: 单个审查方法的UnitReviewResult对象,包含 basic_compliance 或 technical_compliance
|
|
|
"""
|
|
"""
|
|
|
|
|
+
|
|
|
# 从ai_review_engine获取对应的方法
|
|
# 从ai_review_engine获取对应的方法
|
|
|
if not hasattr(self.ai_review_engine, func_name):
|
|
if not hasattr(self.ai_review_engine, func_name):
|
|
|
logger.warning(f"AIReviewEngine中未找到方法: {func_name}")
|
|
logger.warning(f"AIReviewEngine中未找到方法: {func_name}")
|
|
@@ -366,7 +340,8 @@ class AIReviewCoreFun:
|
|
|
basic_compliance={func_name: {"error": f"未找到方法: {func_name}"}},
|
|
basic_compliance={func_name: {"error": f"未找到方法: {func_name}"}},
|
|
|
technical_compliance={},
|
|
technical_compliance={},
|
|
|
rag_enhanced={},
|
|
rag_enhanced={},
|
|
|
- overall_risk="error"
|
|
|
|
|
|
|
+ overall_risk="error",
|
|
|
|
|
+ is_sse_push=True
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
method = getattr(self.ai_review_engine, func_name)
|
|
method = getattr(self.ai_review_engine, func_name)
|
|
@@ -377,11 +352,11 @@ class AIReviewCoreFun:
|
|
|
|
|
|
|
|
# 获取块内容
|
|
# 获取块内容
|
|
|
review_content = chunk.get("content", "")
|
|
review_content = chunk.get("content", "")
|
|
|
-
|
|
|
|
|
|
|
+ is_complete_field = chunk.get("is_complete_field", False)
|
|
|
logger.debug(f"执行审查: {trace_id} -> {func_name}")
|
|
logger.debug(f"执行审查: {trace_id} -> {func_name}")
|
|
|
|
|
|
|
|
# 根据func_name构建对应的参数并调用
|
|
# 根据func_name构建对应的参数并调用
|
|
|
- if func_name == "sensitive_word_check":
|
|
|
|
|
|
|
+ if func_name == "sensitive_word_check" and not is_complete_field:
|
|
|
raw_result = await method(trace_id, review_content, state, stage_name)
|
|
raw_result = await method(trace_id, review_content, state, stage_name)
|
|
|
# 基础审查方法,放入 basic_compliance
|
|
# 基础审查方法,放入 basic_compliance
|
|
|
return UnitReviewResult(
|
|
return UnitReviewResult(
|
|
@@ -390,10 +365,11 @@ class AIReviewCoreFun:
|
|
|
basic_compliance={func_name: raw_result},
|
|
basic_compliance={func_name: raw_result},
|
|
|
technical_compliance={},
|
|
technical_compliance={},
|
|
|
rag_enhanced={},
|
|
rag_enhanced={},
|
|
|
- overall_risk=self._calculate_single_result_risk(raw_result)
|
|
|
|
|
|
|
+ overall_risk=self._calculate_single_result_risk(raw_result),
|
|
|
|
|
+ is_sse_push=True
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- elif func_name == "check_semantic_logic":
|
|
|
|
|
|
|
+ elif func_name == "check_semantic_logic" and not is_complete_field:
|
|
|
raw_result = await method(trace_id, review_content, state, stage_name)
|
|
raw_result = await method(trace_id, review_content, state, stage_name)
|
|
|
# 基础审查方法,放入 basic_compliance
|
|
# 基础审查方法,放入 basic_compliance
|
|
|
return UnitReviewResult(
|
|
return UnitReviewResult(
|
|
@@ -402,10 +378,11 @@ class AIReviewCoreFun:
|
|
|
basic_compliance={func_name: raw_result},
|
|
basic_compliance={func_name: raw_result},
|
|
|
technical_compliance={},
|
|
technical_compliance={},
|
|
|
rag_enhanced={},
|
|
rag_enhanced={},
|
|
|
- overall_risk=self._calculate_single_result_risk(raw_result)
|
|
|
|
|
|
|
+ overall_risk=self._calculate_single_result_risk(raw_result),
|
|
|
|
|
+ is_sse_push=True
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- elif func_name == "check_sensitive":
|
|
|
|
|
|
|
+ elif func_name == "check_sensitive" and not is_complete_field:
|
|
|
raw_result = await method(trace_id, review_content, state, stage_name)
|
|
raw_result = await method(trace_id, review_content, state, stage_name)
|
|
|
# 基础审查方法,放入 basic_compliance
|
|
# 基础审查方法,放入 basic_compliance
|
|
|
return UnitReviewResult(
|
|
return UnitReviewResult(
|
|
@@ -414,10 +391,11 @@ class AIReviewCoreFun:
|
|
|
basic_compliance={func_name: raw_result},
|
|
basic_compliance={func_name: raw_result},
|
|
|
technical_compliance={},
|
|
technical_compliance={},
|
|
|
rag_enhanced={},
|
|
rag_enhanced={},
|
|
|
- overall_risk=self._calculate_single_result_risk(raw_result)
|
|
|
|
|
|
|
+ overall_risk=self._calculate_single_result_risk(raw_result),
|
|
|
|
|
+ is_sse_push=True
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- elif func_name == "check_completeness":
|
|
|
|
|
|
|
+ elif func_name == "check_completeness" and is_complete_field:
|
|
|
# check_completeness 需要列表类型,将单个 chunk 包装成列表
|
|
# check_completeness 需要列表类型,将单个 chunk 包装成列表
|
|
|
raw_result = await method(trace_id, [chunk], state, stage_name)
|
|
raw_result = await method(trace_id, [chunk], state, stage_name)
|
|
|
# 基础审查方法,放入 basic_compliance
|
|
# 基础审查方法,放入 basic_compliance
|
|
@@ -427,10 +405,11 @@ class AIReviewCoreFun:
|
|
|
basic_compliance={func_name: raw_result},
|
|
basic_compliance={func_name: raw_result},
|
|
|
technical_compliance={},
|
|
technical_compliance={},
|
|
|
rag_enhanced={},
|
|
rag_enhanced={},
|
|
|
- overall_risk=self._calculate_single_result_risk(raw_result)
|
|
|
|
|
|
|
+ overall_risk=self._calculate_single_result_risk(raw_result),
|
|
|
|
|
+ is_sse_push=True
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- elif func_name == "check_non_parameter_compliance":
|
|
|
|
|
|
|
+ elif func_name == "check_non_parameter_compliance" and not is_complete_field:
|
|
|
# 技术审查方法需要从 RAG 检索结果中获取 references
|
|
# 技术审查方法需要从 RAG 检索结果中获取 references
|
|
|
raw_result = await self._execute_technical_review(
|
|
raw_result = await self._execute_technical_review(
|
|
|
method, trace_id, review_content, chunk, state, stage_name, rag_enhanced_content, func_name
|
|
method, trace_id, review_content, chunk, state, stage_name, rag_enhanced_content, func_name
|
|
@@ -442,10 +421,11 @@ class AIReviewCoreFun:
|
|
|
basic_compliance={},
|
|
basic_compliance={},
|
|
|
technical_compliance={func_name: raw_result},
|
|
technical_compliance={func_name: raw_result},
|
|
|
rag_enhanced={},
|
|
rag_enhanced={},
|
|
|
- overall_risk=self._calculate_single_result_risk(raw_result)
|
|
|
|
|
|
|
+ overall_risk=self._calculate_single_result_risk(raw_result),
|
|
|
|
|
+ is_sse_push=True
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- elif func_name == "check_parameter_compliance":
|
|
|
|
|
|
|
+ elif func_name == "check_parameter_compliance" and not is_complete_field:
|
|
|
# 技术审查方法需要从 RAG 检索结果中获取 references
|
|
# 技术审查方法需要从 RAG 检索结果中获取 references
|
|
|
raw_result = await self._execute_technical_review(
|
|
raw_result = await self._execute_technical_review(
|
|
|
method, trace_id, review_content, chunk, state, stage_name, rag_enhanced_content, func_name
|
|
method, trace_id, review_content, chunk, state, stage_name, rag_enhanced_content, func_name
|
|
@@ -457,23 +437,14 @@ class AIReviewCoreFun:
|
|
|
basic_compliance={},
|
|
basic_compliance={},
|
|
|
technical_compliance={func_name: raw_result},
|
|
technical_compliance={func_name: raw_result},
|
|
|
rag_enhanced={},
|
|
rag_enhanced={},
|
|
|
- overall_risk=self._calculate_single_result_risk(raw_result)
|
|
|
|
|
|
|
+ overall_risk=self._calculate_single_result_risk(raw_result),
|
|
|
|
|
+ is_sse_push=True
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- # # outline_check 仍在章节级别处理
|
|
|
|
|
- # elif func_name == "outline_check":
|
|
|
|
|
- # logger.warning(f"方法 {func_name} 不应在块级别调用,已在主流程中处理")
|
|
|
|
|
- # return UnitReviewResult(
|
|
|
|
|
- # unit_index=chunk_index,
|
|
|
|
|
- # unit_content=chunk,
|
|
|
|
|
- # basic_compliance={},
|
|
|
|
|
- # technical_compliance={},
|
|
|
|
|
- # rag_enhanced={},
|
|
|
|
|
- # overall_risk="low"
|
|
|
|
|
- # )
|
|
|
|
|
|
|
+
|
|
|
|
|
|
|
|
# reference_basis_reviewer:编制依据审查(逐块处理)
|
|
# reference_basis_reviewer:编制依据审查(逐块处理)
|
|
|
- elif func_name == "reference_basis_reviewer":
|
|
|
|
|
|
|
+ elif func_name == "reference_basis_reviewer" and not is_complete_field:
|
|
|
review_data = {
|
|
review_data = {
|
|
|
"content": review_content, # 原始文本内容
|
|
"content": review_content, # 原始文本内容
|
|
|
"basis_items": basis_content, # 提取的 BasisItems 对象
|
|
"basis_items": basis_content, # 提取的 BasisItems 对象
|
|
@@ -492,11 +463,12 @@ class AIReviewCoreFun:
|
|
|
basic_compliance={func_name: raw_result},
|
|
basic_compliance={func_name: raw_result},
|
|
|
technical_compliance={},
|
|
technical_compliance={},
|
|
|
rag_enhanced={},
|
|
rag_enhanced={},
|
|
|
- overall_risk=self._calculate_single_result_risk(raw_result)
|
|
|
|
|
|
|
+ overall_risk=self._calculate_single_result_risk(raw_result),
|
|
|
|
|
+ is_sse_push=True
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
# timeliness_basis_reviewer:时效性审查(逐块处理)
|
|
# timeliness_basis_reviewer:时效性审查(逐块处理)
|
|
|
- elif func_name == "timeliness_basis_reviewer":
|
|
|
|
|
|
|
+ elif func_name == "timeliness_basis_reviewer" and not is_complete_field:
|
|
|
review_data = {
|
|
review_data = {
|
|
|
"content": review_content, # 原始文本内容
|
|
"content": review_content, # 原始文本内容
|
|
|
"basis_items": basis_content, # 提取的 BasisItems 对象
|
|
"basis_items": basis_content, # 提取的 BasisItems 对象
|
|
@@ -515,18 +487,21 @@ class AIReviewCoreFun:
|
|
|
basic_compliance={func_name: raw_result},
|
|
basic_compliance={func_name: raw_result},
|
|
|
technical_compliance={},
|
|
technical_compliance={},
|
|
|
rag_enhanced={},
|
|
rag_enhanced={},
|
|
|
- overall_risk=self._calculate_single_result_risk(raw_result)
|
|
|
|
|
|
|
+ overall_risk=self._calculate_single_result_risk(raw_result),
|
|
|
|
|
+ is_sse_push=True
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
else:
|
|
else:
|
|
|
- logger.warning(f"未知的审查方法: {func_name},使用默认调用方式")
|
|
|
|
|
|
|
+ logger.warning(f"未知的审查方法: {func_name}")
|
|
|
|
|
+ logger.warning(f"is_complete_field: {is_complete_field}")
|
|
|
return UnitReviewResult(
|
|
return UnitReviewResult(
|
|
|
unit_index=chunk_index,
|
|
unit_index=chunk_index,
|
|
|
unit_content=chunk,
|
|
unit_content=chunk,
|
|
|
basic_compliance={func_name: {"error": f"未知的审查方法: {func_name}"}},
|
|
basic_compliance={func_name: {"error": f"未知的审查方法: {func_name}"}},
|
|
|
technical_compliance={},
|
|
technical_compliance={},
|
|
|
rag_enhanced={},
|
|
rag_enhanced={},
|
|
|
- overall_risk="error"
|
|
|
|
|
|
|
+ overall_risk="error",
|
|
|
|
|
+ is_sse_push=False
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
def _calculate_single_result_risk(self, raw_result: Any) -> str:
|
|
def _calculate_single_result_risk(self, raw_result: Any) -> str:
|
|
@@ -1090,7 +1065,7 @@ class AIReviewCoreFun:
|
|
|
logger.warning(f"发送单元完成进度更新失败: {str(e)}")
|
|
logger.warning(f"发送单元完成进度更新失败: {str(e)}")
|
|
|
# 发生异常时,尝试返回一个基于 index 的估算值
|
|
# 发生异常时,尝试返回一个基于 index 的估算值
|
|
|
try:
|
|
try:
|
|
|
- return int(((unit_index + 1) / total_units) * 100)
|
|
|
|
|
|
|
+ return int(((unit_index + 1) / total_chunks) * 100)
|
|
|
except:
|
|
except:
|
|
|
return 0
|
|
return 0
|
|
|
|
|
|
|
@@ -1157,11 +1132,11 @@ class AIReviewCoreFun:
|
|
|
for item in review_item_config:
|
|
for item in review_item_config:
|
|
|
key, value = item.split("_", 1)
|
|
key, value = item.split("_", 1)
|
|
|
review_item_dict.setdefault(key, []).append(value)
|
|
review_item_dict.setdefault(key, []).append(value)
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
# 依据方案标准章节顺序进行排序
|
|
# 依据方案标准章节顺序进行排序
|
|
|
- sgfa_chapter_index_order = ["catalogue", "basis", "overview", "plan","technology", "safety", "quality", "environment",
|
|
|
|
|
|
|
+ sgfa_chapter_index_order = ["catalogue", "basis", "overview", "plan","technology", "safety", "quality", "environment",
|
|
|
"management", "acceptance", "other"]
|
|
"management", "acceptance", "other"]
|
|
|
-
|
|
|
|
|
|
|
+
|
|
|
all_keys = review_item_dict.keys()
|
|
all_keys = review_item_dict.keys()
|
|
|
sorted_keys = sorted(
|
|
sorted_keys = sorted(
|
|
|
all_keys,
|
|
all_keys,
|
|
@@ -1170,4 +1145,141 @@ class AIReviewCoreFun:
|
|
|
review_item_dict_sorted = {}
|
|
review_item_dict_sorted = {}
|
|
|
for key in sorted_keys:
|
|
for key in sorted_keys:
|
|
|
review_item_dict_sorted[key] = review_item_dict[key]
|
|
review_item_dict_sorted[key] = review_item_dict[key]
|
|
|
- return review_item_dict_sorted
|
|
|
|
|
|
|
+ return review_item_dict_sorted
|
|
|
|
|
+
|
|
|
|
|
def _merge_chunks_for_completeness_check(
    self,
    chunks: List[Dict[str, Any]],
    review_item_dict: Dict[str, List[str]]
) -> List[Dict[str, Any]]:
    """
    Append one merged, chapter-level chunk for every chapter that needs a
    completeness review.

    Args:
        chunks: The already-filtered list of chunk dicts.
        review_item_dict: Review items as {chapter_code: [func_names]}.

    Returns:
        List[Dict[str, Any]]: Shallow copies of the original chunks (each
        flagged ``is_complete_field=False``) plus one merged chunk per chapter
        (flagged ``is_complete_field=True``), ordered by page. The input list
        itself is returned unchanged when nothing has to be merged or when an
        unexpected error occurs.

    Note:
        Merge rules:
        1. Only chapter classifications whose review items contain
           'check_completeness' or 'outline_check' are considered.
        2. The chapter name is the ``chapter`` field (falling back to
           ``section_label``) with "->" and everything after it removed.
        3. Chunks of one chapter are ordered by page (ascending) and their
           ``content`` / ``original_content`` are joined with blank lines.
        4. The merged chunk takes the smallest page of its chapter.
        5. Merged chunks are appended; the original chunks are kept.
        6. A merged chunk is always flagged, even if the chapter had only a
           single chunk.
        7. The final list is sorted by page. The sort is stable, so a merged
           chunk follows the original chunks that share its page.
    """

    def _page_sort_key(chunk: Dict[str, Any]):
        # Always return a tuple of the same shape so numeric and non-numeric
        # pages never get compared directly (int < str raises TypeError, which
        # previously aborted the whole merge). Non-numeric pages sort last.
        page = chunk.get("page", 0)
        if isinstance(page, (int, float)):
            return (0, page, "")
        text = str(page).strip()
        try:
            return (0, int(text), "")
        except ValueError:
            return (1, 0, text)

    def _strip_arrow_suffix(value: str) -> str:
        # "Chapter 1->1.2" -> "Chapter 1"; values without "->" are returned as-is.
        return value.split("->")[0].strip() if "->" in value else value

    try:
        # 1. Chapter classifications that request a completeness-style review.
        completeness_chapters = {
            chapter_code
            for chapter_code, func_names in review_item_dict.items()
            if 'check_completeness' in func_names or 'outline_check' in func_names
        }

        if not completeness_chapters:
            logger.info("没有包含完整性审查的章节,无需合并")
            return chunks

        logger.info(f"包含完整性审查的章节分类: {completeness_chapters}")

        # 2. Chunks that belong to one of those classifications.
        chunks_to_merge = [
            chunk for chunk in chunks
            if chunk.get("chapter_classification", "") in completeness_chapters
        ]

        if not chunks_to_merge:
            logger.info("没有找到需要合并的chunks")
            return chunks

        # 3. Group by chapter name (text before "->").
        chapter_groups: Dict[str, List[Dict[str, Any]]] = {}
        for chunk in chunks_to_merge:
            chapter_full = chunk.get("chapter", chunk.get("section_label", ""))
            chapter_name = _strip_arrow_suffix(chapter_full)
            chapter_groups.setdefault(chapter_name, []).append(chunk)

        logger.info(f"按章节分组完成,共 {len(chapter_groups)} 个章节需要合并")

        # 4. Start from flagged copies of the originals so the caller's dicts
        #    are never mutated, then append one merged chunk per chapter.
        result_chunks = []
        for chunk in chunks:
            chunk_copy = chunk.copy()
            chunk_copy["is_complete_field"] = False
            result_chunks.append(chunk_copy)

        for chapter_name, chapter_chunk_list in chapter_groups.items():
            ordered = sorted(chapter_chunk_list, key=_page_sort_key)

            # After sorting, the first chunk carries the smallest page.
            min_page = ordered[0].get("page", 0)

            # A missing or None content contributes an empty segment instead
            # of breaking str.join.
            merged_content = "\n\n".join(
                chunk.get("content") or "" for chunk in ordered
            )
            merged_original_content = "\n\n".join(
                chunk.get("original_content") or "" for chunk in ordered
            )

            # Base the merged chunk on the first chunk so every other field
            # (project_plan_type, chapter_classification, ...) is retained.
            merged_chunk = ordered[0].copy()

            original_chunk_id = merged_chunk.get('chunk_id', '')
            merged_chunk["chunk_id"] = f"{_strip_arrow_suffix(original_chunk_id)}_merged"

            merged_chunk["chapter"] = chapter_name
            merged_chunk["content"] = merged_content
            merged_chunk["original_content"] = merged_original_content
            merged_chunk["page"] = min_page
            merged_chunk["is_complete_field"] = True

            merged_chunk["section_label"] = chapter_name
            merged_chunk["title"] = chapter_name
            merged_chunk["serial_number"] = ""

            # Copy element_tag before editing it so the original chunk's tag
            # stays untouched; anything that is not a dict is left as-is.
            element_tag = merged_chunk.get("element_tag")
            if isinstance(element_tag, dict):
                element_tag = element_tag.copy()
                original_element_chunk_id = element_tag.get('chunk_id', '')
                element_tag["chunk_id"] = f"{_strip_arrow_suffix(original_element_chunk_id)}_merged"
                element_tag["serial_number"] = ""
                merged_chunk["element_tag"] = element_tag

            result_chunks.append(merged_chunk)

            logger.info(f"合并章节 '{chapter_name}': {len(ordered)} 个chunk -> 1 个合并chunk (page={min_page})")

        # 5. Order everything by page.
        result_chunks.sort(key=_page_sort_key)

        logger.info(f"合并完成并按页码排序: 原始 {len(chunks)} 个chunk -> 最终 {len(result_chunks)} 个chunk(包含 {len(result_chunks) - len(chunks)} 个合并chunk)")

        return result_chunks

    except Exception as e:
        # Best-effort step: on any unexpected failure fall back to the
        # unmodified input so the review pipeline can continue.
        logger.error(f"合并chunks失败: {str(e)}", exc_info=True)
        return chunks
|