3 次代碼提交 8cdc32747b ... 297d694a34

作者 SHA1 備註 提交日期
  WangXuMing 297d694a34 v0.0.5-功能优化-条文完整性审查 1 月之前
  WangXuMing db4f9b7b58 Merge branch 'dev' of http://47.109.151.80:15030/CRBC-MaaS-Platform-Project/LQAgentPlatform into dev 1 月之前
  WangXuMing 5b8f0650a5 v0.0.5-功能优化-条文完整性审查 1 月之前

+ 21 - 5
core/construction_review/component/reviewers/check_completeness/components/result_analyzer.py

@@ -227,11 +227,25 @@ class ResultAnalyzer(IResultAnalyzer):
         """
         all_issues = []
         metadata = {}
-
+        suorces_eum = {
+            "basis": "编制依据",
+            "overview": "工程概况",
+            "plan": "施工计划",
+            "technology": "施工工艺技术",
+            "safety": "安全保证措施",
+            "quality": "质量保证措施",
+            "environment": "环境保证措施",
+            "management": "施工管理及作业人员配备与分工",
+            "acceptance": "验收要求",
+            "other": "其他资料"
+            }
         for row in summary_rows:
             level2 = (row.get("二级目录") or "").strip()
             requirement = (row.get("内容要求") or "").strip()
-
+            reference_source = '桥梁公司危险性较大工程管理实施细则(2025版)'
+            reason= f"参照:{reference_source} 中的内容要求,{row.get('section_label', '')}内容属于,专项施工方案内容要求中的 【{suorces_eum[row.get("标签", "")]}】 板块,应包含{requirement}"
+            review_references = (row.get("依据") or "").strip()
+            
             missing_points_raw = row.get("缺失的要点", "")
             missing_points = self._parse_list_field(missing_points_raw)
             if not missing_points:
@@ -244,9 +258,9 @@ class ResultAnalyzer(IResultAnalyzer):
             requirement_list = requirement.split(':')[-1].split(';')
             requirement_text = ';'.join([requirement_list[i-1] for i in missing_points])
             issue_point = (
-                f"[{level2}内容缺失]未包含要点:{requirement_text}"
+                f"{row.get('section_label', '')}下缺失{suorces_eum[row.get("标签", "")]}中的【{level2}】内容"
             )
-            suggestion = f"补充:{requirement_text}" if requirement else "补充缺失要点内容"
+            suggestion = f"建议补充:{requirement_text}" if requirement else "补充缺失要点内容"
             risk_level = self._map_risk_level(len(missing_points))
 
             # 构建问题项并添加到列表
@@ -254,8 +268,10 @@ class ResultAnalyzer(IResultAnalyzer):
                 "issue_point": issue_point,
                 "location": row.get("section_label", ""),
                 "suggestion": suggestion,
-                "reason": requirement,
+                "reason": reason,
                 "risk_level": risk_level,
+                "review_references": review_references,
+                "reference_source": reference_source
             }
             all_issues.append(issue_item)
 

+ 23 - 4
core/construction_review/component/reviewers/utils/inter_tool.py

@@ -530,13 +530,19 @@ class InterTool:
 
             # 3. 如果JSON解析失败,回退到文本解析
             if not review_lists:
+                # 🔧 修复:检查响应是否为空或只包含空白字符
+                response_stripped = response.strip() if isinstance(response, str) else ""
+                is_empty_response = not response_stripped or response_stripped in ["", "null", "None", "undefined"]
+
                 risk_level = self._determine_risk_level(response)
+
+                # 如果响应为空,则设置 exist_issue=False
                 review_lists.append({
                     "check_item": check_name,
                     "chapter_code": chapter_code,
                     "check_item_code": check_item_code,
                     "check_result": response,
-                    "exist_issue": True,
+                    "exist_issue": not is_empty_response,  # 🔧 修复:空响应不存在问题
                     "risk_info": {"risk_level": risk_level}
                 })
 
@@ -632,9 +638,22 @@ class InterTool:
         """创建单个审查问题项"""
         risk_level = self._determine_risk_level(issue_data.get("risk_level", ""))
 
-        # 根据原始风险等级判断是否存在问题
-        original_risk_level = issue_data.get("risk_level", "")
-        exist_issue = original_risk_level not in ["无风险", "无", "通过", "符合要求"]
+        # 🔧 修复:首先检查 issue_data 是否为空
+        is_empty = False
+        if isinstance(issue_data, list):
+            is_empty = len(issue_data) == 0
+        elif isinstance(issue_data, dict):
+            # 检查是否为空字典,或者只有 risk_level 字段但没有其他实质内容
+            is_empty = len(issue_data) == 0 or (len(issue_data) == 1 and "risk_level" in issue_data)
+
+        # 根据原始风险等级和内容判断是否存在问题
+        original_risk_level = issue_data.get("risk_level", "") if isinstance(issue_data, dict) else ""
+        # 只有当内容不为空,且风险等级不是"无风险"类时,才认为存在问题
+        exist_issue = not is_empty and original_risk_level not in ["无风险", "无", "通过", "符合要求"]
+
+        # 记录调试信息
+        if is_empty:
+            logger.debug(f"检查项 {check_name} 的 issue_data 为空,设置 exist_issue=False")
 
         return {
             "check_item": check_name,

+ 20 - 8
core/construction_review/component/reviewers/utils/punctuation_checker.py

@@ -36,6 +36,7 @@ SYSTEM = """
 - title_mark_status:书名号需完全包裹规范名称,且不多包/漏包
 - bracket_status:括号需完全包裹规范编号,且不多包/漏包;编号可能是各种形式,如果文本中没有编号,设置为null
 
+
 【输出要求】
 - 为每个输入文本输出一个检查结果
 - 确保输出数量与输入一致
@@ -67,6 +68,10 @@ HUMAN = """
 - 书名号包裹了完整的规范名称 → title_mark_status=true
 - 英文括号包裹了完整的编号 → bracket_status=true(混用不算错)
 
+示例4:《起重机械钢丝绳保养维护检验和报废》GB/T5972-2023;
+- 书名号包裹了完整的规范名称 → title_mark_status=true
+- 编号未被包裹 → bracket_status=false
+
 【待检查文本】
 {items}
 
@@ -225,8 +230,15 @@ async def check_punctuation(items: List[str]) -> str:
         try:
             raw = await chain.ainvoke(payload)
             data = extract_first_json(raw)
-            findings = PunctuationResults.model_validate(data)
-            llm_result = [x.model_dump() for x in findings.items]
+
+            # 兼容两种格式:带 items 字段或不带 items 字段(单个对象)
+            if "items" in data:
+                findings = PunctuationResults.model_validate(data)
+                llm_result = [x.model_dump() for x in findings.items]
+            else:
+                # LLM 返回了单个对象,包装成列表
+                single_result = PunctuationResult.model_validate(data)
+                llm_result = [single_result.model_dump()]
             break
         except (Exception, ValidationError, json.JSONDecodeError) as e:
             last_err = e
@@ -259,12 +271,12 @@ if __name__ == "__main__":
 
     # 测试用例
     test_items = [
-        "(4)《中华人民共和国突发事件应对法》【主席令〔2007〕第 69 号】;",  # 正确
-        "《混》凝土结构设计规范(GB 50010-2010)",      # 缺少书名号
-        "建筑施工组织设计规范GB/T 50502-2015",  # 缺少括号
-        "《建筑抗震设计规范》(GB 50011)-2001",       # 括号不成对
-        "《城市道路工程设计规范(CJJ 37-2012)",    # 书名号不成对
-        "《公路工程技术标准》(JTG B01-2014)",     # 正确
+        "《起重机械钢丝绳保养维护检验和报废》GB/T5972-2023;"  # missing brackets around the standard number (bracket_status=false)
+        # "《混》凝土结构设计规范(GB 50010-2010)",      # 缺少书名号
+        # "建筑施工组织设计规范GB/T 50502-2015",  # 缺少括号
+        # "《建筑抗震设计规范》(GB 50011)-2001",       # 括号不成对
+        # "《城市道路工程设计规范(CJJ 37-2012)",    # 书名号不成对
+        # "《公路工程技术标准》(JTG B01-2014)",     # 正确
     ]
 
     result = asyncio.run(check_punctuation(test_items))

+ 1 - 16
core/construction_review/component/reviewers/utils/punctuation_result_processor.py

@@ -176,23 +176,8 @@ if __name__ == "__main__":
     # 模拟 punctuation_checker 的返回结果
     check_results = json.dumps([
         {
-            "original_text": "《混凝土结构设计规范》",
+            "original_text": "《起重机械钢丝绳保养、维护、检验和报废》GB/T5972-2023;",
             "title_mark_status": True,
-            "bracket_status": "null"
-        },
-        {
-            "original_text": "《混凝土结构设计规范》【GB 50010-2010】",
-            "title_mark_status": True,
-            "bracket_status": False
-        },
-        {
-            "original_text": "《建筑施工组织设计规范》(GB/T 50502-2015)",
-            "title_mark_status": True,
-            "bracket_status": True
-        },
-        {
-            "original_text": "建筑抗震设计规范 GB 50011-2010",
-            "title_mark_status": False,
             "bracket_status": False
         }
     ], ensure_ascii=False)

+ 22 - 5
core/construction_review/workflows/ai_review_workflow.py

@@ -294,7 +294,7 @@ class AIReviewWorkflow:
             review_func_mapping: Dict[str, Union[str, List[str]]] = {
                 'sensitive_word_check': 'sensitive_word_check',
                 'semantic_logic_check': 'check_semantic_logic',
-                'completeness_check': ['check_completeness', 'outline_check'],
+                'completeness_check': 'check_completeness',
                 'timeliness_check': 'timeliness_basis_reviewer',
                 'reference_check': 'reference_basis_reviewer',
                 'sensitive_check': 'check_sensitive',
@@ -316,11 +316,15 @@ class AIReviewWorkflow:
             original_chunks = state.get("structured_content", {}).get("chunks", [])
 
             # 预处理:根据 review_item_dict_sorted 中的 key 对 structured_content 进行筛选
-            # original_chunks = structured_content.get("chunks", [])
             filtered_chunks = [
                 chunk for chunk in original_chunks
                 if chunk.get("chapter_classification") in review_item_dict_sorted.keys()
             ]
+            # Select the chapter classifications subject to completeness review and merge each chapter's chunks into one chunk
+            filtered_chunks = self.core_fun._merge_chunks_for_completeness_check(
+                filtered_chunks, review_item_dict_sorted
+            )
+
             # with open("temp/filtered_chunks/filtered_chunks.json", "w", encoding="utf-8") as f:
             #     json.dump(filtered_chunks, f, ensure_ascii=False, indent=4)
             # # 更新 chunks 和 structured_content
@@ -328,14 +332,27 @@ class AIReviewWorkflow:
             # structured_content["chunks"] = chunks
 
             total_chapters = len(review_item_dict_sorted)
-            total_chunks = len(filtered_chunks)
+            with open("temp/filtered_chunks/review_item_dict_sorted.json", "w", encoding="utf-8") as f:
+                json.dump(review_item_dict_sorted, f, ensure_ascii=False, indent=4)
+            # 如果review_item_dict_sorted中只包含check_completeness,则total_chunks 仅计算chunk中is_complete_field = true的chunk数量
+            all_check_items = []
+            for check_list in review_item_dict_sorted.values():
+                all_check_items.extend(check_list)  # 把每个分类的检查项加入总列表
+
+            # 判断:所有检查项是否都只有 "check_completeness"(无其他检查项)
+            if all(item == "check_completeness" for item in all_check_items):
+                # 仅统计 is_complete_field = True 的chunk数量(用生成器表达式省内存)
+                total_chunks = sum(1 for chunk in filtered_chunks if chunk.get("is_complete_field", False))
+            else:
+                # 统计所有 filtered_chunks
+                total_chunks = len(filtered_chunks)
 
             # 初始化issues列表
             all_issues = []
             completed_chunks = 0
             chapter_chunks_map, chapter_names = self.core_fun._group_chunks_by_chapter(filtered_chunks)
-            # with open("temp/filtered_chunks/chapter_chunks_map.json", "w", encoding="utf-8") as f:
-            #      json.dump(chapter_chunks_map, f, ensure_ascii=False, indent=4)
+            with open("temp/filtered_chunks/chapter_chunks_map.json", "w", encoding="utf-8") as f:
+                 json.dump(chapter_chunks_map, f, ensure_ascii=False, indent=4)
             logger.info(f"内容分组完成,共 {len(chapter_chunks_map)} 个章节")
             await self.core_fun._send_start_review_progress(state,total_chunks, chapter_names)
             # 6️ 按章节处理

+ 185 - 76
core/construction_review/workflows/core_functions/ai_review_core_fun.py

@@ -29,7 +29,6 @@ AI审查核心功能类 - 负责具体的审查逻辑和数据处理
 ├── _execute_technical_review()     # 执行技术性审查(参数/非参数合规性检查)
 ├── _group_chunks_by_chapter()      # 按章节代码对chunks进行分组
 ├── _extract_issues_from_result()   # 从审查结果中提取issues列表
-├── _format_chunk_results_to_issues() # 格式化单个块的审查结果为issues列表
 └── _dummy_review_task()            # 空任务(方法不存在时使用)
 '''
 
@@ -59,6 +58,7 @@ class UnitReviewResult():
     technical_compliance: Dict[str, Any]
     rag_enhanced: Dict[str, Any]
     overall_risk: str
+    is_sse_push: bool = True  # 是否成功执行并推送SSE,默认为True
 
 
 class AIReviewCoreFun:
@@ -139,7 +139,9 @@ class AIReviewCoreFun:
             chunk_results = await self._execute_chunk_methods(
                 chapter_code, chunk, global_chunk_index, func_names, state
             )
-
+            if not chunk_results.get('is_sse_push', False):
+                logger.info(f"跳过当前未成功审查块 {chunk_index} 处理完成")                
+                continue  # 跳过未成功执行的块
             # 格式化当前块的结果为issues
             chunk_page = chunk.get('page', '')
             review_location_label = f"第{chunk_page}页:{chunk_label}"
@@ -203,43 +205,6 @@ class AIReviewCoreFun:
 
         return issues
 
-    def _format_chunk_results_to_issues(
-        self,
-        state: AIReviewState,
-        chunk_index: int,
-        chunk: Dict[str, Any],
-        chapter_code: str,
-        chunk_results: Dict[str, Any]
-    ) -> List[Dict]:
-        """
-        格式化单个块的所有审查结果为issues列表
-
-        Args:
-            state: AI审查状态
-            chunk_index: 块索引
-            chunk: 块内容
-            chapter_code: 章节代码
-            chunk_results: 块审查结果字典 {func_name: result}
-
-        Returns:
-            List[Dict]: issues列表
-        """
-        issues = []
-
-        for func_name, result in chunk_results.items():
-            if result is None:
-                continue
-
-            # 处理错误结果
-            if isinstance(result, dict) and "error" in result:
-                logger.warning(f"审查方法 {func_name} 返回错误: {result['error']}")
-                continue
-
-            # 提取issues
-            extracted = self._extract_issues_from_result(result)
-            issues.extend(extracted)
-
-        return issues
 
     def _group_chunks_by_chapter(self, chunks: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
         """
@@ -320,6 +285,7 @@ class AIReviewCoreFun:
         merged_basic = {}
         merged_technical = {}
         merged_rag = {}
+        has_success = False  # 标记是否有成功执行的任务
 
         for result in completed_results:
             if isinstance(result, Exception):
@@ -329,6 +295,9 @@ class AIReviewCoreFun:
             if result and len(result) == 2:
                 func_name, review_result = result
                 if isinstance(review_result, UnitReviewResult):
+                    # 检查是否有成功的任务
+                    if review_result.is_sse_push:
+                        has_success = True
                     # 合并 basic_compliance
                     merged_basic.update(review_result.basic_compliance)
                     # 合并 technical_compliance
@@ -339,7 +308,8 @@ class AIReviewCoreFun:
         return {
             'basic_compliance': merged_basic,
             'technical_compliance': merged_technical,
-            'rag_enhanced': merged_rag
+            'rag_enhanced': merged_rag,
+            'is_sse_push': has_success  # 添加 is_sse_push 字段
         }
 
     async def _execute_single_review(self, chapter_code: str, chunk: Dict[str, Any], chunk_index: int, func_name: str, state: AIReviewState,rag_enhanced_content :dict = None, basis_content: dict = None) -> UnitReviewResult:
@@ -356,6 +326,7 @@ class AIReviewCoreFun:
         Returns:
             UnitReviewResult: 单个审查方法的UnitReviewResult对象,包含 basic_compliance 或 technical_compliance
         """
+      
         # 从ai_review_engine获取对应的方法
         if not hasattr(self.ai_review_engine, func_name):
             logger.warning(f"AIReviewEngine中未找到方法: {func_name}")
@@ -366,7 +337,8 @@ class AIReviewCoreFun:
                 basic_compliance={func_name: {"error": f"未找到方法: {func_name}"}},
                 technical_compliance={},
                 rag_enhanced={},
-                overall_risk="error"
+                overall_risk="error",
+                is_sse_push=True
             )
 
         method = getattr(self.ai_review_engine, func_name)
@@ -377,11 +349,11 @@ class AIReviewCoreFun:
 
         # 获取块内容
         review_content = chunk.get("content", "")
-
+        is_complete_field = chunk.get("is_complete_field", False)
         logger.debug(f"执行审查: {trace_id} -> {func_name}")
 
         # 根据func_name构建对应的参数并调用
-        if func_name == "sensitive_word_check":
+        if func_name == "sensitive_word_check" and not is_complete_field:
             raw_result = await method(trace_id, review_content, state, stage_name)
             # 基础审查方法,放入 basic_compliance
             return UnitReviewResult(
@@ -390,10 +362,11 @@ class AIReviewCoreFun:
                 basic_compliance={func_name: raw_result},
                 technical_compliance={},
                 rag_enhanced={},
-                overall_risk=self._calculate_single_result_risk(raw_result)
+                overall_risk=self._calculate_single_result_risk(raw_result),
+                is_sse_push=True
             )
 
-        elif func_name == "check_semantic_logic":
+        elif func_name == "check_semantic_logic" and not is_complete_field:
             raw_result = await method(trace_id, review_content, state, stage_name)
             # 基础审查方法,放入 basic_compliance
             return UnitReviewResult(
@@ -402,10 +375,11 @@ class AIReviewCoreFun:
                 basic_compliance={func_name: raw_result},
                 technical_compliance={},
                 rag_enhanced={},
-                overall_risk=self._calculate_single_result_risk(raw_result)
+                overall_risk=self._calculate_single_result_risk(raw_result),
+                is_sse_push=True
             )
 
-        elif func_name == "check_sensitive":
+        elif func_name == "check_sensitive" and not is_complete_field:
             raw_result = await method(trace_id, review_content, state, stage_name)
             # 基础审查方法,放入 basic_compliance
             return UnitReviewResult(
@@ -414,10 +388,11 @@ class AIReviewCoreFun:
                 basic_compliance={func_name: raw_result},
                 technical_compliance={},
                 rag_enhanced={},
-                overall_risk=self._calculate_single_result_risk(raw_result)
+                overall_risk=self._calculate_single_result_risk(raw_result),
+                is_sse_push=True
             )
 
-        elif func_name == "check_completeness":
+        elif func_name == "check_completeness" and is_complete_field:
             # check_completeness 需要列表类型,将单个 chunk 包装成列表
             raw_result = await method(trace_id, [chunk], state, stage_name)
             # 基础审查方法,放入 basic_compliance
@@ -427,10 +402,11 @@ class AIReviewCoreFun:
                 basic_compliance={func_name: raw_result},
                 technical_compliance={},
                 rag_enhanced={},
-                overall_risk=self._calculate_single_result_risk(raw_result)
+                overall_risk=self._calculate_single_result_risk(raw_result),
+                is_sse_push=True
             )
 
-        elif func_name == "check_non_parameter_compliance":
+        elif func_name == "check_non_parameter_compliance" and not is_complete_field:
             # 技术审查方法需要从 RAG 检索结果中获取 references
             raw_result = await self._execute_technical_review(
                 method, trace_id, review_content, chunk, state, stage_name, rag_enhanced_content, func_name
@@ -442,10 +418,11 @@ class AIReviewCoreFun:
                 basic_compliance={},
                 technical_compliance={func_name: raw_result},
                 rag_enhanced={},
-                overall_risk=self._calculate_single_result_risk(raw_result)
+                overall_risk=self._calculate_single_result_risk(raw_result),
+                is_sse_push=True
             )
 
-        elif func_name == "check_parameter_compliance":
+        elif func_name == "check_parameter_compliance" and not is_complete_field:
             # 技术审查方法需要从 RAG 检索结果中获取 references
             raw_result = await self._execute_technical_review(
                 method, trace_id, review_content, chunk, state, stage_name, rag_enhanced_content, func_name
@@ -457,23 +434,14 @@ class AIReviewCoreFun:
                 basic_compliance={},
                 technical_compliance={func_name: raw_result},
                 rag_enhanced={},
-                overall_risk=self._calculate_single_result_risk(raw_result)
+                overall_risk=self._calculate_single_result_risk(raw_result),
+                is_sse_push=True
             )
 
-        # # outline_check 仍在章节级别处理
-        # elif func_name == "outline_check":
-        #     logger.warning(f"方法 {func_name} 不应在块级别调用,已在主流程中处理")
-        #     return UnitReviewResult(
-        #         unit_index=chunk_index,
-        #         unit_content=chunk,
-        #         basic_compliance={},
-        #         technical_compliance={},
-        #         rag_enhanced={},
-        #         overall_risk="low"
-        #     )
+
 
         # reference_basis_reviewer:编制依据审查(逐块处理)
-        elif func_name == "reference_basis_reviewer":
+        elif func_name == "reference_basis_reviewer" and not is_complete_field:
             review_data = {
                 "content": review_content,  # 原始文本内容
                 "basis_items": basis_content,  # 提取的 BasisItems 对象
@@ -492,11 +460,12 @@ class AIReviewCoreFun:
                 basic_compliance={func_name: raw_result},
                 technical_compliance={},
                 rag_enhanced={},
-                overall_risk=self._calculate_single_result_risk(raw_result)
+                overall_risk=self._calculate_single_result_risk(raw_result),
+                is_sse_push=True
             )
 
         # timeliness_basis_reviewer:时效性审查(逐块处理)
-        elif func_name == "timeliness_basis_reviewer":
+        elif func_name == "timeliness_basis_reviewer" and not is_complete_field:
             review_data = {
                 "content": review_content,  # 原始文本内容
                 "basis_items": basis_content,  # 提取的 BasisItems 对象
@@ -515,18 +484,21 @@ class AIReviewCoreFun:
                 basic_compliance={func_name: raw_result},
                 technical_compliance={},
                 rag_enhanced={},
-                overall_risk=self._calculate_single_result_risk(raw_result)
+                overall_risk=self._calculate_single_result_risk(raw_result),
+                is_sse_push=True
             )
 
         else:
-            logger.warning(f"未知的审查方法: {func_name},使用默认调用方式")
+            logger.warning(f"未知的审查方法: {func_name}")
+            logger.warning(f"is_complete_field: {is_complete_field}")
             return UnitReviewResult(
                 unit_index=chunk_index,
                 unit_content=chunk,
                 basic_compliance={func_name: {"error": f"未知的审查方法: {func_name}"}},
                 technical_compliance={},
                 rag_enhanced={},
-                overall_risk="error"
+                overall_risk="error",
+                is_sse_push=False
             )
 
     def _calculate_single_result_risk(self, raw_result: Any) -> str:
@@ -1090,7 +1062,7 @@ class AIReviewCoreFun:
             logger.warning(f"发送单元完成进度更新失败: {str(e)}")
             # 发生异常时,尝试返回一个基于 index 的估算值
             try:
-                return int(((unit_index + 1) / total_units) * 100)
+                return int(((unit_index + 1) / total_chunks) * 100)
             except:
                 return 0
 
@@ -1157,11 +1129,11 @@ class AIReviewCoreFun:
         for item in review_item_config:
             key, value = item.split("_", 1)
             review_item_dict.setdefault(key, []).append(value)
-        
+
         # 依据方案标准章节顺序进行排序
-        sgfa_chapter_index_order = ["catalogue", "basis", "overview", "plan","technology", "safety", "quality", "environment", 
+        sgfa_chapter_index_order = ["catalogue", "basis", "overview", "plan","technology", "safety", "quality", "environment",
 "management", "acceptance", "other"]
-        
+
         all_keys = review_item_dict.keys()
         sorted_keys = sorted(
             all_keys,
@@ -1170,4 +1142,141 @@ class AIReviewCoreFun:
         review_item_dict_sorted = {}
         for key in sorted_keys:
             review_item_dict_sorted[key] = review_item_dict[key]
-        return review_item_dict_sorted
+        return review_item_dict_sorted
+
+    def _merge_chunks_for_completeness_check(
+        self,
+        chunks: List[Dict[str, Any]],
+        review_item_dict: Dict[str, List[str]]
+    ) -> List[Dict[str, Any]]:
+        """
+        Build one merged, whole-chapter chunk for each chapter that is subject to completeness review.
+
+        Args:
+            chunks: The already-filtered list of chunk dicts.
+            review_item_dict: Review-item mapping {chapter_code: [func_names]}.
+
+        Returns:
+            List[Dict[str, Any]]: Copies of the input chunks plus the merged chunks, sorted by page.
+
+        Note:
+            Merge rules:
+            1. Classifications whose func list holds 'check_completeness' or 'outline_check' are selected.
+            2. Chunks are grouped by chapter name: `chapter` (else `section_label`) with "->" and what follows removed.
+            3. Each group is sorted by page ascending; content and original_content are joined with blank lines.
+            4. The merged chunk takes the page of the first chunk after sorting.
+            5. Merged chunks are appended; every original chunk is kept (as a shallow copy).
+            6. Copies of originals get is_complete_field=False; merged chunks get True, even for a single chunk.
+            7. The final list is sorted by page; if nothing qualifies or an exception occurs, `chunks` is returned as-is.
+        """
+        try:
+            # 1. Collect the chapter classifications that include a completeness review
+            completeness_chapters = set()
+            for chapter_code, func_names in review_item_dict.items():
+                if 'check_completeness' in func_names or 'outline_check' in func_names:
+                    completeness_chapters.add(chapter_code)
+
+            if not completeness_chapters:
+                logger.info("没有包含完整性审查的章节,无需合并")
+                return chunks
+
+            logger.info(f"包含完整性审查的章节分类: {completeness_chapters}")
+
+            # 2. Pick the chunks that belong to those classifications
+            chunks_to_merge = []
+            for chunk in chunks:
+                chapter_code = chunk.get("chapter_classification", "")
+                if chapter_code in completeness_chapters:
+                    chunks_to_merge.append(chunk)
+
+            if not chunks_to_merge:
+                logger.info("没有找到需要合并的chunks")
+                return chunks
+
+            # 3. Group by chapter name (the text before the first "->")
+            chapter_groups = {}
+            for chunk in chunks_to_merge:
+                chapter_full = chunk.get("chapter", chunk.get("section_label", ""))
+                # Chapter name: drop "->" and everything after it
+                chapter_name = chapter_full.split("->")[0].strip() if "->" in chapter_full else chapter_full
+
+                if chapter_name not in chapter_groups:
+                    chapter_groups[chapter_name] = []
+                chapter_groups[chapter_name].append(chunk)
+
+            logger.info(f"按章节分组完成,共 {len(chapter_groups)} 个章节需要合并")
+
+            # 4. Merge the chunks of each chapter
+            # First copy every original chunk and flag the copy with is_complete_field=False
+            result_chunks = []
+            for chunk in chunks:
+                chunk_copy = chunk.copy()
+                chunk_copy["is_complete_field"] = False
+                result_chunks.append(chunk_copy)
+
+            for chapter_name, chapter_chunk_list in chapter_groups.items():
+                # Sort by page ascending (digit-only pages as int, others raw; mixed types would raise TypeError -> outer except)
+                chapter_chunk_list.sort(key=lambda x: int(x.get("page", 0)) if str(x.get("page", 0)).isdigit() else x.get("page", 0))
+
+                # Smallest page = page of the first chunk after sorting
+                min_page = chapter_chunk_list[0].get("page", 0)
+
+                # Join content and original_content, separated by a blank line
+                merged_content = "\n\n".join([
+                    chunk.get("content", "") for chunk in chapter_chunk_list
+                ])
+                merged_original_content = "\n\n".join([
+                    chunk.get("original_content", "") for chunk in chapter_chunk_list
+                ])
+
+                # Start the merged chunk from a shallow copy of the first chunk, keeping all of its fields
+                merged_chunk = chapter_chunk_list[0].copy()
+
+                # Overwrite the core fields
+                # chunk_id: strip "->" and everything after it, then append the "_merged" suffix
+                original_chunk_id = merged_chunk.get('chunk_id', '')
+                clean_chunk_id = original_chunk_id.split("->")[0].strip() if "->" in original_chunk_id else original_chunk_id
+                merged_chunk["chunk_id"] = f"{clean_chunk_id}_merged"
+
+                merged_chunk["chapter"] = chapter_name  # use the chapter name of the merged group
+                merged_chunk["content"] = merged_content
+                merged_chunk["original_content"] = merged_original_content
+                merged_chunk["page"] = min_page
+                merged_chunk["is_complete_field"] = True  # mark as merged chunk (even when built from a single chunk)
+
+                # section_label and title both become the chapter name
+                merged_chunk["section_label"] = chapter_name
+                merged_chunk["title"] = chapter_name
+
+                # serial_number is cleared to an empty string
+                merged_chunk["serial_number"] = ""
+
+                # Every other field of the first chunk is kept as copied above
+                # element_tag: only the first chunk's tag is kept, copied so the original tag is not modified
+                if "element_tag" in merged_chunk:
+                    merged_chunk["element_tag"] = chapter_chunk_list[0]["element_tag"].copy()
+                    # The chunk_id inside element_tag is cleaned and suffixed the same way
+                    original_element_chunk_id = merged_chunk["element_tag"].get('chunk_id', '')
+                    clean_element_chunk_id = original_element_chunk_id.split("->")[0].strip() if "->" in original_element_chunk_id else original_element_chunk_id
+                    merged_chunk["element_tag"]["chunk_id"] = f"{clean_element_chunk_id}_merged"
+                    # The serial_number inside element_tag is cleared as well
+                    merged_chunk["element_tag"]["serial_number"] = ""
+
+                # Append the merged chunk to the result list
+                result_chunks.append(merged_chunk)
+
+                logger.info(f"合并章节 '{chapter_name}': {len(chapter_chunk_list)} 个chunk -> 1 个合并chunk (page={min_page})")
+
+            # 5. Sort the combined list by page (same key as the per-chapter sort)
+            result_chunks.sort(
+                key=lambda x: int(x.get("page", 0)) if str(x.get("page", 0)).isdigit() else x.get("page", 0)
+            )
+
+            logger.info(f"合并完成并按页码排序: 原始 {len(chunks)} 个chunk -> 最终 {len(result_chunks)} 个chunk(包含 {len(result_chunks) - len(chunks)} 个合并chunk)")
+
+            return result_chunks
+
+        except Exception as e:
+            logger.error(f"合并chunks失败: {str(e)}", exc_info=True)
+            # On failure fall back to the original, unmodified list
+            return chunks