5 luni în urmă · e518fdb15c
--- a/core/construction_review/component/doc_worker/chunking/__init__.py
+++ b/core/construction_review/component/doc_worker/chunking/__init__.py
@@ -6,3 +6,4 @@ from .text_splitter import TextSplitter
 
				 
			
 
				 __all__ = ['TextSplitter']
			
 
				 
			
 
				+
			
--- a/core/construction_review/component/doc_worker/chunking/chunk_metadata.py
+++ b/core/construction_review/component/doc_worker/chunking/chunk_metadata.py
@@ -96,7 +96,7 @@ class ChunkMetadata:
 
				     
			
 
				     def finalize_chunk_ids(self, chunks):
			
 
				         """
			
 
				-        生成最终的chunk_id和serial_number（复用测试目录的逻辑）
			
 
				+        生成最终的chunk_id和serial_number
			
 
				         
			
 
				         参数:
			
 
				             chunks: 合并后的块列表
			
@@ -111,6 +111,7 @@ class ChunkMetadata:
 
				         for i, chunk in enumerate(chunks):
			
 
				             title_number = chunk.get('_title_number', '')
			
 
				             is_merged = chunk.get('_is_merged', False)
			
 
				+            section_label = chunk.get('section_label', '')
			
 
				             
			
 
				             # 提取标题编号的主要部分（用于判断是否在同一标题内）
			
 
				             # 如果包含+号，说明是跨标题合并的块
			
@@ -119,8 +120,6 @@ class ChunkMetadata:
 
				                 local_index = 0
			
 
				                 # chunk_id中使用+号（无空格），如"1.5+1.6"
			
 
				                 merged_title_number = title_number
			
 
				-                # serial_number中使用空格，如"1.5 + 1.6"
			
 
				-                serial_number_display = chunk.get('_title_number_display', title_number.replace('+', ' + '))
			
 
				                 # 更新current_title_number为合并后的编号，这样下一个块会重新开始
			
 
				                 current_title_number = title_number
			
 
				             else:
			
@@ -133,14 +132,22 @@ class ChunkMetadata:
 
				                 else:
			
 
				                     local_index += 1
			
 
				                 merged_title_number = title_number
			
 
				-                serial_number_display = title_number
			
 
				             
			
 
				-            # 生成chunk_id（使用无空格的编号）
			
 
				-            if merged_title_number:
			
 
				+            # 从section_label中提取标题路径的编号路径（用于chunk_id）
			
 
				+            title_number_path = self._extract_title_number_path(section_label)
			
 
				+            
			
 
				+            # 生成chunk_id：doc_chunk_<标题路径的编号路径>_序号
			
 
				+            if title_number_path:
			
 
				+                chunk_id_str = f"doc_chunk_{title_number_path}_{local_index}"
			
 
				+            elif merged_title_number:
			
 
				+                # 如果没有完整的编号路径，使用合并后的编号（向后兼容）
			
 
				                 chunk_id_str = f"doc_chunk_{merged_title_number}_{local_index}"
			
 
				             else:
			
 
				                 chunk_id_str = f"doc_chunk_{local_index}"
			
 
				             
			
 
				+            # 从section_label中提取最底层级的编号（用于serial_number）
			
 
				+            serial_number = self.text_utils.extract_number_from_section_label(section_label)
			
 
				+            
			
 
				             # 更新chunk数据
			
 
				             final_chunk = {
			
 
				                 'file_name': chunk['file_name'],
			
@@ -150,7 +157,7 @@ class ChunkMetadata:
 
				                 'element_tag': {
			
 
				                     'chunk_id': chunk_id_str,
			
 
				                     'page': chunk['element_tag']['page'],
			
 
				-                    'serial_number': serial_number_display if merged_title_number else ''
			
 
				+                    'serial_number': serial_number
			
 
				                 },
			
 
				                 'review_chunk_content': chunk['review_chunk_content']
			
 
				             }
			
@@ -248,6 +255,47 @@ class ChunkMetadata:
 
				         
			
 
				         return ""
			
 
				     
			
 
    def _extract_title_number_path(self, section_label):
        """
        Build the numbering path for a hierarchical section label.

        The label is split into levels on "->"; each non-blank level is passed
        to ``self.text_utils.extract_title_number`` and the non-empty results
        are joined back together with "->".

        Examples given by the original author (the exact numbers depend on
        what ``extract_title_number`` returns for each level):
            "第一章 工程概况->【1】工程概况->1.1 项目总体概况" -> "1->【1】->1.1"
            "第三章 施工计划->【2】机械设备计划" -> "3->【2】"
            "第一章 工程概况->【2】自然条件->2.1 气象情况" -> "1->【2】->2.1"

        Args:
            section_label: Label string in the form "level1->level2->level3".
                When several labels are merged with " + ", only the first
                label is used.

        Returns:
            str: The numbering path joined with "->", or an empty string when
            the label is empty or no level yields a number.
        """
        if not section_label:
            return ""

        # Merged chunks carry several labels joined by " + "; keep the first.
        first_label = section_label.partition(' + ')[0]

        # Drop blank levels before asking text_utils for their numbers.
        levels = filter(None, (raw.strip() for raw in first_label.split('->')))
        candidates = (
            self.text_utils.extract_title_number(level) for level in levels
        )

        # Joining an empty sequence gives "", which covers the not-found case.
        return '->'.join(number for number in candidates if number)
			
 
				+    
			
 
				     def build_hierarchy_path(self, title, all_toc_items, target_level):
			
 
				         """
			
 
				         构建从1级到当前标题的完整层级路径
			
--- a/core/construction_review/component/doc_worker/classification/__init__.py
+++ b/core/construction_review/component/doc_worker/classification/__init__.py
@@ -6,3 +6,4 @@ from .llm_classifier import LLMClassifier
 
				 
			
 
				 __all__ = ['LLMClassifier']
			
 
				 
			
 
				+
			
--- a/core/construction_review/component/doc_worker/config/__init__.py
+++ b/core/construction_review/component/doc_worker/config/__init__.py
@@ -6,3 +6,4 @@ from .config_loader import get_config, Config
 
				 
			
 
				 __all__ = ['get_config', 'Config']
			
 
				 
			
 
				+
			
--- a/core/construction_review/component/doc_worker/output/__init__.py
+++ b/core/construction_review/component/doc_worker/output/__init__.py
@@ -6,3 +6,4 @@ from .result_saver import ResultSaver
 
				 
			
 
				 __all__ = ['ResultSaver']
			
 
				 
			
 
				+
			
--- a/core/construction_review/component/doc_worker/toc/__init__.py
+++ b/core/construction_review/component/doc_worker/toc/__init__.py
@@ -6,3 +6,4 @@ from .toc_extractor import TOCExtractor
 
				 
			
 
				 __all__ = ['TOCExtractor']
			
 
				 
			
 
				+
			
--- a/temp/AI审查结果.json
+++ b/temp/AI审查结果.json