hace 3 meses · 2f1e757a55
--- a/core/construction_review/component/doc_worker/docx_worker/toc_extractor.py
+++ b/core/construction_review/component/doc_worker/docx_worker/toc_extractor.py
@@ -13,6 +13,7 @@ from typing import Any, Dict, List
 
				 from docx import Document
			
 
				 
			
 
				 from ..interfaces import TOCExtractor, DocumentSource
			
 
				+from ..utils.toc_level_identifier import TOCLevelIdentifier
			
 
				 
			
 
				 
			
 
				 class DocxTOCExtractor(TOCExtractor):
			
@@ -21,6 +22,10 @@ class DocxTOCExtractor(TOCExtractor):
 
				     # 目录行模式：标题 + 制表符 + 页码
			
 
				     TOC_PATTERN = re.compile(r"^(?P<title>.+?)\t+(?P<page>\d+)\s*$")
			
 
				 
			
 
				+    def __init__(self) -> None:
			
 
				+        """初始化 DOCX 目录提取器"""
			
 
				+        self._level_identifier = TOCLevelIdentifier()
			
 
				+
			
 
				     def extract_toc(self, source: DocumentSource) -> Dict[str, Any]:
			
 
				         """
			
 
				         提取 DOCX 文档的目录信息
			
@@ -55,13 +60,10 @@ class DocxTOCExtractor(TOCExtractor):
 
				                 title = match.group("title").strip()
			
 
				                 page = int(match.group("page"))
			
 
				                 
			
 
				-                # 判断层级（简单规则：根据编号格式）
			
 
				-                level = self._detect_level(title)
			
 
				-                
			
 
				+                # 先不设置层级，后续统一识别
			
 
				                 toc_items.append({
			
 
				                     "title": title,
			
 
				                     "page": page,
			
 
				-                    "level": level,
			
 
				                     "original": text,
			
 
				                 })
			
 
				                 
			
@@ -75,6 +77,9 @@ class DocxTOCExtractor(TOCExtractor):
 
				         else:
			
 
				             toc_pages = []
			
 
				 
			
 
				+        # 使用 TOCLevelIdentifier 识别层级（与 doc_worker 保持一致）
			
 
				+        toc_items = self._level_identifier.identify_levels(toc_items)
			
 
				+
			
 
				         return {
			
 
				             "toc_items": toc_items,
			
 
				             "toc_count": len(toc_items),
			
@@ -83,13 +88,10 @@ class DocxTOCExtractor(TOCExtractor):
 
				 
			
 
				     def _detect_level(self, title: str) -> int:
			
 
				         """
			
 
				-        根据标题格式检测层级
			
 
				+        根据标题格式检测层级（已废弃，保留仅用于向后兼容）
			
 
				         
			
 
				-        规则：
			
 
				-        - 第X章 -> level 1
			
 
				-        - 一）、二）、三） -> level 2
			
 
				-        - 1、2、3、 -> level 3
			
 
				-        - (1)、(2)、(3) -> level 4
			
 
				+        注意：此方法已不再使用，现在使用 TOCLevelIdentifier 统一识别层级。
			
 
				+        保留此方法仅用于向后兼容和测试。
			
 
				         """
			
 
				         # 章节格式
			
 
				         if re.match(r"^第[一二三四五六七八九十\d]+章", title):
			
--- a/core/construction_review/component/doc_worker/utils/llm_client.py
+++ b/core/construction_review/component/doc_worker/utils/llm_client.py
@@ -312,11 +312,11 @@ class LLMClient:
 
				         返回:
			
 
				             结果列表，与输入请求一一对应
			
 
				         
			
 
				-        注意: 此方法现在使用 workflow_manager.py 的全局事件循环，不再自行初始化事件循环
			
 
				+        注意: 此方法使用 workflow_manager.py 的全局事件循环，不再自行初始化事件循环
			
 
				         """
			
 
				         if HAS_AIOHTTP:
			
 
				             # 使用异步实现
			
 
				-            # 注释掉异步初始化，使用 workflow_manager.py 的全局事件循环
			
 
				+            # 注释掉异步初始化，直接使用 workflow_manager.py 设置的全局事件循环
			
 
				             # loop = asyncio.get_event_loop()
			
 
				             # if loop.is_running():
			
 
				             #     # 如果事件循环已经在运行，创建新的事件循环
			
@@ -328,12 +328,14 @@ class LLMClient:
 
				             #         return self._batch_call_sync_fallback(requests)
			
 
				             # return loop.run_until_complete(self.batch_call_async(requests))
			
 
				             
			
 
				-            # 使用 workflow_manager.py 的全局事件循环
			
 
				+            # 使用 workflow_manager.py 的全局事件循环（如果已存在）
			
 
				             try:
			
 
				+                # Get the current thread's event loop (expected to be the global loop set up by workflow_manager.py)
			
 
				                 loop = asyncio.get_event_loop()
			
 
				+                # 直接使用全局循环，不进行任何初始化
			
 
				                 return loop.run_until_complete(self.batch_call_async(requests))
			
 
				             except RuntimeError:
			
 
				-                # 如果没有事件循环，回退到同步调用
			
 
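				+                # RuntimeError: no usable event loop (e.g. workflow_manager.py not initialized yet) or the loop is already running/closed; fall back to sync calls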
			
 
				                 return self._batch_call_sync_fallback(requests)
			
 
				         else:
			
 
				             return self._batch_call_sync_fallback(requests)