#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Inspect chapter-leak details for one specific file.

The script runs a document through the test facade, collects the chunks whose
``section_label`` starts with "第九章" (chapter 9) or "第十章" (chapter 10),
and reports whether the text of the last chapter 9 chunk contains the string
"第十章", i.e. whether chapter 10 content leaked into chapter 9.

Usage:
    python <this script> [path-to-document]

When no path is given, ``DEFAULT_TEST_FILE`` is used.
"""

import sys
from pathlib import Path

# The directory three levels above this file is put at the front of sys.path
# before the project imports below; presumably they only resolve from there.
project_root = Path(__file__).parent.parent.parent
sys.path.insert(0, str(project_root))

from test_chunk_split_fix import build_test_facade
from core.construction_review.component.doc_worker.interfaces import DocumentSource

# Document checked when no path is supplied on the command line.
DEFAULT_TEST_FILE = Path("D:/wx_work/sichuan_luqiao/lu_sgsc_testfile/测试模版-四川路桥专项施工方案框架以及编制说明(2025修订第三版)- v0.2.pdf")


def check_file(file_path: Path) -> None:
    """Print a chapter 9 / chapter 10 leak report for ``file_path``.

    The file is processed through ``build_test_facade().process(...)``. The
    result is read as a mapping with a ``"chunks"`` list, and each chunk as a
    mapping with ``section_label``, ``chunk_id`` and ``review_chunk_content``
    entries. Missing or ``None`` labels/contents are treated as empty strings.

    Args:
        file_path: Path of the document to process.

    Returns:
        None. All findings are written to stdout.
    """
    print(f"\nChecking: {file_path.name}")
    print("=" * 80)

    source = DocumentSource(path=str(file_path))
    facade = build_test_facade()
    result = facade.process(source)
    chunks = result.get("chunks", [])

    # Collect the chunks belonging to chapter 9 and chapter 10.
    chapter9_chunks = []
    chapter10_chunks = []
    for chunk in chunks:
        # `or ""` also covers a key that is present but holds None.
        label = chunk.get("section_label") or ""
        if label.startswith("第九章"):
            chapter9_chunks.append(chunk)
        elif label.startswith("第十章"):
            chapter10_chunks.append(chunk)

    print(f"\n第九章 chunks: {len(chapter9_chunks)}")
    for chunk in chapter9_chunks:
        print(f" - {chunk.get('chunk_id')}: {chunk.get('section_label')}")

    print(f"\n第十章 chunks: {len(chapter10_chunks)}")
    for chunk in chapter10_chunks:
        print(f" - {chunk.get('chunk_id')}: {chunk.get('section_label')}")

    # Check whether the last chapter 9 chunk contains "第十章".
    if chapter9_chunks:
        last_chunk = chapter9_chunks[-1]
        content = last_chunk.get("review_chunk_content") or ""
        print(f"\n第九章最后一个 chunk: {last_chunk.get('chunk_id')}")
        print(f" section_label: {last_chunk.get('section_label')}")
        print(f" content长度: {len(content)}")

        # Search for "第十章".
        idx = content.find("第十章")
        if idx >= 0:
            print(f"\n [WARNING] 发现'第十章'在位置 {idx}")
            # Show up to 100 characters of context on each side of the hit.
            start = max(0, idx - 100)
            end = min(len(content), idx + 100)
            print(f" 上下文: ...{content[start:end]}...")
        else:
            print("\n [OK] 未包含'第十章'")

    # Check the first chapter 10 chunk, which should contain "第十章".
    if chapter10_chunks:
        first_chunk = chapter10_chunks[0]
        content = first_chunk.get("review_chunk_content") or ""
        print(f"\n第十章第一个 chunk: {first_chunk.get('chunk_id')}")
        print(f" section_label: {first_chunk.get('section_label')}")

        idx = content.find("第十章")
        if idx >= 0:
            print(f" [OK] 包含'第十章'在位置 {idx}(这是正常的,是章节标题)")
        else:
            # Previously silent: report the missing chapter heading explicitly.
            print(" [WARNING] 未包含'第十章'")


if __name__ == "__main__":
    # An optional first CLI argument overrides the built-in default path.
    test_file = Path(sys.argv[1]) if len(sys.argv) > 1 else DEFAULT_TEST_FILE
    if test_file.exists():
        check_file(test_file)
    else:
        print(f"File not found: {test_file}")