Procházet zdrojové kódy

fix:修复minerU转换脚本以及json解析

Meric před 2 týdny
rodič
revize
a84d580059

+ 107 - 0
src/app/minerU/minerU.py

@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+import os
+import requests
+import zipfile
+import shutil
+from pathlib import Path
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
# Endpoint and working folders.  Each value can be overridden through an
# environment variable so the script runs on other machines without editing
# the source; the defaults are the original hard-coded values.
API_URL = os.environ.get("MINERU_API_URL", "http://183.220.37.46:25428/file_parse")
INPUT_DIR = Path(os.environ.get(
    "MINERU_INPUT_DIR",
    r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\公司集团评审意见说明\公司集团评审意见_input",
))
OUTPUT_DIR = Path(os.environ.get(
    "MINERU_OUTPUT_DIR",
    r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\公司集团评审意见说明\公司集团评审意见_output",
))
+
def parse_file(file_path, timeout=(30, 3600)):
    """Convert one PDF through the ``file_parse`` endpoint and unpack the result.

    The PDF is uploaded to ``API_URL`` with a request for a zip response.
    The archive is written to ``OUTPUT_DIR/<stem>_result.zip``, extracted into
    ``OUTPUT_DIR/<stem>/`` and then deleted.  If the archive contains a
    top-level folder named after the PDF stem, its contents are moved up one
    level so the results sit directly in ``OUTPUT_DIR/<stem>/``.

    Args:
        file_path: ``pathlib.Path`` of the PDF to upload.
        timeout: Forwarded to ``requests.post`` as ``(connect, read)`` seconds.
            Without a timeout a stalled server would block this worker
            forever.  Pass ``None`` to wait indefinitely.

    Returns:
        ``(filename, success, error)``; ``error`` is ``None`` on success and a
        message string otherwise.  Exceptions raised while processing are
        reported through the tuple so one bad file does not stop the batch.
    """
    filename = file_path.name
    stem = file_path.stem
    data = {
        'return_md': 'true',
        'response_format_zip': 'true',
        'return_original_file': 'true',
        'return_middle_json': 'true',
        'return_content_list': 'true',
        'return_images': 'true'
    }

    try:
        # Keep the upload handle open only for the duration of the request.
        with open(file_path, 'rb') as f:
            files = {
                'files': (filename, f, 'application/pdf')
            }
            print(f"Processing: {filename}")
            response = requests.post(API_URL, files=files, data=data, timeout=timeout)

        if response.status_code != 200:
            error_msg = f"HTTP {response.status_code}: {response.text}"
            print(f"  Error: {error_msg}")
            return (filename, False, error_msg)

        zip_path = OUTPUT_DIR / f"{stem}_result.zip"
        extract_dir = OUTPUT_DIR / stem

        try:
            with open(zip_path, 'wb') as out_f:
                out_f.write(response.content)
            print(f"  Saved zip to: {zip_path}")

            extract_dir.mkdir(exist_ok=True)
            with zipfile.ZipFile(zip_path, 'r') as zipf:
                zipf.extractall(extract_dir)
        finally:
            # The zip is only a transport artefact: remove it even when the
            # payload turns out not to be a valid archive.
            if zip_path.exists():
                os.remove(zip_path)

        # Flatten "<stem>/<stem>/..." into "<stem>/...".
        nested_dir = extract_dir / stem
        if nested_dir.exists() and nested_dir.is_dir():
            for item in nested_dir.iterdir():
                target = extract_dir / item.name
                if target != nested_dir and target.exists():
                    # Stale output from an earlier run: replace it, otherwise
                    # shutil.move would nest a directory inside the old one.
                    if target.is_dir() and not target.is_symlink():
                        shutil.rmtree(target)
                    else:
                        target.unlink()
                shutil.move(str(item), str(target))
            nested_dir.rmdir()

        print(f"  Extracted to: {extract_dir}")
        return (filename, True, None)
    except Exception as e:
        # Worker boundary: report the failure instead of propagating it.
        print(f"  Exception: {str(e)}")
        return (filename, False, str(e))
+
def main(max_workers=10):
    """Convert every PDF in ``INPUT_DIR`` concurrently and print a summary.

    Each PDF is handed to ``parse_file`` on a thread pool.  When all uploads
    have finished, the success/failure counts and the per-file error messages
    are printed.

    Args:
        max_workers: Size of the thread pool used for the uploads.
    """
    # parents=True: the configured folders may sit below directories that do
    # not exist yet on a fresh machine.
    INPUT_DIR.mkdir(parents=True, exist_ok=True)
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Match the extension case-insensitively ("*.pdf" misses "X.PDF" on
    # case-sensitive file systems) and skip directories named like a PDF.
    pdf_files = sorted(
        entry for entry in INPUT_DIR.iterdir()
        if entry.is_file() and entry.suffix.lower() == ".pdf"
    )

    if not pdf_files:
        print("No PDF files found in input directory")
        return

    print(f"Found {len(pdf_files)} PDF file(s)")
    print(f"Processing with {max_workers} concurrent workers\n")

    success_count = 0
    failed_files = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(parse_file, pdf_file) for pdf_file in pdf_files]
        for future in as_completed(futures):
            filename, success, error = future.result()
            if success:
                success_count += 1
            else:
                failed_files.append((filename, error))

    print(f"\nDone! Success: {success_count}, Failed: {len(failed_files)}")

    if failed_files:
        print("\nFailed files:")
        for filename, error in failed_files:
            print(f"  - {filename}: {error}")
+
if __name__ == "__main__":
    # Command-line entry point: the only option is the upload pool size.
    import argparse

    cli = argparse.ArgumentParser(description='Parse PDF files using MineRU API')
    cli.add_argument(
        '-w', '--workers',
        type=int,
        default=10,
        help='Number of concurrent workers (default: 10)',
    )
    options = cli.parse_args()

    main(max_workers=options.workers)

+ 522 - 182
src/app/scripts/ceshi/03-施工方案筛选.py

@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 """
 """
-评审意见PDF文件筛选脚本 - 二级筛选+断点续传版
+评审意见PDF文件筛选脚本 - 多进程并发版+测试模式
 
 
 功能说明:
 功能说明:
     从raw/670目录下的数字编号子目录中筛选评审意见PDF文件。
     从raw/670目录下的数字编号子目录中筛选评审意见PDF文件。
@@ -21,6 +21,8 @@
     6. 专家评审输出到output/expert_review目录
     6. 专家评审输出到output/expert_review目录
     7. 公司/集团评审输出到output/company_review目录
     7. 公司/集团评审输出到output/company_review目录
     8. 支持断点续传,在temp目录缓存处理进度
     8. 支持断点续传,在temp目录缓存处理进度
+    9. 【新增】支持多进程并发筛选,提高效率
+    10.【新增】支持随机抽取测试模式,快速验证
 
 
 输入:
 输入:
     - 源目录: raw/670/ (包含数字编号子目录,如1567、1569等)
     - 源目录: raw/670/ (包含数字编号子目录,如1567、1569等)
@@ -35,6 +37,7 @@
 
 
 作者: Claude
 作者: Claude
 日期: 2026-04-21
 日期: 2026-04-21
+更新: 2026-05-08 - 增加多进程并发和测试模式
 """
 """
 
 
 import pandas as pd
 import pandas as pd
@@ -44,6 +47,8 @@ import os
 import shutil
 import shutil
 import re
 import re
 import warnings
 import warnings
+import multiprocessing as mp
+import random
 from pathlib import Path
 from pathlib import Path
 from datetime import datetime
 from datetime import datetime
 from typing import List, Dict, Tuple, Optional, Set
 from typing import List, Dict, Tuple, Optional, Set
@@ -59,36 +64,211 @@ warnings.filterwarnings('ignore', category=Warning)
 # 1) 填绝对路径(如 E:/data/raw/670)则直接使用(Windows 建议用 / 或 \\)
 # 1) 填绝对路径(如 E:/data/raw/670)则直接使用(Windows 建议用 / 或 \\)
 # 2) 填相对路径(如 ../../raw/670)则相对当前脚本目录解析
 # 2) 填相对路径(如 ../../raw/670)则相对当前脚本目录解析
 SOURCE_DIR = r"E:\提供的原始文件\原始文件\全部的原始文档\未提取"
 SOURCE_DIR = r"E:\提供的原始文件\原始文件\全部的原始文档\未提取"
-EXPERT_OUTPUT_DIR = r"F:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\专家评审意见_记录"
-COMPANY_OUTPUT_DIR = r"F:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\公司集团评审意见说明"
+EXPERT_OUTPUT_DIR = r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\专家评审意见_记录"
+COMPANY_OUTPUT_DIR = r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\公司集团评审意见说明"
 TEMP_DIR = "temp"
 TEMP_DIR = "temp"
 
 
 
 
+# ==================== 并发与测试配置 ====================
+# 多进程并发配置
+NUM_WORKERS = 4  # 并发进程数,建议设为CPU核心数(如CPU有8核则设为6-8)
+                   # 注意:每个工作进程内部还会为单个PDF创建子进程(超时控制)
+                   # 因此 NUM_WORKERS 不宜过大,避免进程过多导致系统资源耗尽
+
+# 测试模式配置
+TEST_MODE = False       # 是否启用测试模式:随机抽取少量目录快速测试
+TEST_SAMPLE_SIZE = 5    # 测试模式下随机抽取的目录数量
+                        # 测试完成后会输出结果并自动退出,不会进入二级筛选
+
+
 # 分批配置(仅用于统计显示,不创建子目录)
 # 分批配置(仅用于统计显示,不创建子目录)
 BATCH_SIZE = 50  # 每批处理的目录数量(仅用于进度显示)
 BATCH_SIZE = 50  # 每批处理的目录数量(仅用于进度显示)
 
 
 # 关键词配置
 # 关键词配置
 KEYWORDS = {
 KEYWORDS = {
-    # 专家意见:必须命中“专家审查”相关表述(仅“评审/评估”不算专家审查)
+    # 专家意见:涵盖"审查"、"评审"、"论证"、"咨询"、"签字"等多种表述
     "expert": [
     "expert": [
+        # === 专家审查系列(核心关键词)===
         "专家审查意见", "专家审查记录", "专家审查结论",
         "专家审查意见", "专家审查记录", "专家审查结论",
         "专家审查说明", "专家审查建议", "专家审查纪要", "专家审查报告",
         "专家审查说明", "专家审查建议", "专家审查纪要", "专家审查报告",
         "专家审查审核表", "专家审查审查表",
         "专家审查审核表", "专家审查审查表",
+        
+        # === 专家评审系列(评审与审查并重)===
+        "专家评审意见", "专家评审记录", "专家评审结论",
+        "专家评审说明", "专家评审建议", "专家评审纪要", "专家评审报告",
+        "专家评审审核表", "专家评审表",
+        
+        # === 专家论证系列(常见于危大工程)===
+        "专家论证意见", "专家论证记录", "专家论证结论",
+        "专家论证说明", "专家论证建议", "专家论证纪要", "专家论证报告",
+        "专家论证审核表", "专家论证审查表",
+        
+        # === 专家组系列(多位专家集体意见)===
+        "专家组意见", "专家组审查意见", "专家组评审意见",
+        "专家组论证意见", "专家组建议", "专家组结论",
+        "专家组纪要", "专家组报告",
+        
+        # === 专家咨询系列(技术咨询类)===
+        "专家咨询意见", "专家咨询建议", "专家咨询记录",
+        "专家咨询结论", "专家咨询说明",
+        
+        # === 专家签字/签名系列(专家参与确认)===
+        "专家签字", "专家签名", "专家签章",
+        "专家签字表", "专家签名表", "专家签认",
+        "专家确认", "专家审核签字",
+        
+        # === 专家意见回复/修改系列(回复与整改)===
+        "专家意见回复", "专家意见修改回复", "专家意见整改回复",
+        "专家意见回复表", "专家意见修改表", "专家意见回复单",
+        
+        # === 专家意见通用表述(兜底关键词)===
+        "专家意见", "专家建议", "专家结论",
+        "专家名单", "专家签到表", "专家签到",
+        
+        # === 常见简写/变体 ===
+        "专家意见及回复", "专家意见及整改", "专家意见及修改",
+        "专家审查结论表", "专家评审结论表", "专家论证结论表",
+        "专家技术意见", "专家技术审查", "专家技术评审",
     ],
     ],
-    # 公司/集团:必须包含“公司”或“集团”主体表述
-    "company": ["公司评审意见", "集团评审意见", "公司审核意见", "集团审核意见", "公司审查意见", "集团审查意见"]  # 公司/集团关键词
+    
+    # 公司/集团:涵盖"公司"、"集团"、"企业"、"项目部"、"总包"、"监理"等多种主体
+    "company": [
+        # === 公司/集团系列(核心关键词)===
+        "公司评审意见", "集团评审意见", "公司审核意见", "集团审核意见", 
+        "公司审查意见", "集团审查意见",
+        "公司评审记录", "集团评审记录", "公司审核记录", "集团审核记录",
+        "公司审查记录", "集团审查记录",
+        "公司评审纪要", "集团评审纪要", "公司审核纪要", "集团审核纪要",
+        "公司审查纪要", "集团审查纪要",
+        "公司评审报告", "集团评审报告", "公司审核报告", "集团审核报告",
+        "公司审查报告", "集团审查报告",
+        
+        # === 企业系列(企业为主体)===
+        "企业评审意见", "企业审核意见", "企业审查意见",
+        "企业评审记录", "企业审核记录", "企业审查记录",
+        "企业评审纪要", "企业审核纪要", "企业审查纪要",
+        "企业技术负责人意见", "企业技术负责人审核",
+        
+        # === 项目部/项目系列(项目部为主体)===
+        "项目部评审意见", "项目部审核意见", "项目部审查意见",
+        "项目部评审记录", "项目部审核记录", "项目部审查记录",
+        "项目评审意见", "项目审核意见", "项目审查意见",
+        "项目经理意见", "项目经理审核",
+        
+        # === 总包/总承包系列(总包单位为主体)===
+        "总包评审意见", "总包审核意见", "总包审查意见",
+        "总承包评审意见", "总承包审核意见", "总承包审查意见",
+        "总包单位意见", "总承包单位意见",
+        
+        # === 内部评审系列(内部流程)===
+        "内部评审意见", "内部审核意见", "内部审查意见",
+        "内部评审记录", "内部审核记录", "内部审查记录",
+        "内部评审纪要", "内部审核纪要", "内部审查纪要",
+        "内部审批意见", "内部会签意见",
+        
+        # === 审批/会签系列(审批流程)===
+        "公司审批意见", "集团审批意见", "企业审批意见",
+        "公司会签意见", "集团会签意见", "企业会签意见",
+        "审批意见", "会签意见", "审批记录",
+        
+        # === 监理系列(监理单位意见)===
+        "监理评审意见", "监理审核意见", "监理审查意见",
+        "监理记录", "监理纪要", "监理报告",
+        "监理工程师意见", "总监理工程师意见",
+        
+        # === 常见简写/变体 ===
+        "公司意见", "集团意见", "企业意见",
+        "公司技术意见", "集团技术意见",
+        "施工方案评审意见", "施工方案审核意见", "施工方案审查意见",
+        "方案评审意见", "方案审核意见", "方案审查意见",
+        "评审会纪要", "审核会纪要", "审查会纪要",
+        "评审会议记录", "审核会议记录", "审查会议记录",
+    ]  
 }
 }
 
 
 # 更宽松但可控的规则,用于提升OCR/版式噪声下的命中率
 # 更宽松但可控的规则,用于提升OCR/版式噪声下的命中率
 KEYWORD_PATTERNS = {
 KEYWORD_PATTERNS = {
     "expert": [
     "expert": [
-        # 仅允许“审查”语义命中(避免把“专家评审/评估”误判为专家审查)
-        r"专家.{0,12}审查.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|审查表)",
-        r"审查.{0,10}专家.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|审查表)",
+        # === 专家审查系列 ===
+        r"专家.{0,12}审查.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|审查表|结论表)",
+        r"审查.{0,10}专家.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|审查表|结论表)",
+        
+        # === 专家评审系列 ===
+        r"专家.{0,12}评审.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|评审表|结论表)",
+        r"评审.{0,10}专家.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|评审表|结论表)",
+        
+        # === 专家论证系列(危大工程常见)===
+        r"专家.{0,12}论证.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|论证表|结论表)",
+        r"论证.{0,10}专家.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|论证表|结论表)",
+        
+        # === 专家组系列 ===
+        r"专家组.{0,10}(审查|评审|论证).{0,10}(意见|记录|结论|说明|建议|纪要|报告)",
+        r"专家组.{0,10}(意见|建议|结论)",
+        
+        # === 专家咨询系列 ===
+        r"专家.{0,10}咨询.{0,10}(意见|建议|记录|结论|说明)",
+        r"咨询.{0,8}专家.{0,10}(意见|建议|记录|结论|说明)",
+        
+        # === 专家签字/签名系列 ===
+        r"专家.{0,8}(签字|签名|签章|签认|确认)",
+        r"(签字|签名|签章).{0,8}专家",
+        
+        # === 专家意见回复/修改系列 ===
+        r"专家.{0,8}意见.{0,8}(回复|修改|整改).{0,8}(表|单|记录)",
+        r"(审查|评审|论证).{0,8}意见.{0,8}(回复|修改|整改).{0,8}(表|单|记录)",
+        r"(审查|评审|论证).{0,8}意见.{0,8}回复",
+        r"对.{0,10}专家.{0,10}意见.{0,10}回复",
+        
+        # === 专家通用表述(兜底)===
+        r"专家.{0,15}(意见|建议|结论)",
+        r"专家.{0,8}(名单|签到)",
+        r"专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+        
+        # === 常见简写变体 ===
+        r"专家.{0,8}(审查|评审|论证).{0,8}结论",
+        r"专家.{0,10}意见.{0,10}(及|和).{0,10}(回复|修改|整改)",
     ],
     ],
+    
     "company": [
     "company": [
-        r"(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录)",
-        r"(公司|集团).{0,10}施工方案.{0,10}(审核|审查|评审).{0,10}(意见|说明|记录)",
+        # === 公司/集团系列 ===
+        r"(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        r"(公司|集团).{0,10}施工方案.{0,10}(审核|审查|评审).{0,10}(意见|说明|记录|纪要|报告)",
+        r"(公司|集团).{0,10}(审批|会签).{0,10}意见",
+        r"(公司|集团).{0,10}技术.{0,8}(负责人|主管).{0,8}意见",
+        
+        # === 企业系列 ===
+        r"企业.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        r"企业.{0,10}技术.{0,8}(负责人|主管).{0,8}(意见|审核)",
+        
+        # === 项目部系列 ===
+        r"项目(部)?.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        r"项目(部)?.{0,10}经理.{0,8}(意见|审核)",
+        r"项目(部)?.{0,10}技术.{0,8}(负责人|主管).{0,8}(意见|审核)",
+        
+        # === 总包/总承包系列 ===
+        r"(总包|总承包).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        r"(总包|总承包).{0,10}单位.{0,8}意见",
+        
+        # === 内部评审系列 ===
+        r"内部.{0,10}(评审|审核|审查|审批|会签).{0,10}(意见|说明|记录|纪要|报告)",
+        
+        # === 监理系列 ===
+        r"监理.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        r"监理.{0,8}工程师.{0,8}意见",
+        r"总监理.{0,8}工程师.{0,8}意见",
+        
+        # === 施工方案评审系列 ===
+        r"施工方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        r"方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        
+        # === 会议/纪要系列 ===
+        r"(评审|审核|审查).{0,8}会.{0,8}(纪要|记录)",
+        r"(评审|审核|审查).{0,8}会议.{0,8}(纪要|记录)",
+        r"(评审|审核|审查).{0,8}纪要",
+        
+        # === 通用兜底 ===
+        r"(公司|集团|企业).{0,8}意见",
+        r"(公司|集团|企业).{0,8}技术.{0,8}意见",
     ],
     ],
 }
 }
 
 
@@ -141,14 +321,12 @@ def _extract_pdf_text_worker(pdf_path_str: str, max_pages: int, result_queue):
         result_queue.put({"ok": False, "error": str(e)})
         result_queue.put({"ok": False, "error": str(e)})
 
 
 
 
-def extract_text_with_pages(pdf_path: Path, max_pages: int, timeout_seconds: int = 30) -> str:
+def extract_text_with_pages(pdf_path: Path, max_pages: int, timeout_seconds: int = 30, verbose: bool = True) -> str:
     """从PDF文件中提取文本内容(指定页数),带硬超时机制(子进程)"""
     """从PDF文件中提取文本内容(指定页数),带硬超时机制(子进程)"""
-    import multiprocessing as mp
-
     text = ""
     text = ""
     file_size_mb = pdf_path.stat().st_size / (1024 * 1024)
     file_size_mb = pdf_path.stat().st_size / (1024 * 1024)
 
 
-    if file_size_mb > 50:
+    if verbose and file_size_mb > 50:
         print(f"\n      [大文件 {file_size_mb:.1f}MB,读取中...]", end="", flush=True)
         print(f"\n      [大文件 {file_size_mb:.1f}MB,读取中...]", end="", flush=True)
 
 
     try:
     try:
@@ -165,24 +343,27 @@ def extract_text_with_pages(pdf_path: Path, max_pages: int, timeout_seconds: int
         if process.is_alive():
         if process.is_alive():
             process.terminate()
             process.terminate()
             process.join(timeout=2)
             process.join(timeout=2)
-            print(f" [超时跳过]", end="", flush=True)
+            if verbose:
+                print(f" [超时跳过]", end="", flush=True)
             return ""
             return ""
 
 
         result = result_queue.get_nowait() if not result_queue.empty() else {"ok": False, "error": "子进程无返回结果"}
         result = result_queue.get_nowait() if not result_queue.empty() else {"ok": False, "error": "子进程无返回结果"}
 
 
         if not result.get("ok"):
         if not result.get("ok"):
             error_msg = result.get("error", "")
             error_msg = result.get("error", "")
-            if "PyCryptodome" in error_msg or "AES" in error_msg:
-                print(f" [加密PDF需PyCryptodome]", end="", flush=True)
-            elif "Password" in error_msg or "password" in error_msg:
-                print(f" [PDF加密需要密码]", end="", flush=True)
-            else:
-                print(f" [读取失败]", end="", flush=True)
+            if verbose:
+                if "PyCryptodome" in error_msg or "AES" in error_msg:
+                    print(f" [加密PDF需PyCryptodome]", end="", flush=True)
+                elif "Password" in error_msg or "password" in error_msg:
+                    print(f" [PDF加密需要密码]", end="", flush=True)
+                else:
+                    print(f" [读取失败]", end="", flush=True)
             return ""
             return ""
 
 
         text = result.get("text", "")
         text = result.get("text", "")
     except Exception:
     except Exception:
-        print(f" [读取错误]", end="", flush=True)
+        if verbose:
+            print(f" [读取错误]", end="", flush=True)
 
 
     return text
     return text
 
 
@@ -206,7 +387,7 @@ def check_pdf_contains_keywords_with_pages(pdf_path: Path, keywords: List[str],
 def extract_docx_text(docx_path: Path, max_pages: int) -> str:
 def extract_docx_text(docx_path: Path, max_pages: int) -> str:
     """提取DOCX文本(按段落近似页数限制)"""
     """提取DOCX文本(按段落近似页数限制)"""
     try:
     try:
-        # Word没有固定分页信息,这里用“每页约40段”进行近似截断,避免读取过慢。
+        # Word没有固定分页信息,这里用"每页约40段"进行近似截断,避免读取过慢。
         approx_max_paragraphs = max(1, max_pages * 40)
         approx_max_paragraphs = max(1, max_pages * 40)
         doc = Document(str(docx_path))
         doc = Document(str(docx_path))
         texts = []
         texts = []
@@ -220,15 +401,16 @@ def extract_docx_text(docx_path: Path, max_pages: int) -> str:
         return ""
         return ""
 
 
 
 
-def check_file_contains_keywords_with_pages(file_path: Path, keywords: List[str], max_pages: int) -> Tuple[bool, str]:
+def check_file_contains_keywords_with_pages(file_path: Path, keywords: List[str], max_pages: int, verbose: bool = True) -> Tuple[bool, str]:
     """检查文件(PDF/DOCX)指定范围内是否包含任一关键词"""
     """检查文件(PDF/DOCX)指定范围内是否包含任一关键词"""
     suffix = file_path.suffix.lower()
     suffix = file_path.suffix.lower()
     if suffix == ".pdf":
     if suffix == ".pdf":
-        text = extract_text_with_pages(file_path, max_pages=max_pages)
+        text = extract_text_with_pages(file_path, max_pages=max_pages, verbose=verbose)
     elif suffix == ".docx":
     elif suffix == ".docx":
         text = extract_docx_text(file_path, max_pages=max_pages)
         text = extract_docx_text(file_path, max_pages=max_pages)
     elif suffix == ".doc":
     elif suffix == ".doc":
-        print(" [DOC暂不支持,跳过]", end="", flush=True)
+        if verbose:
+            print(" [DOC暂不支持,跳过]", end="", flush=True)
         return False, ""
         return False, ""
     else:
     else:
         return False, ""
         return False, ""
@@ -285,7 +467,7 @@ def get_newest_file(files: List[Path]) -> Optional[Path]:
     return max(files, key=lambda f: get_file_creation_time(f))
     return max(files, key=lambda f: get_file_creation_time(f))
 
 
 
 
-def process_single_directory_phase(dir_path: Path, phase: int, max_pages: int, review_type: str) -> Tuple[bool, str, Optional[Path], List[Path], str]:
+def process_single_directory_phase(dir_path: Path, phase: int, max_pages: int, review_type: str, verbose: bool = True) -> Tuple[bool, str, Optional[Path], List[Path], str]:
     """
     """
     处理单个目录的文件筛选(指定阶段和评审类型)
     处理单个目录的文件筛选(指定阶段和评审类型)
 
 
@@ -294,6 +476,7 @@ def process_single_directory_phase(dir_path: Path, phase: int, max_pages: int, r
         phase: 阶段(1或2)
         phase: 阶段(1或2)
         max_pages: 检查的最大页数
         max_pages: 检查的最大页数
         review_type: 评审类型 ('expert' 或 'company')
         review_type: 评审类型 ('expert' 或 'company')
+        verbose: 是否打印详细日志
 
 
     Returns:
     Returns:
         (是否成功, 状态信息, 选中的文件路径, 所有包含关键词的文件列表, 匹配到的关键词)
         (是否成功, 状态信息, 选中的文件路径, 所有包含关键词的文件列表, 匹配到的关键词)
@@ -314,20 +497,24 @@ def process_single_directory_phase(dir_path: Path, phase: int, max_pages: int, r
     matched_in_top5 = []
     matched_in_top5 = []
     matched_keyword_top5 = ""
     matched_keyword_top5 = ""
     phase_str = f"【第{phase}阶段-{review_type}】"
     phase_str = f"【第{phase}阶段-{review_type}】"
-    print(f"\n  {phase_str} 目录: {dir_path.name} - 共{len(candidate_files)}个文件,检查前{max_pages}页,先检查Top5...")
+    if verbose:
+        print(f"\n  {phase_str} 目录: {dir_path.name} - 共{len(candidate_files)}个文件,检查前{max_pages}页,先检查Top5...")
 
 
     for file_path in top5_files:
     for file_path in top5_files:
         size_mb = get_file_size(file_path) / (1024 * 1024)
         size_mb = get_file_size(file_path) / (1024 * 1024)
-        print(f"    检查Top5: {file_path.name[:30]}... (大小: {size_mb:.2f}MB)", end="", flush=True)
+        if verbose:
+            print(f"    检查Top5: {file_path.name[:30]}... (大小: {size_mb:.2f}MB)", end="", flush=True)
 
 
-        is_match, matched_kw = check_file_contains_keywords_with_pages(file_path, keywords, max_pages)
+        is_match, matched_kw = check_file_contains_keywords_with_pages(file_path, keywords, max_pages, verbose=verbose)
         if is_match:
         if is_match:
-            print(f" -> ✓ 包含关键词[{matched_kw}]")
+            if verbose:
+                print(f" -> ✓ 包含关键词[{matched_kw}]")
             matched_in_top5.append(file_path)
             matched_in_top5.append(file_path)
             if not matched_keyword_top5:
             if not matched_keyword_top5:
                 matched_keyword_top5 = matched_kw
                 matched_keyword_top5 = matched_kw
         else:
         else:
-            print(" -> ✗ 无关键词")
+            if verbose:
+                print(" -> ✗ 无关键词")
 
 
     # Top5中找到匹配
     # Top5中找到匹配
     if matched_in_top5:
     if matched_in_top5:
@@ -336,7 +523,8 @@ def process_single_directory_phase(dir_path: Path, phase: int, max_pages: int, r
             return True, f"Top5中找到{len(matched_in_top5)}个匹配,选择最新", selected, matched_in_top5, matched_keyword_top5
             return True, f"Top5中找到{len(matched_in_top5)}个匹配,选择最新", selected, matched_in_top5, matched_keyword_top5
         return True, "Top5中找到匹配文件", selected, matched_in_top5, matched_keyword_top5
         return True, "Top5中找到匹配文件", selected, matched_in_top5, matched_keyword_top5
 
 
-    print(f"    Top5未找到,扩展到其余{len(candidate_files) - len(top5_files)}个文件...")
+    if verbose:
+        print(f"    Top5未找到,扩展到其余{len(candidate_files) - len(top5_files)}个文件...")
 
 
     # 检查其余文件
     # 检查其余文件
     other_files = [f for f in candidate_files if f not in top5_files]
     other_files = [f for f in candidate_files if f not in top5_files]
@@ -345,16 +533,19 @@ def process_single_directory_phase(dir_path: Path, phase: int, max_pages: int, r
 
 
     for file_path in other_files:
     for file_path in other_files:
         size_mb = get_file_size(file_path) / (1024 * 1024)
         size_mb = get_file_size(file_path) / (1024 * 1024)
-        print(f"    检查其他: {file_path.name[:30]}... (大小: {size_mb:.2f}MB)", end="", flush=True)
+        if verbose:
+            print(f"    检查其他: {file_path.name[:30]}... (大小: {size_mb:.2f}MB)", end="", flush=True)
 
 
-        is_match, matched_kw = check_file_contains_keywords_with_pages(file_path, keywords, max_pages)
+        is_match, matched_kw = check_file_contains_keywords_with_pages(file_path, keywords, max_pages, verbose=verbose)
         if is_match:
         if is_match:
-            print(f" -> ✓ 包含关键词[{matched_kw}]")
+            if verbose:
+                print(f" -> ✓ 包含关键词[{matched_kw}]")
             matched_in_others.append(file_path)
             matched_in_others.append(file_path)
             if not matched_keyword_others:
             if not matched_keyword_others:
                 matched_keyword_others = matched_kw
                 matched_keyword_others = matched_kw
         else:
         else:
-            print(" -> ✗ 无关键词")
+            if verbose:
+                print(" -> ✗ 无关键词")
 
 
     if matched_in_others:
     if matched_in_others:
         selected = get_newest_file(matched_in_others)
         selected = get_newest_file(matched_in_others)
@@ -374,14 +565,14 @@ def copy_file_to_output(file_path: Path, output_dir: Path, new_name: str) -> Pat
 
 
 
 
 def process_review_type(dir_path: Path, phase: int, max_pages: int, review_type: str, 
 def process_review_type(dir_path: Path, phase: int, max_pages: int, review_type: str, 
-                        output_dir: Path, dir_id: str, stats: dict, results: list) -> Tuple[bool, Optional[Path]]:
+                        output_dir: Path, dir_id: str, verbose: bool = True) -> Tuple[bool, Optional[Path], Optional[Dict]]:
     """处理单个评审类型的筛选和输出
     """处理单个评审类型的筛选和输出
     
     
     Returns:
     Returns:
-        (是否成功, 选中的文件路径)
+        (是否成功, 选中的文件路径, 结果记录字典或None)
     """
     """
     success, message, selected_file, all_matched, matched_kw = process_single_directory_phase(
     success, message, selected_file, all_matched, matched_kw = process_single_directory_phase(
-        dir_path, phase=phase, max_pages=max_pages, review_type=review_type
+        dir_path, phase=phase, max_pages=max_pages, review_type=review_type, verbose=verbose
     )
     )
 
 
     if success and selected_file:
     if success and selected_file:
@@ -396,9 +587,10 @@ def process_review_type(dir_path: Path, phase: int, max_pages: int, review_type:
         new_filename = f"{dir_id}_{selected_file.name}"
         new_filename = f"{dir_id}_{selected_file.name}"
         try:
         try:
             dest_path = copy_file_to_output(selected_file, output_dir, new_filename)
             dest_path = copy_file_to_output(selected_file, output_dir, new_filename)
-            print(f"    ✅ [{review_type}] 已输出: {new_filename}")
+            if verbose:
+                print(f"    ✅ [{review_type}] 已输出: {new_filename}")
 
 
-            results.append({
+            result_record = {
                 '目录ID': dir_id,
                 '目录ID': dir_id,
                 '评审类型': review_type,
                 '评审类型': review_type,
                 '阶段': f'第{phase}阶段',
                 '阶段': f'第{phase}阶段',
@@ -413,11 +605,12 @@ def process_review_type(dir_path: Path, phase: int, max_pages: int, review_type:
                 '原路径': str(selected_file),
                 '原路径': str(selected_file),
                 '目标路径': str(dest_path),
                 '目标路径': str(dest_path),
                 '处理时间': datetime.now().isoformat()
                 '处理时间': datetime.now().isoformat()
-            })
-            return True, selected_file
+            }
+            return True, selected_file, result_record
         except Exception as e:
         except Exception as e:
-            print(f"    ❌ [{review_type}] 复制失败: {e}")
-            results.append({
+            if verbose:
+                print(f"    ❌ [{review_type}] 复制失败: {e}")
+            result_record = {
                 '目录ID': dir_id,
                 '目录ID': dir_id,
                 '评审类型': review_type,
                 '评审类型': review_type,
                 '阶段': f'第{phase}阶段',
                 '阶段': f'第{phase}阶段',
@@ -432,11 +625,12 @@ def process_review_type(dir_path: Path, phase: int, max_pages: int, review_type:
                 '原路径': str(selected_file),
                 '原路径': str(selected_file),
                 '目标路径': '',
                 '目标路径': '',
                 '处理时间': datetime.now().isoformat()
                 '处理时间': datetime.now().isoformat()
-            })
-            return False, None
+            }
+            return False, None, result_record
     else:
     else:
-        print(f"    ❌ [{review_type}] {message}")
-        return False, None
+        if verbose:
+            print(f"    ❌ [{review_type}] {message}")
+        return False, None, None
 
 
 
 
 def get_numeric_directories(base_dir: Path) -> List[Path]:
 def get_numeric_directories(base_dir: Path) -> List[Path]:
@@ -534,10 +728,175 @@ def resolve_config_path(path_value: str, script_dir: Path) -> Path:
     return (script_dir / path).resolve()
     return (script_dir / path).resolve()
 
 
 
 
+# ==================== 多进程工作函数 ====================
+
def process_directory_worker(args_tuple):
    """Screen one directory for both review types (pool worker entry point).

    The arguments arrive as a single tuple so the function can be mapped over
    a worker pool.  The "expert" screening runs first, then the "company"
    screening; each is delegated to ``process_review_type`` with its own
    output directory.

    Args:
        args_tuple: ``(dir_path_str, phase, max_pages, expert_output_dir_str,
            company_output_dir_str, verbose)`` where the paths are strings,
            ``phase`` is the screening phase, ``max_pages`` the number of
            pages to inspect and ``verbose`` toggles detailed logging.

    Returns:
        dict with keys ``"dir_id"`` (directory name), ``"expert_success"``,
        ``"company_success"`` (bool per review type) and ``"results"`` (list
        of the non-empty result records, possibly empty).
    """
    (dir_path_str, phase, max_pages,
     expert_output_dir_str, company_output_dir_str, verbose) = args_tuple

    source_dir = Path(dir_path_str)
    dir_id = source_dir.name

    # Expert first, company second - same order for every directory.
    targets = (
        ("expert", Path(expert_output_dir_str)),
        ("company", Path(company_output_dir_str)),
    )

    outcome = {"dir_id": dir_id}
    records = []
    for review_type, destination in targets:
        succeeded, _selected_file, record = process_review_type(
            source_dir, phase=phase, max_pages=max_pages,
            review_type=review_type, output_dir=destination,
            dir_id=dir_id, verbose=verbose
        )
        outcome[f"{review_type}_success"] = succeeded
        if record:
            records.append(record)

    outcome["results"] = records
    return outcome
+
+
def run_phase_concurrently(dirs_to_process: List[Path], phase: int, max_pages: int,
                           expert_output_dir: Path, company_output_dir: Path,
                           temp_dir: Path, numeric_dirs: List[Path],
                           stats: Dict, processed_dirs: Set, 
                           phase1_no_match_expert: Set, phase1_no_match_company: Set,
                           verbose: bool = False, cache_every: int = 10) -> Tuple[Dict, Set, Set]:
    """Run one screening phase over many directories with a process pool.

    Every directory is handed to ``process_directory_worker``.  Results are
    consumed in completion order; counters and bookkeeping sets are updated
    in the parent process and periodically written with
    ``save_progress_cache``.

    Args:
        dirs_to_process: Directories to screen in this phase.
        phase: Screening phase (1 or 2).
        max_pages: Maximum number of pages to inspect per file.
        expert_output_dir: Output directory for expert-review files.
        company_output_dir: Output directory for company-review files.
        temp_dir: Directory that receives the progress cache.
        numeric_dirs: All numeric directories (only its length is stored).
        stats: Statistics dict; mutated in place.
        processed_dirs: Set of processed directory ids; mutated in place.
        phase1_no_match_expert: Ids without an expert match in phase 1;
            mutated in place.
        phase1_no_match_company: Ids without a company match in phase 1;
            mutated in place.
        verbose: Whether workers print detailed logs.
        cache_every: Save the cache after this many completed directories
            (and always after the last one).  Values <= 0 disable the
            periodic saves.

    Returns:
        ``(stats, phase1_no_match_expert, phase1_no_match_company)``.
    """
    # Local import: the module's import block is not touched by this change.
    from concurrent.futures import ProcessPoolExecutor, as_completed

    total = len(dirs_to_process)
    completed = 0
    # NOTE(review): only records produced by THIS call are collected here and
    # written under "results"; whether earlier cached records survive depends
    # on save_progress_cache / the caller - confirm for resume runs.
    all_results = []

    args_list = [
        (
            str(dir_path),
            phase,
            max_pages,
            str(expert_output_dir),
            str(company_output_dir),
            verbose
        )
        for dir_path in dirs_to_process
    ]

    print(f"\n  启动 {NUM_WORKERS} 个并发进程处理 {total} 个目录...")
    print(f"  工作模式: {'详细日志' if verbose else '静默模式(仅显示进度)'}\n")

    # multiprocessing.Pool workers are daemonic, and a daemonic process is not
    # allowed to create children.  The per-PDF timeout in
    # extract_text_with_pages starts a child process, so inside a Pool worker
    # that start fails and no text is ever extracted.  ProcessPoolExecutor
    # workers are regular (non-daemonic) processes, so the nested timeout
    # processes can be started.
    with ProcessPoolExecutor(max_workers=NUM_WORKERS) as executor:
        pending = [executor.submit(process_directory_worker, job) for job in args_list]
        try:
            # Completion order, not submission order - fastest feedback.
            for future in as_completed(pending):
                result = future.result()
                dir_id = result["dir_id"]

                all_results.extend(result["results"])

                if result["expert_success"]:
                    stats["expert_success_count"] = stats.get("expert_success_count", 0) + 1
                    # A later success clears an earlier "no match" mark.
                    phase1_no_match_expert.discard(dir_id)
                elif phase == 1:
                    # Only phase 1 records "no match" marks.
                    phase1_no_match_expert.add(dir_id)

                if result["company_success"]:
                    stats["company_success_count"] = stats.get("company_success_count", 0) + 1
                    phase1_no_match_company.discard(dir_id)
                elif phase == 1:
                    phase1_no_match_company.add(dir_id)

                processed_dirs.add(dir_id)
                completed += 1

                progress = completed / total * 100
                print(f"\r  进度: {completed}/{total} ({progress:.1f}%) | "
                      f"专家成功: {stats.get('expert_success_count', 0)} | "
                      f"公司成功: {stats.get('company_success_count', 0)} | "
                      f"当前: {dir_id}", end="", flush=True)

                # Guard against cache_every <= 0 (modulo by zero).
                periodic = cache_every > 0 and completed % cache_every == 0
                if periodic or completed == total:
                    cache_data = {
                        "processed_dirs": list(processed_dirs),
                        "phase1_no_match_expert": list(phase1_no_match_expert),
                        "phase1_no_match_company": list(phase1_no_match_company),
                        "results": all_results,
                        "stats": stats,
                        "phase": phase,
                        "total_directories": len(numeric_dirs)
                    }
                    save_progress_cache(temp_dir, cache_data)
        except BaseException:
            # Do not let the executor work through the remaining queue before
            # the error reaches the caller.
            for future in pending:
                future.cancel()
            raise

    print(f"\n\n  ✅ 阶段完成!处理 {completed} 个目录")
    print(f"     专家评审成功: {stats.get('expert_success_count', 0)} 个")
    print(f"     公司评审成功: {stats.get('company_success_count', 0)} 个")

    return stats, phase1_no_match_expert, phase1_no_match_company
+
+
 def main():
 def main():
     """主函数"""
     """主函数"""
     print("=" * 70)
     print("=" * 70)
-    print("评审意见PDF筛选脚本 - 二级筛选+断点续传版")
+    print("评审意见PDF筛选脚本 - 多进程并发版+测试模式")
     print("=" * 70)
     print("=" * 70)
 
 
     # 按文件首部配置组装路径(不再按项目根目录拼接)
     # 按文件首部配置组装路径(不再按项目根目录拼接)
@@ -552,6 +911,8 @@ def main():
     print(f"  专家评审输出目录: {expert_output_dir}")
     print(f"  专家评审输出目录: {expert_output_dir}")
     print(f"  公司评审输出目录: {company_output_dir}")
     print(f"  公司评审输出目录: {company_output_dir}")
     print(f"  缓存目录: {temp_dir}")
     print(f"  缓存目录: {temp_dir}")
+    print(f"  并发进程数: {NUM_WORKERS}")
+    print(f"  测试模式: {'是(抽取5个目录)' if TEST_MODE else '否'}")
     print(f"  专家评审关键词: {KEYWORDS['expert']}")
     print(f"  专家评审关键词: {KEYWORDS['expert']}")
     print(f"  公司评审关键词: {KEYWORDS['company']}")
     print(f"  公司评审关键词: {KEYWORDS['company']}")
     print(f"  一级筛选: 前{PHASE_1_PAGES}页")
     print(f"  一级筛选: 前{PHASE_1_PAGES}页")
@@ -575,23 +936,50 @@ def main():
         sys.exit(1)
         sys.exit(1)
 
 
     print(f"  找到 {len(numeric_dirs)} 个数字编号子目录")
     print(f"  找到 {len(numeric_dirs)} 个数字编号子目录")
-
-    # 加载缓存(断点续传)
-    print(f"\n【步骤 2/6】加载进度缓存...")
-    cache = load_progress_cache(temp_dir)
+    
+    # ==================== 测试模式:随机抽取 ====================
+    if TEST_MODE:
+        print(f"\n【测试模式】随机抽取 {TEST_SAMPLE_SIZE} 个目录进行测试...")
+        if len(numeric_dirs) <= TEST_SAMPLE_SIZE:
+            test_dirs = numeric_dirs
+            print(f"  目录总数不足 {TEST_SAMPLE_SIZE},测试全部 {len(numeric_dirs)} 个目录")
+        else:
+            # 使用固定随机种子,确保可复现
+            random.seed(42)
+            test_dirs = random.sample(numeric_dirs, TEST_SAMPLE_SIZE)
+            test_dirs.sort(key=lambda d: int(d.name))  # 按数字排序,方便查看
+        
+        numeric_dirs = test_dirs
+        print(f"  测试目录: {[d.name for d in test_dirs]}")
+        # 测试模式不加载缓存,不进入二级筛选
+        cache = {
+            "processed_dirs": [],
+            "phase1_no_match_expert": [],
+            "phase1_no_match_company": [],
+            "results": [],
+            "stats": {},
+            "phase": 1,
+            "last_update": None
+        }
+    else:
+        # 加载缓存(断点续传)
+        print(f"\n【步骤 2/6】加载进度缓存...")
+        cache = load_progress_cache(temp_dir)
+    
     processed_dirs = set(cache.get("processed_dirs", []))
     processed_dirs = set(cache.get("processed_dirs", []))
     phase1_no_match_expert = set(cache.get("phase1_no_match_expert", []))
     phase1_no_match_expert = set(cache.get("phase1_no_match_expert", []))
     phase1_no_match_company = set(cache.get("phase1_no_match_company", []))
     phase1_no_match_company = set(cache.get("phase1_no_match_company", []))
     current_phase = cache.get("phase", 1)
     current_phase = cache.get("phase", 1)
+    all_results = cache.get("results", [])
 
 
-    if processed_dirs:
+    if processed_dirs and not TEST_MODE:
         print(f"  发现缓存:")
         print(f"  发现缓存:")
         print(f"    - 已处理: {len(processed_dirs)} 个目录")
         print(f"    - 已处理: {len(processed_dirs)} 个目录")
         print(f"    - 专家评审一级未找到: {len(phase1_no_match_expert)} 个目录")
         print(f"    - 专家评审一级未找到: {len(phase1_no_match_expert)} 个目录")
         print(f"    - 公司评审一级未找到: {len(phase1_no_match_company)} 个目录")
         print(f"    - 公司评审一级未找到: {len(phase1_no_match_company)} 个目录")
         print(f"    - 当前阶段: 第{current_phase}阶段")
         print(f"    - 当前阶段: 第{current_phase}阶段")
     else:
     else:
-        print(f"  无缓存,将从头开始处理")
+        print(f"  {'无缓存(测试模式),将从头开始处理' if TEST_MODE else '无缓存,将从头开始处理'}")
 
 
     # ==================== 一级筛选 ====================
     # ==================== 一级筛选 ====================
     if current_phase == 1:
     if current_phase == 1:
@@ -610,8 +998,6 @@ def main():
             print(f"  公司评审输出到: {company_output_dir}")
             print(f"  公司评审输出到: {company_output_dir}")
             print()
             print()
 
 
-            results = cache.get("results", [])
-
             default_stats = {
             default_stats = {
                 "total_directories": len(numeric_dirs),
                 "total_directories": len(numeric_dirs),
                 "expert_success_count": 0,
                 "expert_success_count": 0,
@@ -630,55 +1016,23 @@ def main():
                 if key not in stats:
                 if key not in stats:
                     stats[key] = value
                     stats[key] = value
 
 
-            expert_success_count = 0
-            company_success_count = 0
-            total_to_process = len(dirs_to_process)
-
-            for idx, dir_path in enumerate(dirs_to_process):
-                dir_id = dir_path.name
-                overall_idx = len(processed_dirs) + idx + 1
-
-                print(f"\n[{overall_idx}/{len(numeric_dirs)}] 当前目录: {dir_id}")
-                print_progress_bar(idx + 1, total_to_process)
-
-                # 处理专家评审
-                expert_success, expert_file = process_review_type(
-                    dir_path, phase=1, max_pages=PHASE_1_PAGES, 
-                    review_type="expert", output_dir=expert_output_dir,
-                    dir_id=dir_id, stats=stats, results=results
-                )
-                if expert_success:
-                    stats["expert_success_count"] += 1
-                    expert_success_count += 1
-                else:
-                    phase1_no_match_expert.add(dir_id)
-
-                # 处理公司评审
-                company_success, company_file = process_review_type(
-                    dir_path, phase=1, max_pages=PHASE_1_PAGES, 
-                    review_type="company", output_dir=company_output_dir,
-                    dir_id=dir_id, stats=stats, results=results
-                )
-                if company_success:
-                    stats["company_success_count"] += 1
-                    company_success_count += 1
-                else:
-                    phase1_no_match_company.add(dir_id)
-
-                processed_dirs.add(dir_id)
-
-                if (idx + 1) % 10 == 0 or idx == len(dirs_to_process) - 1:
-                    cache_data = {
-                        "processed_dirs": list(processed_dirs),
-                        "phase1_no_match_expert": list(phase1_no_match_expert),
-                        "phase1_no_match_company": list(phase1_no_match_company),
-                        "results": results,
-                        "stats": stats,
-                        "phase": 1,
-                        "total_directories": len(numeric_dirs)
-                    }
-                    save_progress_cache(temp_dir, cache_data)
-                    print(f"\n    💾 进度已缓存 (已处理 {len(processed_dirs)}/{len(numeric_dirs)} 个目录)")
+            # 并发处理一级筛选
+            # 在测试模式下使用详细日志(verbose=True),正式运行使用静默模式(verbose=False)
+            verbose_mode = TEST_MODE  # 测试模式打印详细日志,正式模式静默
+            stats, phase1_no_match_expert, phase1_no_match_company = run_phase_concurrently(
+                dirs_to_process, phase=1, max_pages=PHASE_1_PAGES,
+                expert_output_dir=expert_output_dir, company_output_dir=company_output_dir,
+                temp_dir=temp_dir, numeric_dirs=numeric_dirs,
+                stats=stats, processed_dirs=processed_dirs,
+                phase1_no_match_expert=phase1_no_match_expert,
+                phase1_no_match_company=phase1_no_match_company,
+                verbose=verbose_mode,
+                cache_every=10
+            )
+            
+            # 更新结果列表
+            cache = load_progress_cache(temp_dir)
+            all_results = cache.get("results", [])
 
 
             print(f"\n\n【一级筛选完成】")
             print(f"\n\n【一级筛选完成】")
             print(f"  专家评审成功: {stats['expert_success_count']} 个")
             print(f"  专家评审成功: {stats['expert_success_count']} 个")
@@ -686,6 +1040,23 @@ def main():
             print(f"  专家评审未找到: {len(phase1_no_match_expert)} 个")
             print(f"  专家评审未找到: {len(phase1_no_match_expert)} 个")
             print(f"  公司评审未找到: {len(phase1_no_match_company)} 个")
             print(f"  公司评审未找到: {len(phase1_no_match_company)} 个")
 
 
+            # 测试模式下直接退出,不进行二级筛选和保存
+            if TEST_MODE:
+                print(f"\n{'='*70}")
+                print("【测试模式完成】")
+                print(f"  共测试 {len(dirs_to_process)} 个目录")
+                print(f"  专家评审成功: {stats['expert_success_count']} 个")
+                print(f"  公司评审成功: {stats['company_success_count']} 个")
+                print(f"  测试结果已保存到缓存,可查看输出目录确认文件")
+                print("="*70)
+                
+                # 测试模式也保存最终结果
+                _save_final_results(
+                    temp_dir, expert_output_dir, company_output_dir,
+                    numeric_dirs, all_results, stats, processed_dirs
+                )
+                return
+
             # 询问是否进行二级筛选
             # 询问是否进行二级筛选
             total_no_match = len(phase1_no_match_expert.union(phase1_no_match_company))
             total_no_match = len(phase1_no_match_expert.union(phase1_no_match_company))
             print(f"\n{'='*70}")
             print(f"\n{'='*70}")
@@ -720,7 +1091,7 @@ def main():
                     "processed_dirs": list(processed_dirs),
                     "processed_dirs": list(processed_dirs),
                     "phase1_no_match_expert": list(phase1_no_match_expert),
                     "phase1_no_match_expert": list(phase1_no_match_expert),
                     "phase1_no_match_company": list(phase1_no_match_company),
                     "phase1_no_match_company": list(phase1_no_match_company),
-                    "results": results,
+                    "results": all_results,
                     "stats": stats,
                     "stats": stats,
                     "phase": 2,
                     "phase": 2,
                     "total_directories": len(numeric_dirs)
                     "total_directories": len(numeric_dirs)
@@ -735,7 +1106,7 @@ def main():
 
 
         # 重新加载以获取最新状态
         # 重新加载以获取最新状态
         cache = load_progress_cache(temp_dir)
         cache = load_progress_cache(temp_dir)
-        results = cache.get("results", [])
+        all_results = cache.get("results", [])
         stats = cache.get("stats", {})
         stats = cache.get("stats", {})
         phase1_no_match_expert = set(cache.get("phase1_no_match_expert", []))
         phase1_no_match_expert = set(cache.get("phase1_no_match_expert", []))
         phase1_no_match_company = set(cache.get("phase1_no_match_company", []))
         phase1_no_match_company = set(cache.get("phase1_no_match_company", []))
@@ -743,7 +1114,9 @@ def main():
         # 获取需要二级筛选的目录(专家评审或公司评审任一未找到)
         # 获取需要二级筛选的目录(专家评审或公司评审任一未找到)
         phase2_dirs_expert = [d for d in numeric_dirs if d.name in phase1_no_match_expert]
         phase2_dirs_expert = [d for d in numeric_dirs if d.name in phase1_no_match_expert]
         phase2_dirs_company = [d for d in numeric_dirs if d.name in phase1_no_match_company]
         phase2_dirs_company = [d for d in numeric_dirs if d.name in phase1_no_match_company]
-        all_phase2_dirs = set(phase2_dirs_expert + phase2_dirs_company)
+        all_phase2_dirs = list(set(phase2_dirs_expert + phase2_dirs_company))
+        # 按数字排序
+        all_phase2_dirs.sort(key=lambda d: int(d.name))
 
 
         if not all_phase2_dirs:
         if not all_phase2_dirs:
             print(f"\n  没有需要二级筛选的目录")
             print(f"\n  没有需要二级筛选的目录")
@@ -751,84 +1124,49 @@ def main():
             print(f"\n【步骤 4/6】二级筛选处理...")
             print(f"\n【步骤 4/6】二级筛选处理...")
             print(f"  专家评审需二级筛选: {len(phase2_dirs_expert)} 个目录")
             print(f"  专家评审需二级筛选: {len(phase2_dirs_expert)} 个目录")
             print(f"  公司评审需二级筛选: {len(phase2_dirs_company)} 个目录")
             print(f"  公司评审需二级筛选: {len(phase2_dirs_company)} 个目录")
-
-            expert_phase2_success = 0
-            company_phase2_success = 0
-
-            # 处理专家评审二级筛选
-            if phase2_dirs_expert:
-                print(f"\n  --- 专家评审二级筛选 ---")
-                for idx, dir_path in enumerate(phase2_dirs_expert):
-                    dir_id = dir_path.name
-                    print(f"\n[{idx+1}/{len(phase2_dirs_expert)}] 专家评审二级筛选: {dir_id}")
-
-                    success, selected_file = process_review_type(
-                        dir_path, phase=2, max_pages=PHASE_2_PAGES, 
-                        review_type="expert", output_dir=expert_output_dir,
-                        dir_id=dir_id, stats=stats, results=results
-                    )
-                    if success:
-                        stats["expert_phase2_success_count"] = stats.get("expert_phase2_success_count", 0) + 1
-                        expert_phase2_success += 1
-                    phase1_no_match_expert.discard(dir_id)
-
-                    if (idx + 1) % 10 == 0 or idx == len(phase2_dirs_expert) - 1:
-                        cache_data = {
-                            "processed_dirs": list(processed_dirs),
-                            "phase1_no_match_expert": list(phase1_no_match_expert),
-                            "phase1_no_match_company": list(phase1_no_match_company),
-                            "results": results,
-                            "stats": stats,
-                            "phase": 2,
-                            "total_directories": len(numeric_dirs)
-                        }
-                        save_progress_cache(temp_dir, cache_data)
-
-            # 处理公司评审二级筛选
-            if phase2_dirs_company:
-                print(f"\n  --- 公司评审二级筛选 ---")
-                for idx, dir_path in enumerate(phase2_dirs_company):
-                    dir_id = dir_path.name
-                    print(f"\n[{idx+1}/{len(phase2_dirs_company)}] 公司评审二级筛选: {dir_id}")
-
-                    success, selected_file = process_review_type(
-                        dir_path, phase=2, max_pages=PHASE_2_PAGES, 
-                        review_type="company", output_dir=company_output_dir,
-                        dir_id=dir_id, stats=stats, results=results
-                    )
-                    if success:
-                        stats["company_phase2_success_count"] = stats.get("company_phase2_success_count", 0) + 1
-                        company_phase2_success += 1
-                    phase1_no_match_company.discard(dir_id)
-
-                    if (idx + 1) % 10 == 0 or idx == len(phase2_dirs_company) - 1:
-                        cache_data = {
-                            "processed_dirs": list(processed_dirs),
-                            "phase1_no_match_expert": list(phase1_no_match_expert),
-                            "phase1_no_match_company": list(phase1_no_match_company),
-                            "results": results,
-                            "stats": stats,
-                            "phase": 2,
-                            "total_directories": len(numeric_dirs)
-                        }
-                        save_progress_cache(temp_dir, cache_data)
+            print(f"  总计需二级筛选: {len(all_phase2_dirs)} 个目录")
+            
+            # 二级筛选也使用并发处理
+            # 注意:二级筛选时,之前已成功的目录不需要再处理
+            # 但由于 process_directory_worker 会同时处理两种类型,
+            # 已成功的类型会再次被处理(但结果相同,不会重复复制因为文件名相同会覆盖)
+            # 为了效率,我们只处理有未匹配的目录
+            
+            stats, phase1_no_match_expert, phase1_no_match_company = run_phase_concurrently(
+                all_phase2_dirs, phase=2, max_pages=PHASE_2_PAGES,
+                expert_output_dir=expert_output_dir, company_output_dir=company_output_dir,
+                temp_dir=temp_dir, numeric_dirs=numeric_dirs,
+                stats=stats, processed_dirs=processed_dirs,
+                phase1_no_match_expert=phase1_no_match_expert,
+                phase1_no_match_company=phase1_no_match_company,
+                verbose=False,  # 二级筛选使用静默模式
+                cache_every=10
+            )
+            
+            # 更新结果
+            cache = load_progress_cache(temp_dir)
+            all_results = cache.get("results", [])
 
 
             print(f"\n\n【二级筛选完成】")
             print(f"\n\n【二级筛选完成】")
-            print(f"  专家评审二级筛选成功: {expert_phase2_success} 个")
-            print(f"  公司评审二级筛选成功: {company_phase2_success} 个")
+            print(f"  专家评审二级筛选成功: {stats.get('expert_phase2_success_count', 0)} 个")
+            print(f"  公司评审二级筛选成功: {stats.get('company_phase2_success_count', 0)} 个")
 
 
         current_phase = 3
         current_phase = 3
 
 
     # ==================== 保存最终结果 ====================
     # ==================== 保存最终结果 ====================
+    _save_final_results(
+        temp_dir, expert_output_dir, company_output_dir,
+        numeric_dirs, all_results, stats, processed_dirs
+    )
+
+
+def _save_final_results(temp_dir: Path, expert_output_dir: Path, company_output_dir: Path,
+                        numeric_dirs: List[Path], results: List[Dict], stats: Dict, processed_dirs: Set):
+    """保存最终结果(Excel、JSON统计、缺失目录ID等)"""
     print(f"\n\n【步骤 5/6】保存最终结果...")
     print(f"\n\n【步骤 5/6】保存最终结果...")
     expert_output_dir.mkdir(parents=True, exist_ok=True)
     expert_output_dir.mkdir(parents=True, exist_ok=True)
     company_output_dir.mkdir(parents=True, exist_ok=True)
     company_output_dir.mkdir(parents=True, exist_ok=True)
 
 
-    # 重新加载最新结果
-    cache = load_progress_cache(temp_dir)
-    results = cache.get("results", [])
-    stats = cache.get("stats", {})
-
     # 确保所有键存在
     # 确保所有键存在
     default_keys = [
     default_keys = [
         "total_directories", "expert_success_count", "company_success_count",
         "total_directories", "expert_success_count", "company_success_count",
@@ -854,7 +1192,7 @@ def main():
     company_phase1 = stats.get("company_success_count", 0)
     company_phase1 = stats.get("company_success_count", 0)
     company_phase2 = stats.get("company_phase2_success_count", 0)
     company_phase2 = stats.get("company_phase2_success_count", 0)
 
 
-    # 基于最终结果反推“未采集到”的目录ID,避免阶段缓存集合被覆盖导致不准确
+    # 基于最终结果反推"未采集到"的目录ID,避免阶段缓存集合被覆盖导致不准确
     all_dir_ids = {d.name for d in numeric_dirs}
     all_dir_ids = {d.name for d in numeric_dirs}
     expert_success_ids = {
     expert_success_ids = {
         str(r.get("目录ID"))
         str(r.get("目录ID"))
@@ -877,7 +1215,7 @@ def main():
         f.write("\n".join(company_missing_ids))
         f.write("\n".join(company_missing_ids))
 
 
     final_stats = {
     final_stats = {
-        "start_time": cache.get("start_time", datetime.now().isoformat()),
+        "start_time": stats.get("start_time", datetime.now().isoformat()),
         "end_time": datetime.now().isoformat(),
         "end_time": datetime.now().isoformat(),
         "total_directories": len(numeric_dirs),
         "total_directories": len(numeric_dirs),
         "processed_count": len(processed_dirs),
         "processed_count": len(processed_dirs),
@@ -944,6 +1282,8 @@ def main():
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
+    # Windows 下多进程必须使用 freeze_support
+    mp.freeze_support()
     try:
     try:
         main()
         main()
     except KeyboardInterrupt:
     except KeyboardInterrupt:

+ 3112 - 0
src/app/scripts/ceshi/temp/评审筛选进度缓存_已完成.json

@@ -0,0 +1,3112 @@
+{
+  "processed_dirs": [
+    "1308",
+    "41",
+    "645",
+    "2217",
+    "2316",
+    "304",
+    "542",
+    "1725",
+    "434",
+    "1504",
+    "191",
+    "1520",
+    "1923",
+    "2091",
+    "3533",
+    "4708",
+    "1113",
+    "2023",
+    "86",
+    "1986",
+    "1633",
+    "879",
+    "230",
+    "864",
+    "1218",
+    "1597",
+    "1892",
+    "2581",
+    "2194",
+    "1448",
+    "1238",
+    "2653",
+    "18",
+    "3372",
+    "1927",
+    "1872",
+    "2233",
+    "2857",
+    "3143",
+    "1874",
+    "511",
+    "947",
+    "2483",
+    "3215",
+    "819",
+    "3861",
+    "2278",
+    "1386",
+    "2102",
+    "3667",
+    "3968",
+    "1882",
+    "2094",
+    "1035",
+    "2066",
+    "3333",
+    "1791",
+    "1202",
+    "218",
+    "1855",
+    "2056",
+    "2847",
+    "1810",
+    "2534",
+    "2724",
+    "4867",
+    "186",
+    "1672",
+    "2355",
+    "1350",
+    "1493",
+    "245",
+    "382",
+    "185",
+    "3119",
+    "1652",
+    "3294",
+    "970",
+    "933",
+    "2140",
+    "1443",
+    "3113",
+    "1511",
+    "5729",
+    "808",
+    "934",
+    "1513",
+    "471",
+    "5057",
+    "635",
+    "437",
+    "893",
+    "1730",
+    "804",
+    "1159",
+    "2148",
+    "1181",
+    "973",
+    "2609",
+    "2565",
+    "2718",
+    "3303",
+    "1427",
+    "1970",
+    "2049",
+    "94",
+    "1768",
+    "2321",
+    "911",
+    "965",
+    "164",
+    "2789",
+    "508",
+    "4019",
+    "1572",
+    "2163",
+    "835",
+    "855",
+    "254",
+    "113",
+    "1491",
+    "278",
+    "2050",
+    "2046",
+    "1755",
+    "1753",
+    "1919",
+    "4415",
+    "1548",
+    "2219",
+    "2634",
+    "357",
+    "3217",
+    "1563",
+    "615",
+    "476",
+    "1546",
+    "1582",
+    "1668",
+    "1964",
+    "1570",
+    "2083",
+    "2866",
+    "3289",
+    "2386",
+    "2076",
+    "3489",
+    "2973",
+    "2123",
+    "2639",
+    "3357",
+    "1253",
+    "2285",
+    "2132",
+    "2729",
+    "2460",
+    "3591",
+    "1070",
+    "654",
+    "1661",
+    "1922",
+    "473",
+    "3525",
+    "3026",
+    "1119",
+    "400",
+    "2222",
+    "1875",
+    "1027",
+    "2319",
+    "2092",
+    "1506",
+    "909",
+    "1916",
+    "5142",
+    "1184",
+    "3332",
+    "1910",
+    "1372",
+    "2229",
+    "1932",
+    "3695",
+    "743",
+    "2134",
+    "5728",
+    "1908",
+    "717",
+    "3398",
+    "269",
+    "123",
+    "2596",
+    "1011",
+    "1351",
+    "386",
+    "2291",
+    "2417",
+    "3417",
+    "1373",
+    "2055",
+    "1272",
+    "713",
+    "2277",
+    "2497",
+    "2980",
+    "1130",
+    "3305",
+    "1710",
+    "3463",
+    "1579",
+    "544",
+    "111",
+    "285",
+    "246",
+    "3101",
+    "872",
+    "2239",
+    "2373",
+    "2854",
+    "976",
+    "684",
+    "1999",
+    "2168",
+    "2697",
+    "853",
+    "641",
+    "1689",
+    "2560",
+    "908",
+    "2127",
+    "3236",
+    "1153",
+    "1258",
+    "1085",
+    "4568",
+    "2087",
+    "989",
+    "2044",
+    "3162",
+    "2764",
+    "1264",
+    "4409",
+    "3155",
+    "1567",
+    "4582",
+    "2418",
+    "2016",
+    "3093",
+    "2283",
+    "1691",
+    "1848",
+    "1965",
+    "996",
+    "1581",
+    "1842",
+    "3211",
+    "823",
+    "2183",
+    "957",
+    "1602",
+    "232",
+    "396",
+    "1369",
+    "1353",
+    "1291",
+    "1519",
+    "867",
+    "1779",
+    "3809",
+    "3679",
+    "3243",
+    "1120",
+    "2977",
+    "1726",
+    "198",
+    "2737",
+    "2115",
+    "994",
+    "3423",
+    "2051",
+    "3557",
+    "1556",
+    "267",
+    "156",
+    "1178",
+    "2619",
+    "3126",
+    "4071",
+    "861",
+    "1819",
+    "2462",
+    "2669",
+    "1025",
+    "5128",
+    "3660",
+    "2187",
+    "140",
+    "1368",
+    "512",
+    "5419",
+    "1127",
+    "605",
+    "1889",
+    "736",
+    "1482",
+    "2248",
+    "566",
+    "1269",
+    "2208",
+    "3174",
+    "3747",
+    "539",
+    "977",
+    "1453",
+    "290",
+    "497",
+    "1501",
+    "1058",
+    "2251",
+    "3204",
+    "5629",
+    "3545",
+    "1860",
+    "482",
+    "984",
+    "1414",
+    "1676",
+    "2230",
+    "504",
+    "3096",
+    "599",
+    "1784",
+    "1341",
+    "958",
+    "1797",
+    "2529",
+    "5423",
+    "1837",
+    "1067",
+    "2170",
+    "2047",
+    "376",
+    "1884",
+    "2772",
+    "3350",
+    "2551",
+    "408",
+    "1920",
+    "902",
+    "5376",
+    "3182",
+    "1596",
+    "1429",
+    "1728",
+    "1888",
+    "4900",
+    "705",
+    "5643",
+    "3137",
+    "2192",
+    "3515",
+    "810",
+    "541",
+    "2842",
+    "1337",
+    "1949",
+    "1685",
+    "1760",
+    "516",
+    "767",
+    "2342",
+    "1832",
+    "1891",
+    "2206",
+    "1162",
+    "1216",
+    "5690",
+    "1176",
+    "257",
+    "609",
+    "2125",
+    "2326",
+    "354",
+    "2165",
+    "346",
+    "863",
+    "1179",
+    "1595",
+    "1309",
+    "828",
+    "1045",
+    "2211",
+    "337",
+    "1624",
+    "4834",
+    "1828",
+    "1154",
+    "1354",
+    "671",
+    "922",
+    "1928",
+    "1188",
+    "120",
+    "1894",
+    "1953",
+    "2971",
+    "4864",
+    "3362",
+    "1466",
+    "1110",
+    "2261",
+    "1609",
+    "1877",
+    "978",
+    "833",
+    "895",
+    "955",
+    "2252",
+    "785",
+    "2305",
+    "1311",
+    "1646",
+    "1223",
+    "1068",
+    "3003",
+    "251",
+    "2011",
+    "580",
+    "280",
+    "1088",
+    "4082",
+    "334",
+    "692",
+    "4731",
+    "1961",
+    "2527",
+    "417",
+    "1534",
+    "1973",
+    "844",
+    "133",
+    "640",
+    "501",
+    "1903",
+    "2263",
+    "456",
+    "2817",
+    "1549",
+    "1126",
+    "3142",
+    "181",
+    "1823",
+    "366",
+    "1914",
+    "2760",
+    "1687",
+    "551",
+    "2297",
+    "4292",
+    "2228",
+    "67",
+    "207",
+    "988",
+    "1979",
+    "575",
+    "878",
+    "1569",
+    "2372",
+    "1794",
+    "1626",
+    "4283",
+    "783",
+    "1255",
+    "5131",
+    "2801",
+    "768",
+    "3340",
+    "3229",
+    "2071",
+    "1303",
+    "5408",
+    "1680",
+    "325",
+    "1662",
+    "950",
+    "2849",
+    "1762",
+    "3430",
+    "787",
+    "2743",
+    "2714",
+    "115",
+    "699",
+    "1880",
+    "3253",
+    "505",
+    "2151",
+    "2178",
+    "51",
+    "3098",
+    "604",
+    "135",
+    "1675",
+    "2504",
+    "147",
+    "1508",
+    "1780",
+    "1583",
+    "3696",
+    "1765",
+    "3488",
+    "939",
+    "1371",
+    "3728",
+    "733",
+    "2186",
+    "807",
+    "624",
+    "4355",
+    "328",
+    "4162",
+    "2172",
+    "4209",
+    "1312",
+    "2448",
+    "1698",
+    "3033",
+    "1062",
+    "2275",
+    "2949",
+    "1406",
+    "1518",
+    "523",
+    "1477",
+    "336",
+    "3052",
+    "2312",
+    "2113",
+    "422",
+    "343",
+    "2550",
+    "2800",
+    "2394",
+    "1525",
+    "926",
+    "2010",
+    "727",
+    "2160",
+    "259",
+    "870",
+    "1001",
+    "1087",
+    "203",
+    "737",
+    "1183",
+    "840",
+    "298",
+    "494",
+    "1846",
+    "1329",
+    "3718",
+    "3775",
+    "2738",
+    "1820",
+    "1320",
+    "1349",
+    "2176",
+    "2562",
+    "1134",
+    "562",
+    "1206",
+    "262",
+    "729",
+    "2875",
+    "3683",
+    "781",
+    "1840",
+    "3697",
+    "214",
+    "3045",
+    "1994",
+    "1621",
+    "1995",
+    "5095",
+    "3194",
+    "3279",
+    "194",
+    "2898",
+    "3344",
+    "2595",
+    "1924",
+    "2762",
+    "141",
+    "587",
+    "403",
+    "2666",
+    "709",
+    "905",
+    "4885",
+    "1237",
+    "2344",
+    "4686",
+    "1331",
+    "1426",
+    "758",
+    "1158",
+    "4280",
+    "1925",
+    "1586",
+    "1918",
+    "2166",
+    "3538",
+    "1742",
+    "3326",
+    "2215",
+    "3046",
+    "1066",
+    "2019",
+    "3584",
+    "3114",
+    "159",
+    "1362",
+    "201",
+    "1138",
+    "549",
+    "407",
+    "2450",
+    "3733",
+    "383",
+    "1271",
+    "2189",
+    "820",
+    "3507",
+    "1876",
+    "2108",
+    "1751",
+    "1464",
+    "921",
+    "1822",
+    "3037",
+    "814",
+    "1056",
+    "1003",
+    "1861"
+  ],
+  "phase1_no_match_expert": [
+    "1308",
+    "41",
+    "645",
+    "2217",
+    "2316",
+    "304",
+    "542",
+    "1725",
+    "434",
+    "1504",
+    "1923",
+    "3533",
+    "2023",
+    "1986",
+    "1633",
+    "879",
+    "230",
+    "864",
+    "1597",
+    "1892",
+    "2581",
+    "2194",
+    "1448",
+    "1238",
+    "2653",
+    "18",
+    "3372",
+    "1927",
+    "1872",
+    "2233",
+    "2857",
+    "3143",
+    "1874",
+    "511",
+    "947",
+    "3215",
+    "819",
+    "2278",
+    "1386",
+    "2102",
+    "3667",
+    "3968",
+    "1882",
+    "1035",
+    "2066",
+    "1791",
+    "1202",
+    "2056",
+    "2847",
+    "1810",
+    "2534",
+    "2724",
+    "4867",
+    "186",
+    "1672",
+    "2355",
+    "1350",
+    "1493",
+    "245",
+    "382",
+    "185",
+    "3119",
+    "3294",
+    "970",
+    "933",
+    "2140",
+    "1443",
+    "3113",
+    "1511",
+    "5729",
+    "808",
+    "934",
+    "1513",
+    "5057",
+    "635",
+    "437",
+    "893",
+    "804",
+    "1159",
+    "2148",
+    "1181",
+    "973",
+    "2565",
+    "2718",
+    "3303",
+    "1427",
+    "1970",
+    "2049",
+    "94",
+    "1768",
+    "2321",
+    "911",
+    "965",
+    "164",
+    "2789",
+    "4019",
+    "1572",
+    "2163",
+    "835",
+    "855",
+    "254",
+    "113",
+    "1491",
+    "278",
+    "2050",
+    "2046",
+    "1755",
+    "1753",
+    "1919",
+    "4415",
+    "1548",
+    "2219",
+    "2634",
+    "357",
+    "3217",
+    "1563",
+    "615",
+    "476",
+    "1546",
+    "1582",
+    "1668",
+    "1964",
+    "1570",
+    "2866",
+    "3289",
+    "2386",
+    "2076",
+    "3489",
+    "2973",
+    "2639",
+    "3357",
+    "1253",
+    "2285",
+    "2132",
+    "2729",
+    "3591",
+    "1070",
+    "654",
+    "1922",
+    "1661",
+    "473",
+    "3026",
+    "1119",
+    "400",
+    "1875",
+    "1027",
+    "2319",
+    "2092",
+    "1506",
+    "909",
+    "1916",
+    "5142",
+    "1184",
+    "1910",
+    "1372",
+    "1932",
+    "3695",
+    "743",
+    "5728",
+    "1908",
+    "717",
+    "3398",
+    "1011",
+    "1351",
+    "386",
+    "2291",
+    "2417",
+    "3417",
+    "1373",
+    "2055",
+    "1272",
+    "713",
+    "2277",
+    "2497",
+    "2980",
+    "1130",
+    "3305",
+    "3463",
+    "1579",
+    "544",
+    "111",
+    "285",
+    "246",
+    "872",
+    "2239",
+    "2854",
+    "976",
+    "684",
+    "1999",
+    "2168",
+    "2697",
+    "641",
+    "1689",
+    "2560",
+    "908",
+    "2127",
+    "3236",
+    "1153",
+    "1258",
+    "1085",
+    "3162",
+    "989",
+    "2044",
+    "2764",
+    "1264",
+    "4409",
+    "3155",
+    "1567",
+    "4582",
+    "2418",
+    "2016",
+    "3093",
+    "2283",
+    "1691",
+    "1848",
+    "1965",
+    "996",
+    "1581",
+    "3211",
+    "823",
+    "957",
+    "1602",
+    "232",
+    "396",
+    "1369",
+    "1353",
+    "1291",
+    "1519",
+    "867",
+    "1779",
+    "3809",
+    "3679",
+    "3243",
+    "1120",
+    "2977",
+    "1726",
+    "198",
+    "2737",
+    "2115",
+    "994",
+    "3423",
+    "2051",
+    "3557",
+    "1556",
+    "267",
+    "156",
+    "1178",
+    "2619",
+    "3126",
+    "4071",
+    "861",
+    "1819",
+    "2462",
+    "2669",
+    "1025",
+    "5128",
+    "3660",
+    "2187",
+    "140",
+    "1368",
+    "512",
+    "5419",
+    "1127",
+    "605",
+    "1889",
+    "736",
+    "1482",
+    "2248",
+    "566",
+    "1269",
+    "2208",
+    "3174",
+    "3747",
+    "539",
+    "977",
+    "1453",
+    "290",
+    "497",
+    "1501",
+    "1058",
+    "2251",
+    "3204",
+    "5629",
+    "3545",
+    "1860",
+    "482",
+    "984",
+    "1676",
+    "504",
+    "3096",
+    "599",
+    "1784",
+    "1341",
+    "958",
+    "1797",
+    "2529",
+    "5423",
+    "1837",
+    "1067",
+    "2170",
+    "2047",
+    "376",
+    "1884",
+    "2772",
+    "3350",
+    "2551",
+    "408",
+    "1920",
+    "902",
+    "5376",
+    "3182",
+    "1596",
+    "1429",
+    "1728",
+    "1888",
+    "4900",
+    "705",
+    "5643",
+    "3137",
+    "3515",
+    "810",
+    "541",
+    "2842",
+    "1337",
+    "1949",
+    "1685",
+    "1760",
+    "516",
+    "767",
+    "2342",
+    "1832",
+    "1891",
+    "2206",
+    "1162",
+    "1216",
+    "5690",
+    "257",
+    "609",
+    "354",
+    "2165",
+    "346",
+    "863",
+    "1179",
+    "1595",
+    "828",
+    "1045",
+    "2211",
+    "337",
+    "1624",
+    "4834",
+    "1154",
+    "1354",
+    "671",
+    "922",
+    "1928",
+    "1188",
+    "120",
+    "1953",
+    "2971",
+    "4864",
+    "3362",
+    "1466",
+    "1110",
+    "2261",
+    "1609",
+    "978",
+    "833",
+    "895",
+    "785",
+    "2305",
+    "1223",
+    "1068",
+    "3003",
+    "251",
+    "580",
+    "1088",
+    "334",
+    "692",
+    "4731",
+    "1961",
+    "2527",
+    "417",
+    "1973",
+    "844",
+    "133",
+    "640",
+    "501",
+    "2263",
+    "456",
+    "2817",
+    "1549",
+    "1126",
+    "3142",
+    "1823",
+    "366",
+    "1914",
+    "2760",
+    "1687",
+    "2297",
+    "4292",
+    "2228",
+    "988",
+    "207",
+    "1979",
+    "575",
+    "1569",
+    "2372",
+    "1794",
+    "4283",
+    "783",
+    "1255",
+    "5131",
+    "2801",
+    "768",
+    "3340",
+    "3229",
+    "2071",
+    "1303",
+    "5408",
+    "1680",
+    "325",
+    "1662",
+    "950",
+    "2849",
+    "2743",
+    "2714",
+    "115",
+    "699",
+    "1880",
+    "3253",
+    "505",
+    "2178",
+    "3098",
+    "604",
+    "135",
+    "1675",
+    "147",
+    "1508",
+    "1780",
+    "3696",
+    "3488",
+    "939",
+    "1371",
+    "3728",
+    "733",
+    "2186",
+    "807",
+    "624",
+    "4355",
+    "328",
+    "4162",
+    "2172",
+    "4209",
+    "2448",
+    "1698",
+    "3033",
+    "2275",
+    "2949",
+    "1406",
+    "1518",
+    "523",
+    "336",
+    "3052",
+    "2312",
+    "2113",
+    "343",
+    "2550",
+    "2800",
+    "2394",
+    "926",
+    "2160",
+    "259",
+    "1001",
+    "203",
+    "737",
+    "1183",
+    "840",
+    "298",
+    "494",
+    "1846",
+    "1329",
+    "3718",
+    "3775",
+    "1820",
+    "1320",
+    "1349",
+    "2562",
+    "1134",
+    "562",
+    "729",
+    "2875",
+    "3683",
+    "781",
+    "1840",
+    "3697",
+    "3045",
+    "1994",
+    "1621",
+    "1995",
+    "5095",
+    "3279",
+    "2898",
+    "3344",
+    "1924",
+    "2762",
+    "587",
+    "403",
+    "2666",
+    "709",
+    "905",
+    "4885",
+    "1237",
+    "2344",
+    "4686",
+    "758",
+    "1158",
+    "4280",
+    "1925",
+    "1586",
+    "1918",
+    "2166",
+    "1742",
+    "3326",
+    "2215",
+    "3046",
+    "1066",
+    "2019",
+    "3584",
+    "159",
+    "1362",
+    "201",
+    "1138",
+    "549",
+    "407",
+    "2450",
+    "3733",
+    "383",
+    "1271",
+    "2189",
+    "820",
+    "1876",
+    "2108",
+    "1464",
+    "921",
+    "1822",
+    "3037",
+    "814",
+    "1003",
+    "1861"
+  ],
+  "phase1_no_match_company": [
+    "1308",
+    "645",
+    "2217",
+    "2316",
+    "304",
+    "1725",
+    "434",
+    "1504",
+    "191",
+    "1923",
+    "3533",
+    "1113",
+    "2023",
+    "86",
+    "1986",
+    "1633",
+    "879",
+    "230",
+    "864",
+    "1892",
+    "2581",
+    "1448",
+    "1238",
+    "2653",
+    "18",
+    "1927",
+    "1872",
+    "2233",
+    "2857",
+    "3143",
+    "1874",
+    "511",
+    "947",
+    "3215",
+    "819",
+    "3861",
+    "2278",
+    "1386",
+    "2102",
+    "3667",
+    "3968",
+    "1882",
+    "1035",
+    "2066",
+    "1791",
+    "1202",
+    "2056",
+    "2847",
+    "1810",
+    "2534",
+    "2724",
+    "4867",
+    "186",
+    "1672",
+    "2355",
+    "1350",
+    "1493",
+    "245",
+    "382",
+    "3119",
+    "3294",
+    "1652",
+    "933",
+    "2140",
+    "1443",
+    "3113",
+    "1511",
+    "5729",
+    "934",
+    "1513",
+    "471",
+    "635",
+    "437",
+    "893",
+    "1730",
+    "804",
+    "1159",
+    "2148",
+    "1181",
+    "973",
+    "2565",
+    "2718",
+    "3303",
+    "1427",
+    "1970",
+    "2049",
+    "94",
+    "1768",
+    "2321",
+    "911",
+    "965",
+    "164",
+    "2789",
+    "4019",
+    "1572",
+    "2163",
+    "835",
+    "855",
+    "254",
+    "113",
+    "1491",
+    "278",
+    "2050",
+    "2046",
+    "1755",
+    "1753",
+    "1919",
+    "4415",
+    "1548",
+    "2219",
+    "357",
+    "3217",
+    "1563",
+    "615",
+    "1546",
+    "1582",
+    "2866",
+    "1668",
+    "1964",
+    "1570",
+    "3289",
+    "2386",
+    "2076",
+    "2973",
+    "2639",
+    "3357",
+    "1253",
+    "2285",
+    "2132",
+    "2729",
+    "3591",
+    "1070",
+    "654",
+    "1922",
+    "1661",
+    "473",
+    "3026",
+    "1119",
+    "400",
+    "1875",
+    "1027",
+    "2319",
+    "2092",
+    "1506",
+    "1916",
+    "5142",
+    "1184",
+    "1910",
+    "1372",
+    "1932",
+    "3695",
+    "743",
+    "5728",
+    "1908",
+    "717",
+    "3398",
+    "2596",
+    "1011",
+    "1351",
+    "386",
+    "2291",
+    "2417",
+    "3417",
+    "1373",
+    "2055",
+    "1272",
+    "713",
+    "2277",
+    "2497",
+    "2980",
+    "1130",
+    "3305",
+    "1710",
+    "3463",
+    "1579",
+    "544",
+    "111",
+    "285",
+    "246",
+    "2239",
+    "976",
+    "684",
+    "1999",
+    "2168",
+    "2697",
+    "853",
+    "641",
+    "1689",
+    "908",
+    "2127",
+    "3236",
+    "1153",
+    "1258",
+    "1085",
+    "989",
+    "2044",
+    "2764",
+    "4409",
+    "3155",
+    "1567",
+    "4582",
+    "2418",
+    "2016",
+    "3093",
+    "2283",
+    "1965",
+    "996",
+    "1581",
+    "1842",
+    "3211",
+    "823",
+    "2183",
+    "957",
+    "1602",
+    "232",
+    "396",
+    "1369",
+    "1353",
+    "1291",
+    "1519",
+    "867",
+    "1779",
+    "3809",
+    "3679",
+    "1120",
+    "2977",
+    "1726",
+    "198",
+    "2737",
+    "2115",
+    "994",
+    "2051",
+    "3557",
+    "1556",
+    "267",
+    "156",
+    "1178",
+    "2619",
+    "3126",
+    "4071",
+    "861",
+    "1819",
+    "2462",
+    "2669",
+    "1025",
+    "5128",
+    "3660",
+    "2187",
+    "140",
+    "1368",
+    "512",
+    "5419",
+    "1127",
+    "605",
+    "1889",
+    "736",
+    "1482",
+    "2248",
+    "566",
+    "1269",
+    "2208",
+    "3174",
+    "3747",
+    "539",
+    "1453",
+    "290",
+    "497",
+    "1501",
+    "1058",
+    "2251",
+    "3204",
+    "3545",
+    "1860",
+    "482",
+    "984",
+    "1676",
+    "504",
+    "3096",
+    "599",
+    "1784",
+    "1341",
+    "958",
+    "1797",
+    "2529",
+    "1837",
+    "1067",
+    "2170",
+    "2047",
+    "376",
+    "1884",
+    "2772",
+    "3350",
+    "2551",
+    "408",
+    "1920",
+    "902",
+    "5376",
+    "3182",
+    "1596",
+    "1429",
+    "1728",
+    "1888",
+    "4900",
+    "705",
+    "5643",
+    "3137",
+    "2192",
+    "3515",
+    "810",
+    "541",
+    "1949",
+    "1685",
+    "1760",
+    "516",
+    "767",
+    "2342",
+    "1832",
+    "1891",
+    "2206",
+    "1162",
+    "1216",
+    "5690",
+    "1176",
+    "257",
+    "609",
+    "2326",
+    "354",
+    "2165",
+    "346",
+    "863",
+    "1179",
+    "1595",
+    "828",
+    "1045",
+    "2211",
+    "337",
+    "1624",
+    "4834",
+    "671",
+    "922",
+    "1928",
+    "1188",
+    "120",
+    "1894",
+    "1953",
+    "2971",
+    "4864",
+    "3362",
+    "1466",
+    "1110",
+    "2261",
+    "1609",
+    "978",
+    "833",
+    "895",
+    "785",
+    "2305",
+    "1311",
+    "1646",
+    "1068",
+    "251",
+    "2011",
+    "580",
+    "1088",
+    "4082",
+    "334",
+    "692",
+    "4731",
+    "1961",
+    "2527",
+    "417",
+    "1973",
+    "844",
+    "133",
+    "640",
+    "501",
+    "1903",
+    "2263",
+    "2817",
+    "1549",
+    "1126",
+    "3142",
+    "1823",
+    "366",
+    "1914",
+    "2760",
+    "1687",
+    "2297",
+    "4292",
+    "2228",
+    "67",
+    "207",
+    "988",
+    "1979",
+    "575",
+    "1569",
+    "2372",
+    "1794",
+    "4283",
+    "783",
+    "1255",
+    "5131",
+    "2801",
+    "768",
+    "3340",
+    "3229",
+    "1303",
+    "5408",
+    "1680",
+    "325",
+    "1662",
+    "950",
+    "2849",
+    "1762",
+    "2743",
+    "2714",
+    "115",
+    "699",
+    "1880",
+    "3253",
+    "505",
+    "2178",
+    "604",
+    "135",
+    "147",
+    "1508",
+    "1780",
+    "3696",
+    "3488",
+    "1371",
+    "3728",
+    "2186",
+    "807",
+    "624",
+    "4355",
+    "328",
+    "4162",
+    "2172",
+    "4209",
+    "2448",
+    "1698",
+    "2275",
+    "1406",
+    "1518",
+    "523",
+    "336",
+    "3052",
+    "2312",
+    "2113",
+    "343",
+    "2550",
+    "2800",
+    "926",
+    "2160",
+    "259",
+    "870",
+    "1001",
+    "203",
+    "737",
+    "1183",
+    "840",
+    "298",
+    "494",
+    "1846",
+    "1329",
+    "3718",
+    "3775",
+    "1820",
+    "1320",
+    "2562",
+    "1134",
+    "562",
+    "2875",
+    "3683",
+    "781",
+    "1840",
+    "3697",
+    "3045",
+    "1994",
+    "1621",
+    "1995",
+    "5095",
+    "3279",
+    "194",
+    "2898",
+    "1924",
+    "2762",
+    "141",
+    "587",
+    "403",
+    "2666",
+    "709",
+    "905",
+    "4885",
+    "1237",
+    "2344",
+    "4686",
+    "1331",
+    "758",
+    "1158",
+    "4280",
+    "1925",
+    "1586",
+    "1918",
+    "2166",
+    "3538",
+    "1742",
+    "3326",
+    "2215",
+    "3046",
+    "1066",
+    "2019",
+    "3584",
+    "3114",
+    "159",
+    "1362",
+    "201",
+    "1138",
+    "549",
+    "407",
+    "3733",
+    "383",
+    "1271",
+    "2189",
+    "820",
+    "1876",
+    "2108",
+    "1464",
+    "921",
+    "1822",
+    "3037",
+    "814",
+    "1003",
+    "1861"
+  ],
+  "results": [
+    {
+      "目录ID": "86",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "fa7b03d3-ab8f-4b5f-b035-e60e130c59ab.docx",
+      "新文件名": "86_fa7b03d3-ab8f-4b5f-b035-e60e130c59ab.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 2.82,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\86\\fa7b03d3-ab8f-4b5f-b035-e60e130c59ab.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\86_fa7b03d3-ab8f-4b5f-b035-e60e130c59ab.docx",
+      "处理时间": "2026-05-08T14:27:04.824750"
+    },
+    {
+      "目录ID": "67",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "新文件名": "67_aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 2,
+      "文件大小_MB": 20.92,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\67\\aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\67_aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "处理时间": "2026-05-08T14:27:05.517946"
+    },
+    {
+      "目录ID": "141",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "379f98a3-5f65-46c4-b95e-af7b8facd607.docx",
+      "新文件名": "141_379f98a3-5f65-46c4-b95e-af7b8facd607.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\141\\379f98a3-5f65-46c4-b95e-af7b8facd607.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\141_379f98a3-5f65-46c4-b95e-af7b8facd607.docx",
+      "处理时间": "2026-05-08T14:27:06.296909"
+    },
+    {
+      "目录ID": "185",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "9b671d5d-3769-421b-8705-58de3e52eaec.docx",
+      "新文件名": "185_9b671d5d-3769-421b-8705-58de3e52eaec.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\185\\9b671d5d-3769-421b-8705-58de3e52eaec.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\185_9b671d5d-3769-421b-8705-58de3e52eaec.docx",
+      "处理时间": "2026-05-08T14:27:06.432373"
+    },
+    {
+      "目录ID": "191",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "0c021b64-b1ef-4c56-9015-eb4c770aaddc.docx",
+      "新文件名": "191_0c021b64-b1ef-4c56-9015-eb4c770aaddc.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "专家论证意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\191\\0c021b64-b1ef-4c56-9015-eb4c770aaddc.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\191_0c021b64-b1ef-4c56-9015-eb4c770aaddc.docx",
+      "处理时间": "2026-05-08T14:27:06.468253"
+    },
+    {
+      "目录ID": "194",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "c71e0949-24e6-418e-9378-627ab1642bf8.docx",
+      "新文件名": "194_c71e0949-24e6-418e-9378-627ab1642bf8.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}论证.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|论证表|结论表)",
+      "匹配文件数": 1,
+      "文件大小_MB": 10.82,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\194\\c71e0949-24e6-418e-9378-627ab1642bf8.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\194_c71e0949-24e6-418e-9378-627ab1642bf8.docx",
+      "处理时间": "2026-05-08T14:27:07.509957"
+    },
+    {
+      "目录ID": "41",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "新文件名": "41_4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}施工方案.{0,10}(审核|审查|评审).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\41\\4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\41_4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "处理时间": "2026-05-08T14:27:08.370059"
+    },
+    {
+      "目录ID": "456",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "4ee1d568-8613-4736-bcea-21a9aa11b4f3.docx",
+      "新文件名": "456_4ee1d568-8613-4736-bcea-21a9aa11b4f3.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\456\\4ee1d568-8613-4736-bcea-21a9aa11b4f3.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\456_4ee1d568-8613-4736-bcea-21a9aa11b4f3.docx",
+      "处理时间": "2026-05-08T14:27:08.542616"
+    },
+    {
+      "目录ID": "476",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "e85effb5-7715-4516-ac91-7ca89a59a1ce.docx",
+      "新文件名": "476_e85effb5-7715-4516-ac91-7ca89a59a1ce.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\476\\e85effb5-7715-4516-ac91-7ca89a59a1ce.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\476_e85effb5-7715-4516-ac91-7ca89a59a1ce.docx",
+      "处理时间": "2026-05-08T14:27:08.759055"
+    },
+    {
+      "目录ID": "542",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "fab7b33d-8698-4643-879f-4ae26376234a.docx",
+      "新文件名": "542_fab7b33d-8698-4643-879f-4ae26376234a.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "集团意见",
+      "匹配文件数": 3,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到3个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\542\\fab7b33d-8698-4643-879f-4ae26376234a.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\542_fab7b33d-8698-4643-879f-4ae26376234a.docx",
+      "处理时间": "2026-05-08T14:27:10.876412"
+    },
+    {
+      "目录ID": "729",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "74f72ca4-a16f-4cec-8747-e9863570fbdf.docx",
+      "新文件名": "729_74f72ca4-a16f-4cec-8747-e9863570fbdf.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\729\\74f72ca4-a16f-4cec-8747-e9863570fbdf.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\729_74f72ca4-a16f-4cec-8747-e9863570fbdf.docx",
+      "处理时间": "2026-05-08T14:27:18.574102"
+    },
+    {
+      "目录ID": "727",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "e037ac12-0b53-4d24-93f9-059f0b158bd9.docx",
+      "新文件名": "727_e037ac12-0b53-4d24-93f9-059f0b158bd9.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.27,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\727\\e037ac12-0b53-4d24-93f9-059f0b158bd9.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\727_e037ac12-0b53-4d24-93f9-059f0b158bd9.docx",
+      "处理时间": "2026-05-08T14:27:18.636469"
+    },
+    {
+      "目录ID": "727",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "cf741260-e560-446a-8c5f-5b0383f434ea.docx",
+      "新文件名": "727_cf741260-e560-446a-8c5f-5b0383f434ea.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:监理.{0,8}工程师.{0,8}意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 11.51,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\727\\cf741260-e560-446a-8c5f-5b0383f434ea.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\727_cf741260-e560-446a-8c5f-5b0383f434ea.docx",
+      "处理时间": "2026-05-08T14:27:19.317109"
+    },
+    {
+      "目录ID": "808",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "4895009d-e438-437d-8eb1-91c34974d0a3.docx",
+      "新文件名": "808_4895009d-e438-437d-8eb1-91c34974d0a3.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\808\\4895009d-e438-437d-8eb1-91c34974d0a3.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\808_4895009d-e438-437d-8eb1-91c34974d0a3.docx",
+      "处理时间": "2026-05-08T14:27:19.613821"
+    },
+    {
+      "目录ID": "471",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "3fbe428f-aa66-448f-8b97-e809830a7daf.docx",
+      "新文件名": "471_3fbe428f-aa66-448f-8b97-e809830a7daf.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\471\\3fbe428f-aa66-448f-8b97-e809830a7daf.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\471_3fbe428f-aa66-448f-8b97-e809830a7daf.docx",
+      "处理时间": "2026-05-08T14:27:19.152972"
+    },
+    {
+      "目录ID": "853",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "940ee19f-b37b-4d90-903a-051156b828eb.docx",
+      "新文件名": "853_940ee19f-b37b-4d90-903a-051156b828eb.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\853\\940ee19f-b37b-4d90-903a-051156b828eb.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\853_940ee19f-b37b-4d90-903a-051156b828eb.docx",
+      "处理时间": "2026-05-08T14:27:23.081409"
+    },
+    {
+      "目录ID": "870",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "新文件名": "870_aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 2,
+      "文件大小_MB": 20.92,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\870\\aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\870_aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "处理时间": "2026-05-08T14:27:24.690495"
+    },
+    {
+      "目录ID": "939",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "4cad6d56-f766-4f5f-8e0c-e98486e55ff4.docx",
+      "新文件名": "939_4cad6d56-f766-4f5f-8e0c-e98486e55ff4.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\939\\4cad6d56-f766-4f5f-8e0c-e98486e55ff4.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\939_4cad6d56-f766-4f5f-8e0c-e98486e55ff4.docx",
+      "处理时间": "2026-05-08T14:27:27.704113"
+    },
+    {
+      "目录ID": "872",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "da1b4798-6e44-413e-8d54-e8973d42ce5b.docx",
+      "新文件名": "872_da1b4798-6e44-413e-8d54-e8973d42ce5b.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司审核意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\872\\da1b4798-6e44-413e-8d54-e8973d42ce5b.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\872_da1b4798-6e44-413e-8d54-e8973d42ce5b.docx",
+      "处理时间": "2026-05-08T14:27:28.754247"
+    },
+    {
+      "目录ID": "909",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "a26a1c55-5362-4388-8d1d-d4c9796f76bc.docx",
+      "新文件名": "909_a26a1c55-5362-4388-8d1d-d4c9796f76bc.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\909\\a26a1c55-5362-4388-8d1d-d4c9796f76bc.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\909_a26a1c55-5362-4388-8d1d-d4c9796f76bc.docx",
+      "处理时间": "2026-05-08T14:27:28.780153"
+    },
+    {
+      "目录ID": "733",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "0b358a52-cfbd-4d52-8daa-df3afe0e471e.docx",
+      "新文件名": "733_0b358a52-cfbd-4d52-8daa-df3afe0e471e.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\733\\0b358a52-cfbd-4d52-8daa-df3afe0e471e.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\733_0b358a52-cfbd-4d52-8daa-df3afe0e471e.docx",
+      "处理时间": "2026-05-08T14:27:30.024402"
+    },
+    {
+      "目录ID": "977",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "6111ee38-04a3-43f5-835c-3ff931af3a04.docx",
+      "新文件名": "977_6111ee38-04a3-43f5-835c-3ff931af3a04.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "审批意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 2.04,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\977\\6111ee38-04a3-43f5-835c-3ff931af3a04.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\977_6111ee38-04a3-43f5-835c-3ff931af3a04.docx",
+      "处理时间": "2026-05-08T14:27:30.193398"
+    },
+    {
+      "目录ID": "1062",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "新文件名": "1062_e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "专家评审意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 13.47,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1062\\e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1062_e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "处理时间": "2026-05-08T14:27:32.873729"
+    },
+    {
+      "目录ID": "1062",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "新文件名": "1062_e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 13.47,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1062\\e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1062_e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "处理时间": "2026-05-08T14:27:33.659688"
+    },
+    {
+      "目录ID": "1154",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "660902be-dcfe-43d5-b1f7-6fff7a458ebf.docx",
+      "新文件名": "1154_660902be-dcfe-43d5-b1f7-6fff7a458ebf.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "公司审查意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1154\\660902be-dcfe-43d5-b1f7-6fff7a458ebf.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1154_660902be-dcfe-43d5-b1f7-6fff7a458ebf.docx",
+      "处理时间": "2026-05-08T14:27:36.012508"
+    },
+    {
+      "目录ID": "970",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "871da903-719b-4305-b0a5-a94cb35bde17.docx",
+      "新文件名": "970_871da903-719b-4305-b0a5-a94cb35bde17.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:施工方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 2,
+      "文件大小_MB": 11.25,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\970\\871da903-719b-4305-b0a5-a94cb35bde17.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\970_871da903-719b-4305-b0a5-a94cb35bde17.docx",
+      "处理时间": "2026-05-08T14:27:36.373774"
+    },
+    {
+      "目录ID": "1113",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "7801c037-744c-4d8a-8526-4e29caa30fd1.docx",
+      "新文件名": "1113_7801c037-744c-4d8a-8526-4e29caa30fd1.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 5,
+      "文件大小_MB": 11.52,
+      "备注": "Top5中找到5个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1113\\7801c037-744c-4d8a-8526-4e29caa30fd1.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1113_7801c037-744c-4d8a-8526-4e29caa30fd1.docx",
+      "处理时间": "2026-05-08T14:27:35.007795"
+    },
+    {
+      "目录ID": "1176",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "b137f78d-629c-4bb6-86ec-36b68571c695.docx",
+      "新文件名": "1176_b137f78d-629c-4bb6-86ec-36b68571c695.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 2,
+      "文件大小_MB": 7.24,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1176\\b137f78d-629c-4bb6-86ec-36b68571c695.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1176_b137f78d-629c-4bb6-86ec-36b68571c695.docx",
+      "处理时间": "2026-05-08T14:27:38.798052"
+    },
+    {
+      "目录ID": "1264",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "2a150abc-ec6f-4113-bece-cd16cee63e9a.docx",
+      "新文件名": "1264_2a150abc-ec6f-4113-bece-cd16cee63e9a.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1264\\2a150abc-ec6f-4113-bece-cd16cee63e9a.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1264_2a150abc-ec6f-4113-bece-cd16cee63e9a.docx",
+      "处理时间": "2026-05-08T14:27:40.634598"
+    },
+    {
+      "目录ID": "1223",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "a0cf3cec-d6a6-415b-93c7-c02b59acf772.docx",
+      "新文件名": "1223_a0cf3cec-d6a6-415b-93c7-c02b59acf772.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.03,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1223\\a0cf3cec-d6a6-415b-93c7-c02b59acf772.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1223_a0cf3cec-d6a6-415b-93c7-c02b59acf772.docx",
+      "处理时间": "2026-05-08T14:27:40.965237"
+    },
+    {
+      "目录ID": "1311",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "2aba74b7-8771-43f3-8ac3-4a8e9233a389.docx",
+      "新文件名": "1311_2aba74b7-8771-43f3-8ac3-4a8e9233a389.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "专家评审意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 22.55,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1311\\2aba74b7-8771-43f3-8ac3-4a8e9233a389.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1311_2aba74b7-8771-43f3-8ac3-4a8e9233a389.docx",
+      "处理时间": "2026-05-08T14:27:42.326491"
+    },
+    {
+      "目录ID": "955",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "6e88478a-ad2d-4a23-ac67-9e7643afbb74.docx",
+      "新文件名": "955_6e88478a-ad2d-4a23-ac67-9e7643afbb74.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家组.{0,10}(意见|建议|结论)",
+      "匹配文件数": 1,
+      "文件大小_MB": 18.15,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\955\\6e88478a-ad2d-4a23-ac67-9e7643afbb74.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\955_6e88478a-ad2d-4a23-ac67-9e7643afbb74.docx",
+      "处理时间": "2026-05-08T14:27:35.846050"
+    },
+    {
+      "目录ID": "955",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "1d011fa9-ee92-4772-b666-62da6ffa410d.docx",
+      "新文件名": "955_1d011fa9-ee92-4772-b666-62da6ffa410d.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\955\\1d011fa9-ee92-4772-b666-62da6ffa410d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\955_1d011fa9-ee92-4772-b666-62da6ffa410d.docx",
+      "处理时间": "2026-05-08T14:27:42.905355"
+    },
+    {
+      "目录ID": "1337",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "56816b99-95e9-44b2-86ed-27ee94d7cd30.docx",
+      "新文件名": "1337_56816b99-95e9-44b2-86ed-27ee94d7cd30.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1337\\56816b99-95e9-44b2-86ed-27ee94d7cd30.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1337_56816b99-95e9-44b2-86ed-27ee94d7cd30.docx",
+      "处理时间": "2026-05-08T14:27:44.047197"
+    },
+    {
+      "目录ID": "1349",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "95956489-50ed-4496-b689-5158d2f9e680.docx",
+      "新文件名": "1349_95956489-50ed-4496-b689-5158d2f9e680.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1349\\95956489-50ed-4496-b689-5158d2f9e680.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1349_95956489-50ed-4496-b689-5158d2f9e680.docx",
+      "处理时间": "2026-05-08T14:27:44.102835"
+    },
+    {
+      "目录ID": "1331",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "c4ffb667-c37a-49e4-b854-a390aeb291ee.docx",
+      "新文件名": "1331_c4ffb667-c37a-49e4-b854-a390aeb291ee.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 6.98,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1331\\c4ffb667-c37a-49e4-b854-a390aeb291ee.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1331_c4ffb667-c37a-49e4-b854-a390aeb291ee.docx",
+      "处理时间": "2026-05-08T14:27:43.885736"
+    },
+    {
+      "目录ID": "1354",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "ebec760d-9957-4a36-9643-a98621954c8d.docx",
+      "新文件名": "1354_ebec760d-9957-4a36-9643-a98621954c8d.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 2.43,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1354\\ebec760d-9957-4a36-9643-a98621954c8d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1354_ebec760d-9957-4a36-9643-a98621954c8d.docx",
+      "处理时间": "2026-05-08T14:27:44.486106"
+    },
+    {
+      "目录ID": "1597",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "0c160278-043b-4922-be31-13dff19db638.docx",
+      "新文件名": "1597_0c160278-043b-4922-be31-13dff19db638.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:监理.{0,8}工程师.{0,8}意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 3.44,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1597\\0c160278-043b-4922-be31-13dff19db638.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1597_0c160278-043b-4922-be31-13dff19db638.docx",
+      "处理时间": "2026-05-08T14:27:49.124299"
+    },
+    {
+      "目录ID": "1652",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "a0155673-204d-422f-b041-b26b793c6e6c.docx",
+      "新文件名": "1652_a0155673-204d-422f-b041-b26b793c6e6c.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1652\\a0155673-204d-422f-b041-b26b793c6e6c.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1652_a0155673-204d-422f-b041-b26b793c6e6c.docx",
+      "处理时间": "2026-05-08T14:27:49.341312"
+    },
+    {
+      "目录ID": "1675",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "f8036631-9696-4325-889a-2289734bbb5a.docx",
+      "新文件名": "1675_f8036631-9696-4325-889a-2289734bbb5a.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:监理.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 32.87,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1675\\f8036631-9696-4325-889a-2289734bbb5a.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1675_f8036631-9696-4325-889a-2289734bbb5a.docx",
+      "处理时间": "2026-05-08T14:27:51.890210"
+    },
+    {
+      "目录ID": "1691",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "b840eaf5-1873-443f-abdb-03a1aacd2621.docx",
+      "新文件名": "1691_b840eaf5-1873-443f-abdb-03a1aacd2621.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1691\\b840eaf5-1873-443f-abdb-03a1aacd2621.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1691_b840eaf5-1873-443f-abdb-03a1aacd2621.docx",
+      "处理时间": "2026-05-08T14:27:52.029211"
+    },
+    {
+      "目录ID": "1646",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "dc2767f0-d41f-4750-856a-de4a835b5f23.docx",
+      "新文件名": "1646_dc2767f0-d41f-4750-856a-de4a835b5f23.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 2,
+      "文件大小_MB": 19.49,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1646\\dc2767f0-d41f-4750-856a-de4a835b5f23.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1646_dc2767f0-d41f-4750-856a-de4a835b5f23.docx",
+      "处理时间": "2026-05-08T14:27:52.789087"
+    },
+    {
+      "目录ID": "1710",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "0250ed38-15c6-47d4-93d1-39f2573e9c81.docx",
+      "新文件名": "1710_0250ed38-15c6-47d4-93d1-39f2573e9c81.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 3,
+      "文件大小_MB": 4.14,
+      "备注": "其余文件中找到3个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1710\\0250ed38-15c6-47d4-93d1-39f2573e9c81.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1710_0250ed38-15c6-47d4-93d1-39f2573e9c81.docx",
+      "处理时间": "2026-05-08T14:27:54.638292"
+    },
+    {
+      "目录ID": "1762",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "8f77f3a0-880d-4592-a6e1-f745417164b7.docx",
+      "新文件名": "1762_8f77f3a0-880d-4592-a6e1-f745417164b7.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家组.{0,10}(意见|建议|结论)",
+      "匹配文件数": 2,
+      "文件大小_MB": 23.07,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1762\\8f77f3a0-880d-4592-a6e1-f745417164b7.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1762_8f77f3a0-880d-4592-a6e1-f745417164b7.docx",
+      "处理时间": "2026-05-08T14:27:54.600420"
+    },
+    {
+      "目录ID": "1842",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "32822734-ab2d-4e38-90cd-c0b38866ce61.docx",
+      "新文件名": "1842_32822734-ab2d-4e38-90cd-c0b38866ce61.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 12.87,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1842\\32822734-ab2d-4e38-90cd-c0b38866ce61.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1842_32822734-ab2d-4e38-90cd-c0b38866ce61.docx",
+      "处理时间": "2026-05-08T14:27:56.726759"
+    },
+    {
+      "目录ID": "1848",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "24fbdb2a-ed2f-4277-877a-fbc5302adde4.docx",
+      "新文件名": "1848_24fbdb2a-ed2f-4277-877a-fbc5302adde4.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:项目(部)?.{0,10}经理.{0,8}(意见|审核)",
+      "匹配文件数": 1,
+      "文件大小_MB": 17.7,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1848\\24fbdb2a-ed2f-4277-877a-fbc5302adde4.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1848_24fbdb2a-ed2f-4277-877a-fbc5302adde4.docx",
+      "处理时间": "2026-05-08T14:27:57.522650"
+    },
+    {
+      "目录ID": "1894",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "c2e32c2a-fff0-4187-8927-dd391699b952.docx",
+      "新文件名": "1894_c2e32c2a-fff0-4187-8927-dd391699b952.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 2,
+      "文件大小_MB": 3.85,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1894\\c2e32c2a-fff0-4187-8927-dd391699b952.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1894_c2e32c2a-fff0-4187-8927-dd391699b952.docx",
+      "处理时间": "2026-05-08T14:27:59.221342"
+    },
+    {
+      "目录ID": "1903",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "e38ebf2c-aa31-4239-9c7c-9b7ecc318c76.docx",
+      "新文件名": "1903_e38ebf2c-aa31-4239-9c7c-9b7ecc318c76.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 5.14,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1903\\e38ebf2c-aa31-4239-9c7c-9b7ecc318c76.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1903_e38ebf2c-aa31-4239-9c7c-9b7ecc318c76.docx",
+      "处理时间": "2026-05-08T14:28:00.069387"
+    },
+    {
+      "目录ID": "1730",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "c190d775-5767-4726-b955-683b5e10ae11.docx",
+      "新文件名": "1730_c190d775-5767-4726-b955-683b5e10ae11.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 3,
+      "文件大小_MB": 6.65,
+      "备注": "Top5中找到3个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1730\\c190d775-5767-4726-b955-683b5e10ae11.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1730_c190d775-5767-4726-b955-683b5e10ae11.docx",
+      "处理时间": "2026-05-08T14:28:01.528296"
+    },
+    {
+      "目录ID": "2010",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "新文件名": "2010_0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 3,
+      "文件大小_MB": 10.33,
+      "备注": "Top5中找到3个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2010\\0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2010_0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "处理时间": "2026-05-08T14:28:04.917286"
+    },
+    {
+      "目录ID": "2010",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "新文件名": "2010_0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "监理工程师意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 10.33,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2010\\0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2010_0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "处理时间": "2026-05-08T14:28:06.975536"
+    },
+    {
+      "目录ID": "2071",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "7c0f1d82-964c-48ac-9aba-45f8e93fe396.docx",
+      "新文件名": "2071_7c0f1d82-964c-48ac-9aba-45f8e93fe396.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "施工方案审查意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.15,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2071\\7c0f1d82-964c-48ac-9aba-45f8e93fe396.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2071_7c0f1d82-964c-48ac-9aba-45f8e93fe396.docx",
+      "处理时间": "2026-05-08T14:28:09.610377"
+    },
+    {
+      "目录ID": "2183",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "b64b7bac-afb4-4828-a980-17fff5c1f69d.docx",
+      "新文件名": "2183_b64b7bac-afb4-4828-a980-17fff5c1f69d.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 16.24,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2183\\b64b7bac-afb4-4828-a980-17fff5c1f69d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2183_b64b7bac-afb4-4828-a980-17fff5c1f69d.docx",
+      "处理时间": "2026-05-08T14:28:13.453085"
+    },
+    {
+      "目录ID": "2192",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "8411669a-2fd0-4878-a82a-d269f9ed771c.docx",
+      "新文件名": "2192_8411669a-2fd0-4878-a82a-d269f9ed771c.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,15}(意见|建议|结论)",
+      "匹配文件数": 1,
+      "文件大小_MB": 22.17,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2192\\8411669a-2fd0-4878-a82a-d269f9ed771c.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2192_8411669a-2fd0-4878-a82a-d269f9ed771c.docx",
+      "处理时间": "2026-05-08T14:28:14.617328"
+    },
+    {
+      "目录ID": "2194",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "3288eb4e-4ee6-4096-9d0e-5ae5f845250d.docx",
+      "新文件名": "2194_3288eb4e-4ee6-4096-9d0e-5ae5f845250d.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:(公司|集团|企业).{0,8}意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 14.5,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2194\\3288eb4e-4ee6-4096-9d0e-5ae5f845250d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2194_3288eb4e-4ee6-4096-9d0e-5ae5f845250d.docx",
+      "处理时间": "2026-05-08T14:28:17.825814"
+    },
+    {
+      "目录ID": "2011",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "c0a79b78-7be1-4a76-8fff-848c8b6c66ee.docx",
+      "新文件名": "2011_c0a79b78-7be1-4a76-8fff-848c8b6c66ee.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 3,
+      "文件大小_MB": 39.3,
+      "备注": "Top5中找到3个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2011\\c0a79b78-7be1-4a76-8fff-848c8b6c66ee.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2011_c0a79b78-7be1-4a76-8fff-848c8b6c66ee.docx",
+      "处理时间": "2026-05-08T14:28:16.592242"
+    },
+    {
+      "目录ID": "2230",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "00b6f7b2-5db2-417d-ad8c-706d07796fae.docx",
+      "新文件名": "2230_00b6f7b2-5db2-417d-ad8c-706d07796fae.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 2,
+      "文件大小_MB": 16.58,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2230\\00b6f7b2-5db2-417d-ad8c-706d07796fae.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2230_00b6f7b2-5db2-417d-ad8c-706d07796fae.docx",
+      "处理时间": "2026-05-08T14:28:20.528441"
+    },
+    {
+      "目录ID": "2230",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "7f5c9e5d-590f-4901-865c-3763f3f31d2a.docx",
+      "新文件名": "2230_7f5c9e5d-590f-4901-865c-3763f3f31d2a.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "方案审核意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.41,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2230\\7f5c9e5d-590f-4901-865c-3763f3f31d2a.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2230_7f5c9e5d-590f-4901-865c-3763f3f31d2a.docx",
+      "处理时间": "2026-05-08T14:28:21.913457"
+    },
+    {
+      "目录ID": "2394",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "aa53a0c0-84ea-419e-8c26-d42d59bbcf4c.docx",
+      "新文件名": "2394_aa53a0c0-84ea-419e-8c26-d42d59bbcf4c.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "集团审核意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2394\\aa53a0c0-84ea-419e-8c26-d42d59bbcf4c.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2394_aa53a0c0-84ea-419e-8c26-d42d59bbcf4c.docx",
+      "处理时间": "2026-05-08T14:28:21.994392"
+    },
+    {
+      "目录ID": "2450",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "852e2060-4c92-4b5a-8303-560b8092a34f.docx",
+      "新文件名": "2450_852e2060-4c92-4b5a-8303-560b8092a34f.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2450\\852e2060-4c92-4b5a-8303-560b8092a34f.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2450_852e2060-4c92-4b5a-8303-560b8092a34f.docx",
+      "处理时间": "2026-05-08T14:28:23.384989"
+    },
+    {
+      "目录ID": "2326",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "c73358a6-3fb3-4865-b9a0-1e6a3dc66096.docx",
+      "新文件名": "2326_c73358a6-3fb3-4865-b9a0-1e6a3dc66096.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 29.24,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2326\\c73358a6-3fb3-4865-b9a0-1e6a3dc66096.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2326_c73358a6-3fb3-4865-b9a0-1e6a3dc66096.docx",
+      "处理时间": "2026-05-08T14:28:24.382512"
+    },
+    {
+      "目录ID": "2460",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "5f2ed26f-94e7-4c9d-b7e2-dafb83b838f3.docx",
+      "新文件名": "2460_5f2ed26f-94e7-4c9d-b7e2-dafb83b838f3.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 4.16,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2460\\5f2ed26f-94e7-4c9d-b7e2-dafb83b838f3.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2460_5f2ed26f-94e7-4c9d-b7e2-dafb83b838f3.docx",
+      "处理时间": "2026-05-08T14:28:25.660123"
+    },
+    {
+      "目录ID": "2460",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "95482385-1244-4912-adeb-87a2cdceb67b.docx",
+      "新文件名": "2460_95482385-1244-4912-adeb-87a2cdceb67b.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.04,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2460\\95482385-1244-4912-adeb-87a2cdceb67b.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2460_95482385-1244-4912-adeb-87a2cdceb67b.docx",
+      "处理时间": "2026-05-08T14:28:26.040871"
+    },
+    {
+      "目录ID": "2560",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "63bc600e-e455-49a0-b66e-1de0e2816365.docx",
+      "新文件名": "2560_63bc600e-e455-49a0-b66e-1de0e2816365.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.13,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2560\\63bc600e-e455-49a0-b66e-1de0e2816365.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2560_63bc600e-e455-49a0-b66e-1de0e2816365.docx",
+      "处理时间": "2026-05-08T14:28:29.492848"
+    },
+    {
+      "目录ID": "2634",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "0c796ce6-2d35-4af7-896b-080b91f7be40.docx",
+      "新文件名": "2634_0c796ce6-2d35-4af7-896b-080b91f7be40.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "监理报告",
+      "匹配文件数": 2,
+      "文件大小_MB": 6.69,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2634\\0c796ce6-2d35-4af7-896b-080b91f7be40.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2634_0c796ce6-2d35-4af7-896b-080b91f7be40.docx",
+      "处理时间": "2026-05-08T14:28:40.347670"
+    },
+    {
+      "目录ID": "2842",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "cdd8f19f-902e-46f8-a664-f467ff884fe8.docx",
+      "新文件名": "2842_cdd8f19f-902e-46f8-a664-f467ff884fe8.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2842\\cdd8f19f-902e-46f8-a664-f467ff884fe8.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2842_cdd8f19f-902e-46f8-a664-f467ff884fe8.docx",
+      "处理时间": "2026-05-08T14:28:59.973065"
+    },
+    {
+      "目录ID": "2949",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "86a6cfc3-d360-4c5f-a459-849bf4b82e00.docx",
+      "新文件名": "2949_86a6cfc3-d360-4c5f-a459-849bf4b82e00.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "监理报告",
+      "匹配文件数": 2,
+      "文件大小_MB": 9.71,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2949\\86a6cfc3-d360-4c5f-a459-849bf4b82e00.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2949_86a6cfc3-d360-4c5f-a459-849bf4b82e00.docx",
+      "处理时间": "2026-05-08T14:29:03.482890"
+    },
+    {
+      "目录ID": "3003",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "55ac6a29-a201-4c6f-8757-c9ba190bc62d.docx",
+      "新文件名": "3003_55ac6a29-a201-4c6f-8757-c9ba190bc62d.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3003\\55ac6a29-a201-4c6f-8757-c9ba190bc62d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3003_55ac6a29-a201-4c6f-8757-c9ba190bc62d.docx",
+      "处理时间": "2026-05-08T14:29:03.597363"
+    },
+    {
+      "目录ID": "3033",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "4832d88e-ecfe-49c9-8e1d-adf4a1bf7024.docx",
+      "新文件名": "3033_4832d88e-ecfe-49c9-8e1d-adf4a1bf7024.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3033\\4832d88e-ecfe-49c9-8e1d-adf4a1bf7024.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3033_4832d88e-ecfe-49c9-8e1d-adf4a1bf7024.docx",
+      "处理时间": "2026-05-08T14:29:03.671116"
+    },
+    {
+      "目录ID": "2854",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "54831051-5fbf-4426-b07d-6e3f81732817.docx",
+      "新文件名": "2854_54831051-5fbf-4426-b07d-6e3f81732817.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 4,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到4个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2854\\54831051-5fbf-4426-b07d-6e3f81732817.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2854_54831051-5fbf-4426-b07d-6e3f81732817.docx",
+      "处理时间": "2026-05-08T14:29:06.127389"
+    },
+    {
+      "目录ID": "3114",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "fff55f1a-7b12-48e4-9a63-b93f594a623b.docx",
+      "新文件名": "3114_fff55f1a-7b12-48e4-9a63-b93f594a623b.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3114\\fff55f1a-7b12-48e4-9a63-b93f594a623b.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\3114_fff55f1a-7b12-48e4-9a63-b93f594a623b.docx",
+      "处理时间": "2026-05-08T14:29:09.040052"
+    },
+    {
+      "目录ID": "3098",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "86ce393a-9466-483a-a4ad-af0b63bdd727.docx",
+      "新文件名": "3098_86ce393a-9466-483a-a4ad-af0b63bdd727.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "监理工程师意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 6.66,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3098\\86ce393a-9466-483a-a4ad-af0b63bdd727.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3098_86ce393a-9466-483a-a4ad-af0b63bdd727.docx",
+      "处理时间": "2026-05-08T14:29:11.344767"
+    },
+    {
+      "目录ID": "3162",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "7178af81-5074-461e-96e2-28b145c8aabc.docx",
+      "新文件名": "3162_7178af81-5074-461e-96e2-28b145c8aabc.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 7,
+      "文件大小_MB": 0.04,
+      "备注": "其余文件中找到7个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3162\\7178af81-5074-461e-96e2-28b145c8aabc.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3162_7178af81-5074-461e-96e2-28b145c8aabc.docx",
+      "处理时间": "2026-05-08T14:29:13.771839"
+    },
+    {
+      "目录ID": "3243",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "115f2f35-e472-4ac8-a275-82480eebff06.docx",
+      "新文件名": "3243_115f2f35-e472-4ac8-a275-82480eebff06.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:项目(部)?.{0,10}技术.{0,8}(负责人|主管).{0,8}(意见|审核)",
+      "匹配文件数": 1,
+      "文件大小_MB": 7.51,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3243\\115f2f35-e472-4ac8-a275-82480eebff06.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3243_115f2f35-e472-4ac8-a275-82480eebff06.docx",
+      "处理时间": "2026-05-08T14:29:17.159641"
+    },
+    {
+      "目录ID": "2596",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "fba526f7-d36a-4412-9f23-e8ffb4d9aaeb.docx",
+      "新文件名": "2596_fba526f7-d36a-4412-9f23-e8ffb4d9aaeb.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,15}(意见|建议|结论)",
+      "匹配文件数": 1,
+      "文件大小_MB": 9.49,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2596\\fba526f7-d36a-4412-9f23-e8ffb4d9aaeb.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2596_fba526f7-d36a-4412-9f23-e8ffb4d9aaeb.docx",
+      "处理时间": "2026-05-08T14:29:19.666173"
+    },
+    {
+      "目录ID": "3344",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "fd75910f-194e-4bef-9966-254ee85f11d1.docx",
+      "新文件名": "3344_fd75910f-194e-4bef-9966-254ee85f11d1.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司审查意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3344\\fd75910f-194e-4bef-9966-254ee85f11d1.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3344_fd75910f-194e-4bef-9966-254ee85f11d1.docx",
+      "处理时间": "2026-05-08T14:29:32.530896"
+    },
+    {
+      "目录ID": "3423",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "ccfb0457-3109-4c88-a140-58bdb595155e.docx",
+      "新文件名": "3423_ccfb0457-3109-4c88-a140-58bdb595155e.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "集团审核意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.07,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3423\\ccfb0457-3109-4c88-a140-58bdb595155e.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3423_ccfb0457-3109-4c88-a140-58bdb595155e.docx",
+      "处理时间": "2026-05-08T14:29:32.957967"
+    },
+    {
+      "目录ID": "3372",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "6c1f69a7-6ce8-4dfc-86c7-dacdf4c90332.docx",
+      "新文件名": "3372_6c1f69a7-6ce8-4dfc-86c7-dacdf4c90332.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "监理报告",
+      "匹配文件数": 3,
+      "文件大小_MB": 9.35,
+      "备注": "Top5中找到3个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3372\\6c1f69a7-6ce8-4dfc-86c7-dacdf4c90332.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3372_6c1f69a7-6ce8-4dfc-86c7-dacdf4c90332.docx",
+      "处理时间": "2026-05-08T14:29:34.917038"
+    },
+    {
+      "目录ID": "3489",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "新文件名": "3489_4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}施工方案.{0,10}(审核|审查|评审).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3489\\4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3489_4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "处理时间": "2026-05-08T14:29:41.204484"
+    },
+    {
+      "目录ID": "3538",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "9014c392-6ceb-4fc5-a5b8-81cdd9f52793.docx",
+      "新文件名": "3538_9014c392-6ceb-4fc5-a5b8-81cdd9f52793.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 23.34,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3538\\9014c392-6ceb-4fc5-a5b8-81cdd9f52793.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\3538_9014c392-6ceb-4fc5-a5b8-81cdd9f52793.docx",
+      "处理时间": "2026-05-08T14:29:36.899296"
+    },
+    {
+      "目录ID": "4082",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "0d5d4d45-69e9-4ca1-b513-5bfafaefd11d.docx",
+      "新文件名": "4082_0d5d4d45-69e9-4ca1-b513-5bfafaefd11d.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,12}论证.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|论证表|结论表)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\4082\\0d5d4d45-69e9-4ca1-b513-5bfafaefd11d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\4082_0d5d4d45-69e9-4ca1-b513-5bfafaefd11d.docx",
+      "处理时间": "2026-05-08T14:29:44.841417"
+    },
+    {
+      "目录ID": "3861",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "b8b8352b-977f-4830-b28d-a8235c77b81e.docx",
+      "新文件名": "3861_b8b8352b-977f-4830-b28d-a8235c77b81e.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 30.43,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3861\\b8b8352b-977f-4830-b28d-a8235c77b81e.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\3861_b8b8352b-977f-4830-b28d-a8235c77b81e.docx",
+      "处理时间": "2026-05-08T14:29:46.472271"
+    },
+    {
+      "目录ID": "5057",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "52ab633d-ddc7-4851-9565-b3e6be30c255.docx",
+      "新文件名": "5057_52ab633d-ddc7-4851-9565-b3e6be30c255.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "施工方案审查意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.22,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\5057\\52ab633d-ddc7-4851-9565-b3e6be30c255.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\5057_52ab633d-ddc7-4851-9565-b3e6be30c255.docx",
+      "处理时间": "2026-05-08T14:29:52.416618"
+    },
+    {
+      "目录ID": "5423",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "bab16352-6031-4195-a85b-39a880809cde.docx",
+      "新文件名": "5423_bab16352-6031-4195-a85b-39a880809cde.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\5423\\bab16352-6031-4195-a85b-39a880809cde.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\5423_bab16352-6031-4195-a85b-39a880809cde.docx",
+      "处理时间": "2026-05-08T14:29:52.819688"
+    },
+    {
+      "目录ID": "5629",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "2eb1f5a0-2628-4bc8-a000-00c98d69ddd8.docx",
+      "新文件名": "5629_2eb1f5a0-2628-4bc8-a000-00c98d69ddd8.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:施工方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\5629\\2eb1f5a0-2628-4bc8-a000-00c98d69ddd8.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\5629_2eb1f5a0-2628-4bc8-a000-00c98d69ddd8.docx",
+      "处理时间": "2026-05-08T14:29:52.896714"
+    }
+  ],
+  "stats": {
+    "total_directories": 643,
+    "expert_success_count": 120,
+    "company_success_count": 151,
+    "expert_phase2_success_count": 0,
+    "company_phase2_success_count": 0,
+    "expert_top5_match_count": 0,
+    "company_top5_match_count": 0,
+    "expert_others_match_count": 0,
+    "company_others_match_count": 0,
+    "expert_no_match_count": 0,
+    "company_no_match_count": 0
+  },
+  "phase": 2,
+  "total_directories": 643,
+  "last_update": "2026-05-08T14:29:55.981151"
+}

+ 771 - 0
src/app/scripts/md2excel_extractor_company.py

@@ -0,0 +1,771 @@
+#!/usr/bin/env python3
+"""
+md2excel_company: Markdown 公司集团评审意见文档批量提取工具
+
+功能说明:
+    遍历文件夹中的 Markdown 文档,使用大模型语义理解提取项目名称、
+    方案名称和公司集团评审意见,写入 Excel 汇总表。
+
+用法:
+    # 方式1:使用默认路径(无需参数)
+    python md2excel_extractor_company.py
+    
+    # 方式2:自定义路径
+    python md2excel_extractor_company.py <源文件夹路径> <输出Excel路径>
+
+示例:
+    python md2excel_extractor_company.py D:/公司集团评审意见/temp D:/汇总表.xlsx
+
+目录结构要求:
+    支持两种结构,自动识别:
+    
+    结构1(旧):
+    源文件夹/
+    ├── 子文件夹1/
+    │   └── auto/
+    │       └── xxx.md
+    ├── 子文件夹2/
+    │   └── auto/
+    │       └── yyy.md
+    └── ...
+    
+    结构2(新):
+    源文件夹/
+    ├── 子文件夹1/
+    │   └── xxx.md
+    ├── 子文件夹2/
+    │   └── subfolder/
+    │       └── yyy.md
+    └── ...
+"""
+
+import os
+import sys
+import json
+import time
+import re
+import requests
+from pathlib import Path
+from typing import List, Dict, Optional, Any
+from dataclasses import dataclass
+
+from openpyxl import Workbook, load_workbook
+from openpyxl.styles import Font, Alignment, Border, Side, PatternFill
+
+
# ==================== Configuration ====================


# ==================== LLM API configuration ====================
# Endpoint and credentials of the self-hosted, OpenAI-compatible LLM service.
# Both values can be overridden with environment variables of the same name so
# that a deployment does not have to edit (or commit) this file.
LLM_API_URL = os.environ.get(
    "LLM_API_URL", "http://183.220.37.46:25423/v1/chat/completions"
)
# NOTE(security): the fallback below is a credential committed to version
# control. Export LLM_API_KEY instead, rotate this key, and then remove the
# fallback value.
LLM_API_KEY = os.environ.get(
    "LLM_API_KEY", "sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615"
)
LLM_MODEL = "/model/Qwen3.5-122B-A10B"
LLM_TEMPERATURE = 0.0      # 0 keeps extraction results stable and reproducible
LLM_MAX_TOKENS = 8192      # review opinions can be long; 512 proved too small
LLM_TIMEOUT = 120          # HTTP request timeout in seconds

# Header row of the generated workbook (also defines the number of columns).
EXCEL_HEADERS = ["文件名称", "项目名称", "方案名称", "公司集团评审意见说明"]

# Column widths keyed by column letter.
COLUMN_WIDTHS = {
    'A': 45,  # file name
    'B': 50,  # project name
    'C': 55,  # plan name
    'D': 120, # company/group review opinion
}

# Height applied to every data row.
ROW_HEIGHT = 180

# Pause between two consecutive API requests, in seconds.
API_DELAY = 0.5

# Maximum number of characters of a document that is sent to the model
# (bounds token consumption).
MAX_CONTENT_LENGTH = 12000

# Default paths used when no command-line arguments are supplied.
# NOTE(review): "ouput" in the source directory looks like a typo of "output";
# confirm against the real directory name before changing it.
DEFAULT_SOURCE_DIR = r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\公司集团评审意见说明\公司集团评审意见5.11ouput"
DEFAULT_OUTPUT_FILE = r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\公司集团评审意见说明\公司集团评审意见汇总表5.11.xlsx"
+
+
+# ==================== 数据模型 ====================
+
@dataclass
class ExtractedInfo:
    """Plain record with one string field per extracted value.

    NOTE(review): the visible code never instantiates this class; the
    processing functions pass plain dictionaries around instead.
    """
    file_name: str        # identifier of the source document
    project_name: str     # extracted project name
    plan_name: str        # extracted plan name
    company_opinion: str  # extracted company/group review opinion text
+
+
+# ==================== 大模型调用实现 ====================
+
def call_llm_api(prompt: str) -> str:
    """
    Send *prompt* to the configured chat-completions endpoint and return the reply.

    The request targets ``LLM_API_URL`` with model ``LLM_MODEL`` and expects an
    OpenAI-compatible response: ``{"choices": [{"message": {"content": "..."}}]}``.

    Args:
        prompt: Complete prompt text (already containing the document to analyse).

    Returns:
        The stripped text content of the first choice.

    Raises:
        ConnectionError: The service could not be reached.
        TimeoutError: No response within ``LLM_TIMEOUT`` seconds.
        RuntimeError: The service answered with an HTTP error status.
        KeyError: The response has no usable ``choices`` entry.
        ValueError: The response body is not JSON, or the model returned
            empty content.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {LLM_API_KEY}"
    }

    payload = {
        "model": LLM_MODEL,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": LLM_TEMPERATURE,
        "max_tokens": LLM_MAX_TOKENS
    }

    # Only the network round trip is guarded; transport errors are re-raised as
    # built-in exception types with the original error chained as the cause.
    try:
        response = requests.post(
            LLM_API_URL,
            headers=headers,
            json=payload,
            timeout=LLM_TIMEOUT
        )
        response.raise_for_status()
    except requests.exceptions.ConnectionError as e:
        raise ConnectionError(
            f"无法连接到本地 LLM 服务 ({LLM_API_URL}),请确认服务已启动。\n"
            f"原始错误: {e}"
        ) from e
    except requests.exceptions.Timeout as e:
        raise TimeoutError(
            f"请求本地 LLM 服务超时 (>{LLM_TIMEOUT}秒),请检查模型是否过载或增大 LLM_TIMEOUT 配置。"
        ) from e
    except requests.exceptions.HTTPError as e:
        raise RuntimeError(
            f"LLM API 返回 HTTP 错误: {e.response.status_code}\n"
            f"响应内容: {e.response.text[:500]}"
        ) from e

    result = response.json()

    # Guard against bodies that are not a JSON object or carry no choices.
    if not isinstance(result, dict) or not result.get("choices"):
        found = result.keys() if isinstance(result, dict) else type(result).__name__
        raise KeyError(f"响应中未找到 'choices' 字段: {found}")

    # "message" and "content" may be present but null; treat null like a
    # missing value instead of failing on None.get / None.strip.
    message = result["choices"][0].get("message") or {}
    content = (message.get("content") or "").strip()

    if not content:
        raise ValueError("模型返回内容为空")

    return content
+
+
+# ==================== 提示词模板 ====================
+
def build_extraction_prompt(content: str) -> str:
    """
    Build the extraction prompt that is sent to the model for one document.

    The prompt consists of a role description, the task, per-field extraction
    rules for the three target fields, the required JSON output format, a list
    of caveats, and finally the document text inside a Markdown code fence.

    Args:
        content: Raw Markdown text of the document.

    Returns:
        The complete prompt text.
    """
    # Only the first MAX_CONTENT_LENGTH characters are embedded; a marker is
    # appended when something was cut off.
    overflow = len(content) > MAX_CONTENT_LENGTH
    excerpt = content[:MAX_CONTENT_LENGTH]
    if overflow:
        excerpt = excerpt + "\n\n... [文档内容已截断,剩余部分省略]"

    return f"""你是一位资深的工程文档分析专家,擅长从施工方案评审意见文档中提取结构化信息。

## 任务说明

请仔细阅读以下 Markdown 格式的施工方案公司集团评审意见文档,从中提取三个关键字段的信息。

## 提取字段及规则

### 1. 项目名称
**定义**: 该施工方案所对应的工程项目名称。

**提取规则**(按优先级排序):
- 优先从文档中的表格字段提取,查找包含以下关键词的单元格:
  * "项目名称"
  * "工程名称"
  * "工程全称"
  * "建设项目名称"
  * "标段名称"
  
- 如果表格中没有明确字段,从文档标题、页眉或正文开头部分语义推断。
  通常项目名称会出现在文档的显著位置,格式如:
  * "XX高速公路XX标段"
  * "XX大桥工程"
  * "XX隧道工程"
  * "XX合同段"

- 如果确实无法确定,标注为"未明确"。

### 2. 方案名称
**定义**: 该文档所涉及的专项施工方案名称。

**提取规则**(按优先级排序):
- 优先从文档中的表格字段提取,查找包含以下关键词的单元格:
  * "方案名称"
  * "专项方案名称"
  * "危险性较大分项工程名称"
  * "分部分项工程名称"
  * "施工方案名称"
  
- 如果表格中没有明确字段,从文档标题中推断。
  方案名称通常包含以下关键词:
  * "施工方案"
  * "专项方案"
  * "施工组织设计"
  * "安全专项方案"
  * "技术方案"

- 注意区分"项目名称"和"方案名称":
  * 项目名称:宏观的工程名称(如"XX高速公路")
  * 方案名称:具体的施工方案(如"XX大桥桩基施工方案")

- 如果确实无法确定,标注为"未明确"。

### 3. 公司集团评审意见说明
**定义**: 整合后的公司集团评审意见及修改回复内容。

**提取规则**(按优先级排序):
- 从以下命名的章节或表格中提取:
  * "公司集团评审意见"
  * "公司集团审查意见"
  * "公司集团审核意见"
  * "公司集团论证意见"
  * "公司/集团意见及回复"
  * "审查意见及修改回复"
  * "审核意见及修改情况"
  * "意见与建议"
  * "公司集团评审会议纪要"
  * "公司集团评审报告"

- 内容整合要求:
  * 将"公司集团评审意见/审查意见/审核意见"与"修改回复/修改情况/回复说明"进行配对整合
  * 保留原始的公司集团评审意见原文
  * 保留对应的修改回复或整改措施
  * 如果有多位评审人员的意见,按顺序列出
  * 如果公司集团评审意见与回复分散在文档不同位置,需要将它们关联起来

- 格式要求:
  * 使用清晰的编号列出每条公司集团评审意见及其回复
  * 保留关键的专业术语和数据
  * 如果原文有表格形式,转换为文本描述
  * 每条意见格式建议:"意见X: [公司集团评审原文意见] -> 回复: [施工单位回复内容]"

- 如果确实无法提取到公司集团评审意见内容,标注为"未明确"。

## 输出格式要求

必须以严格的 JSON 格式返回,不要包含任何其他解释文字:

```json
{{
  "项目名称": "提取到的项目名称或'未明确'",
  "方案名称": "提取到的方案名称或'未明确'",
  "公司集团评审意见说明": "整合后的公司集团评审意见与回复内容,或'未明确'"
}}
```

## 注意事项

1. **语义理解优先**: 不要依赖固定的正则表达式,而是通过理解文档内容的语义来提取信息。
2. **容错处理**: 即使文档格式不标准、表格缺失或字段名称不同,也要尝试从上下文中推断。
3. **信息整合**: 对于分散在文档各处的公司集团评审意见和回复,需要整合成完整的记录。
4. **不要编造**: 如果某项信息确实无法从文档中确定,必须标注为"未明确",严禁编造或猜测。
5. **保持简洁**: 公司集团评审意见说明的内容可以适当精简,但要保留核心观点和关键数据。

## 待分析文档

```markdown
{excerpt}
```

请直接返回 JSON 格式的提取结果:"""
+
+
+# ==================== 文件处理 ====================
+
def read_md_files(root_dir: str) -> List[Dict[str, str]]:
    """
    Collect one Markdown document per top-level sub-folder of *root_dir*.

    Two layouts are supported and detected per sub-folder:

    Layout 1 (legacy):
        root_dir/
        ├── folder_1/
        │   └── auto/
        │       └── xxx.md
        └── ...

    Layout 2 (current):
        root_dir/
        ├── folder_1/
        │   └── xxx.md
        ├── folder_2/
        │   └── subfolder/
        │       └── yyy.md
        └── ...

    Search order for each sub-folder:
        1. ``*.md`` directly inside its ``auto/`` directory, if present.
        2. Otherwise any ``*.md`` below the sub-folder, searched recursively.
        3. Only one file is used per sub-folder. Candidates are sorted by path
           first, so the choice does not depend on directory listing order.

    Args:
        root_dir: Path of the source root directory.

    Returns:
        A list of dictionaries, one per document that could be read, with:
        - file_name: name of the top-level sub-folder
        - content: text of the Markdown file (decoded as UTF-8)
        - file_path: full path of the Markdown file

    Raises:
        FileNotFoundError: *root_dir* does not exist.
    """
    root_path = Path(root_dir)

    if not root_path.exists():
        raise FileNotFoundError(f"源文件夹不存在: {root_dir}")

    md_contents = []

    for folder_path in sorted(root_path.iterdir()):
        if not folder_path.is_dir():
            continue

        md_file = None

        # Strategy 1: prefer the legacy auto/ directory.
        auto_dir = folder_path / "auto"
        if auto_dir.is_dir():
            # glob() yields entries in arbitrary order; sort for a stable pick.
            md_files = sorted(auto_dir.glob("*.md"))
            if md_files:
                md_file = md_files[0]
                print(f"  [已读取] {folder_path.name}/auto -> {md_file.name}")

        # Strategy 2: no auto/ directory, or it holds no Markdown file.
        if md_file is None:
            all_md_files = sorted(folder_path.rglob("*.md"))
            if all_md_files:
                md_file = all_md_files[0]
                relative_path = md_file.relative_to(folder_path)
                print(f"  [已读取] {folder_path.name}/{relative_path} -> {md_file.name}")

        if md_file is None:
            print(f"  [跳过] 未找到 md 文件: {folder_path.name}")
            continue

        # A file that cannot be read is reported and skipped so that one bad
        # document does not abort the whole scan.
        try:
            content = md_file.read_text(encoding="utf-8")
        except Exception as e:
            print(f"  [错误] 读取文件失败 {md_file}: {e}")
            continue

        md_contents.append({
            "file_name": folder_path.name,
            "content": content,
            "file_path": str(md_file)
        })

    return md_contents
+
+
def parse_llm_response(response_text: str) -> Dict[str, str]:
    """
    Extract the result object from the raw model reply.

    The reply may contain reasoning text and echoed prompt templates besides
    the final answer, so three strategies are tried in order:

    1. Fenced ```json blocks, last block first.
    2. A regular expression for an object with exactly the three target keys,
       last match first.
    3. Brace-depth scanning of the text with ``<think>`` sections removed.

    A candidate is rejected when its project or plan name contains one of the
    placeholder keywords, which indicates an echoed template.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The parsed dictionary. When nothing usable is found, a dictionary
        whose name fields are "解析失败" and whose opinion field holds the raw
        reply.
    """
    placeholder_keywords = ["提取到的", "或'", "...", "示例", "Extract", "Template"]

    def has_placeholder(*values) -> bool:
        return any(kw in str(value) for value in values for kw in placeholder_keywords)

    def is_valid(data: dict) -> bool:
        return not has_placeholder(data.get("项目名称", ""), data.get("方案名称", ""))

    def unescape(raw: str) -> str:
        # The regex captures the inside of a JSON string literal, so escapes
        # such as \n or \" are still encoded. strict=False also accepts raw
        # control characters; an invalid escape keeps the text unchanged.
        try:
            return json.loads(f'"{raw}"', strict=False)
        except json.JSONDecodeError:
            return raw

    # Strategy 1: fenced ```json blocks, newest first.
    json_blocks = re.findall(r'```json\s*(.*?)\s*```', response_text, re.DOTALL)
    for block_text in reversed(json_blocks):
        try:
            data = json.loads(block_text)
        except json.JSONDecodeError:
            continue
        if isinstance(data, dict) and "项目名称" in data and is_valid(data):
            return data

    # Strategy 2: regex for an object holding the three target keys. Matches
    # are filtered like in strategy 1 so that an echoed template is skipped.
    pattern = (
        r'\{'
        r'\s*"项目名称"\s*:\s*"((?:[^"\\]|\\.)*)"\s*,'
        r'\s*"方案名称"\s*:\s*"((?:[^"\\]|\\.)*)"\s*,'
        r'\s*"公司集团评审意见说明"\s*:\s*"((?:[^"\\]|\\.)*)"'
        r'\s*\}'
    )
    matches = re.findall(pattern, response_text, re.DOTALL)
    for pn, sn, eo in reversed(matches):
        if has_placeholder(pn, sn):
            continue
        return {
            "项目名称": unescape(pn).strip(),
            "方案名称": unescape(sn).strip(),
            "公司集团评审意见说明": unescape(eo).strip()
        }

    # Strategy 3: brace-depth scanning as a last resort.
    text = re.sub(r'<think>.*?</think>', '', response_text, flags=re.DOTALL)
    json_objects = _extract_json_by_brace_company(text)

    for obj in json_objects:
        try:
            data = json.loads(obj)
        except json.JSONDecodeError:
            continue
        if isinstance(data, dict) and "项目名称" in data and is_valid(data):
            return data

    print(f"  [警告] 无法解析 JSON 响应,使用原始文本")
    return {
        "项目名称": "解析失败",
        "方案名称": "解析失败",
        "公司集团评审意见说明": response_text
    }
+
+
+def _extract_json_by_brace_company(text: str) -> list:
+    """通过大括号深度匹配提取 JSON 对象列表。"""
+    brace_depth = 0
+    json_objects = []
+    start_idx = -1
+
+    for i, char in enumerate(text):
+        if char == '{':
+            if brace_depth == 0:
+                start_idx = i
+            brace_depth += 1
+        elif char == '}':
+            brace_depth -= 1
+            if brace_depth == 0 and start_idx != -1:
+                json_objects.append(text[start_idx:i+1])
+                start_idx = -1
+
+    return json_objects
+
+
def extract_info_with_llm(content: str) -> Dict[str, str]:
    """
    Run the full extraction for one document: build prompt, call model, parse.

    Args:
        content: Markdown text of the document.

    Returns:
        A dictionary with the three target keys. Every value is a string: a
        missing or null field becomes "未明确", and a non-string value is
        serialised to text. If any step raises, all three values carry a
        "提取失败" message instead.
    """
    def as_text(value) -> str:
        # The model does not always return strings: null, lists and nested
        # objects occur. Calling .strip() on those would discard the record.
        if value is None:
            return "未明确"
        if isinstance(value, str):
            return value.strip()
        if isinstance(value, (list, dict)):
            return json.dumps(value, ensure_ascii=False, indent=2)
        return str(value)

    prompt = build_extraction_prompt(content)

    try:
        response_text = call_llm_api(prompt)
        extracted = parse_llm_response(response_text)

        return {
            "项目名称": as_text(extracted.get("项目名称")),
            "方案名称": as_text(extracted.get("方案名称")),
            "公司集团评审意见说明": as_text(extracted.get("公司集团评审意见说明"))
        }

    except Exception as e:
        # Deliberately broad: one failing document must not stop the batch.
        print(f"  [错误] LLM 提取失败: {e}")
        return {
            "项目名称": f"提取失败: {str(e)[:50]}",
            "方案名称": f"提取失败: {str(e)[:50]}",
            "公司集团评审意见说明": f"提取失败: {str(e)}"
        }
+
+
+# ==================== Excel 生成 ====================
+
def _init_excel_styles(ws):
    """Write the styled header row, set the column widths and freeze row 1."""
    blue_fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
    white_bold = Font(color="FFFFFF", bold=True, size=12)
    centered = Alignment(horizontal="center", vertical="center", wrap_text=True)

    # Header cells: one per entry of EXCEL_HEADERS, starting in column 1.
    for position, title in enumerate(EXCEL_HEADERS, start=1):
        header_cell = ws.cell(row=1, column=position)
        header_cell.value = title
        header_cell.fill = blue_fill
        header_cell.font = white_bold
        header_cell.alignment = centered

    # Column widths are keyed by column letter.
    for letter in COLUMN_WIDTHS:
        ws.column_dimensions[letter].width = COLUMN_WIDTHS[letter]

    # Keep the header visible while scrolling.
    ws.freeze_panes = 'A2'
+
+
def _apply_row_style(ws, row_num: int):
    """Give row *row_num* thin cell borders, top-aligned wrapped text and ROW_HEIGHT."""
    edge = Side(style='thin')
    thin_border = Border(left=edge, right=edge, top=edge, bottom=edge)

    column = 1
    while column <= len(EXCEL_HEADERS):
        target = ws.cell(row=row_num, column=column)
        target.border = thin_border
        target.alignment = Alignment(vertical="top", wrap_text=True)
        column += 1

    ws.row_dimensions[row_num].height = ROW_HEIGHT
+
+
def append_to_excel(row_data: Dict[str, str], output_file: str):
    """
    Append one record to the workbook at *output_file* and save immediately.

    A missing file is created with the styled header row first; an existing
    file is loaded and extended. Saving after every record means that an
    interrupted run keeps everything written so far.

    Args:
        row_data: Record keyed by the Excel header names.
        output_file: Path of the workbook.
    """
    target = Path(output_file)
    target.parent.mkdir(parents=True, exist_ok=True)

    if not target.exists():
        # New workbook: header goes to row 1, first record to row 2.
        wb = Workbook()
        ws = wb.active
        ws.title = "公司集团评审意见汇总"
        _init_excel_styles(ws)
        next_row = 2
    else:
        wb = load_workbook(output_file)
        ws = wb.active
        next_row = ws.max_row + 1

    values = [
        row_data.get(column, "")
        for column in ("文件名称", "项目名称", "方案名称", "公司集团评审意见说明")
    ]
    ws.append(values)

    _apply_row_style(ws, next_row)

    wb.save(output_file)
+
+
def create_excel(data_rows: List[Dict[str, str]], output_file: str):
    """
    Write all records into a freshly created, formatted workbook.

    The sheet gets the styled and frozen header row and the configured column
    widths; every data row gets borders, wrapped text and the configured
    height. Whatever the new workbook is saved over at *output_file* is
    replaced.

    Args:
        data_rows: Records keyed by the Excel header names.
        output_file: Path of the workbook to write.
    """
    destination = Path(output_file)
    destination.parent.mkdir(parents=True, exist_ok=True)

    wb = Workbook()
    ws = wb.active
    ws.title = "公司集团评审意见汇总"
    _init_excel_styles(ws)

    columns = ("文件名称", "项目名称", "方案名称", "公司集团评审意见说明")
    for record in data_rows:
        ws.append([record.get(column, "") for column in columns])

    # Style every row below the header.
    last_row = ws.max_row
    for row_num in range(2, last_row + 1):
        _apply_row_style(ws, row_num)

    wb.save(output_file)

    print(f"\n✅ 已成功保存到: {output_file}")
    print(f"📊 共写入 {len(data_rows)} 条记录")
+
+
+# ==================== 主流程 ====================
+
def main():
    """Command-line entry point.

    Steps:
        1. Resolve source folder and output workbook (command-line arguments,
           or the DEFAULT_* paths when fewer than two are given).
        2. Read one Markdown document per sub-folder.
        3. Extract the fields with the model and append each result to the
           workbook right away, so an interrupted run can be resumed. Folder
           names already present in column 1 of the workbook are skipped.

    Exits with status 1 when the source folder is missing, no document is
    found, the existing workbook cannot be read, or a result cannot be written.
    """
    if len(sys.argv) >= 3:
        root_dir = sys.argv[1]
        output_file = sys.argv[2]
        print("✅ 使用命令行传入的路径")
    else:
        root_dir = DEFAULT_SOURCE_DIR
        output_file = DEFAULT_OUTPUT_FILE
        print("⚠️  未提供命令行参数,使用默认路径")
        print(f"    如需自定义路径,请运行: python md2excel_extractor_company.py <源文件夹> <输出Excel>")

    if not os.path.isdir(root_dir):
        print(f"错误: 源文件夹不存在: {root_dir}")
        sys.exit(1)

    print("=" * 70)
    print("Markdown 公司集团评审意见文档批量提取工具")
    print("=" * 70)
    print(f"\n📁 源文件夹: {root_dir}")
    print(f"📄 输出文件: {output_file}")

    # Step 1: read the Markdown files.
    print(f"\n【步骤 1/3】扫描并读取 Markdown 文件...")
    try:
        md_contents = read_md_files(root_dir)
    except Exception as e:
        print(f"错误: 读取文件失败: {e}")
        sys.exit(1)

    if not md_contents:
        print("未找到任何有效的 md 文件,请检查目录结构")
        sys.exit(1)

    print(f"\n✅ 共找到 {len(md_contents)} 个有效文档")

    # Step 2: extract with the model.
    print(f"\n【步骤 2/3】使用大模型提取信息...")
    print(f"  LLM 端点: {LLM_API_URL}")
    print(f"  模型: {LLM_MODEL}")
    print(f"  Temperature: {LLM_TEMPERATURE} | Max tokens: {LLM_MAX_TOKENS}")
    print(f"  💡 每处理完一个文件会立即追加写入 Excel,支持断点续传\n")

    # Resume support: column 1 of an existing workbook lists what is done.
    output_path = Path(output_file)
    processed_files = set()
    if output_path.exists():
        try:
            wb = load_workbook(output_file)
            ws = wb.active
            for row in ws.iter_rows(min_row=2, values_only=True):
                if row and row[0]:
                    processed_files.add(row[0])
        except Exception as e:
            # Continuing without the progress list would reprocess every
            # document and append duplicates to a workbook that cannot even
            # be loaded, so stop with an explicit message instead.
            print(f"错误: 无法读取已有的输出文件 {output_file}: {e}")
            print("    请修复或删除该文件后重新运行")
            sys.exit(1)
        print(f"  📋 检测到已有进度,已处理 {len(processed_files)} 个文件,将跳过这些文件")

    total = len(md_contents)
    processed_count = 0

    for i, item in enumerate(md_contents, 1):
        file_name = item['file_name']

        if file_name in processed_files:
            print(f"[{i}/{total}] ⏭️  跳过已处理: {file_name}")
            continue

        print(f"[{i}/{total}] 正在处理: {file_name}")

        try:
            extracted = extract_info_with_llm(item['content'])
            row_data = {
                "文件名称": file_name,
                "项目名称": extracted["项目名称"],
                "方案名称": extracted["方案名称"],
                "公司集团评审意见说明": extracted["公司集团评审意见说明"]
            }
            append_to_excel(row_data, output_file)
            print(f"  ✅ 提取完成并已写入 Excel")

        except Exception as e:
            print(f"  ❌ 处理失败: {e}")
            row_data = {
                "文件名称": file_name,
                "项目名称": "处理异常",
                "方案名称": "处理异常",
                "公司集团评审意见说明": f"处理异常: {str(e)}"
            }
            # Record the failure as a row. If even that cannot be written
            # (for example the workbook is locked by another program), no
            # later document could be saved either, so stop here; everything
            # saved so far stays in the workbook.
            try:
                append_to_excel(row_data, output_file)
            except Exception as write_error:
                print(f"  ❌ 无法写入 Excel ({output_file}): {write_error}")
                print("     请确认该文件未被其他程序占用后重新运行")
                sys.exit(1)

        processed_count += 1

        # Pause between requests; not needed after the last document.
        if i < total:
            time.sleep(API_DELAY)

    # Step 3: summary. The workbook itself is already complete at this point.
    print(f"\n【步骤 3/3】生成 Excel 汇总表...")
    print(f"  本次新处理: {processed_count} 个文件")
    print(f"  总计写入: {len(processed_files) + processed_count} 个文件")

    print("\n" + "=" * 70)
    print("🎉 处理完成!")
    print("=" * 70)


if __name__ == "__main__":
    main()

+ 159 - 88
src/app/scripts/md2excel_extractor.py → src/app/scripts/md2excel_extractor_expert.py

@@ -7,12 +7,19 @@ md2excel: Markdown 专家意见文档批量提取工具
     方案名称和专家意见,写入 Excel 汇总表。
     方案名称和专家意见,写入 Excel 汇总表。
 
 
 用法:
 用法:
+    # 方式1:使用默认路径(无需参数)
+    python md2excel_extractor.py
+    
+    # 方式2:自定义路径
     python md2excel_extractor.py <源文件夹路径> <输出Excel路径>
     python md2excel_extractor.py <源文件夹路径> <输出Excel路径>
 
 
 示例:
 示例:
     python md2excel_extractor.py D:/专家意见/temp D:/汇总表.xlsx
     python md2excel_extractor.py D:/专家意见/temp D:/汇总表.xlsx
 
 
 目录结构要求:
 目录结构要求:
+    支持两种结构,自动识别:
+    
+    结构1(旧):
     源文件夹/
     源文件夹/
     ├── 子文件夹1/
     ├── 子文件夹1/
     │   └── auto/
     │   └── auto/
@@ -21,6 +28,15 @@ md2excel: Markdown 专家意见文档批量提取工具
     │   └── auto/
     │   └── auto/
     │       └── yyy.md
     │       └── yyy.md
     └── ...
     └── ...
+    
+    结构2(新):
+    源文件夹/
+    ├── 子文件夹1/
+    │   └── xxx.md
+    ├── 子文件夹2/
+    │   └── subfolder/
+    │       └── yyy.md
+    └── ...
 """
 """
 
 
 import os
 import os
@@ -39,6 +55,16 @@ from openpyxl.styles import Font, Alignment, Border, Side, PatternFill
 
 
 # ==================== 配置区域 ====================
 # ==================== 配置区域 ====================
 
 
+
# ==================== LLM API configuration ====================
# Endpoint and credentials of the self-hosted, OpenAI-compatible LLM service.
# Both values can be overridden with environment variables of the same name.
LLM_API_URL = os.environ.get(
    "LLM_API_URL", "http://183.220.37.46:25423/v1/chat/completions"
)
# NOTE(security): the fallback below is a credential committed to version
# control. Export LLM_API_KEY instead, rotate this key, and then remove the
# fallback value.
LLM_API_KEY = os.environ.get(
    "LLM_API_KEY", "sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615"
)
LLM_MODEL = "/model/Qwen3.5-122B-A10B"
LLM_TEMPERATURE = 0.0      # 0 keeps extraction results stable and reproducible
LLM_MAX_TOKENS = 8192      # expert opinion replies can be long; 512 proved too small
LLM_TIMEOUT = 120          # HTTP request timeout in seconds
+
 # Excel 列配置
 # Excel 列配置
 EXCEL_HEADERS = ["文件名称", "项目名称", "方案名称", "专项方案专家评审意见回复表"]
 EXCEL_HEADERS = ["文件名称", "项目名称", "方案名称", "专项方案专家评审意见回复表"]
 
 
@@ -59,14 +85,9 @@ API_DELAY = 0.5
 # 单文件最大读取字符数(控制 token 消耗)
 # 单文件最大读取字符数(控制 token 消耗)
 MAX_CONTENT_LENGTH = 12000
 MAX_CONTENT_LENGTH = 12000
 
 
-# ==================== LLM API 配置 ====================
-# 本地部署的大模型 API 配置
-LLM_API_URL = "http://localhost:25423/v1/chat/completions"
-LLM_API_KEY = "sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615"
-LLM_MODEL = "/model/Qwen3.5-122B-A10B"
-LLM_TEMPERATURE = 0.0      # 信息提取任务建议用 0,确保结果稳定可复现
-LLM_MAX_TOKENS = 8192      # 专家意见回复可能很长,建议设为 8192 或更大(原 512 可能不够)
-LLM_TIMEOUT = 120          # API 请求超时时间(秒)
+# 默认路径配置(当不传入命令行参数时使用)
+DEFAULT_SOURCE_DIR = r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\专家评审意见_记录\专家意见5.11output"
+DEFAULT_OUTPUT_FILE = r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\专家评审意见_记录\专家意见汇总表5.11.xlsx"
 
 
 
 
 # ==================== 数据模型 ====================
 # ==================== 数据模型 ====================
@@ -86,7 +107,7 @@ def call_llm_api(prompt: str) -> str:
     """
     """
     调用本地部署的大模型 API 进行文本理解和信息提取
     调用本地部署的大模型 API 进行文本理解和信息提取
     
     
-    API 端点: http://localhost:25423/v1/chat/completions
+    API 端点: http://183.220.37.46:25423/v1/chat/completions
     模型: /model/Qwen3.5-122B-A10B
     模型: /model/Qwen3.5-122B-A10B
     
     
     Args:
     Args:
@@ -291,22 +312,117 @@ def build_extraction_prompt(content: str) -> str:
     return prompt
     return prompt
 
 
 
 
def parse_llm_response(response_text: str) -> Dict[str, str]:
    """
    Extract the result object from the raw model reply.

    The reply may consist of a reasoning part (which can contain JSON
    examples) followed by the final answer, so three strategies are tried:

    1. Fenced ```json blocks, last block first.
    2. A regular expression for an object with exactly the three target keys,
       last match first.
    3. Brace-depth scanning of the text with ``<think>`` sections removed.

    A candidate is rejected when its project or plan name contains one of the
    placeholder keywords, which indicates an echoed template.

    Returns:
        The parsed dictionary. When nothing usable is found, a dictionary
        whose name fields are "解析失败" and whose opinion field holds the raw
        reply.
    """
    placeholder_keywords = ["提取到的", "或'", "...", "示例", "Extract", "Template"]

    def is_valid(data: dict) -> bool:
        pn = data.get("项目名称", "")
        sn = data.get("方案名称", "")
        return not any(kw in str(pn) or kw in str(sn) for kw in placeholder_keywords)

    def unescape(raw: str) -> str:
        # The regex captures the inside of a JSON string literal, so escapes
        # such as \n or \" are still encoded. strict=False also accepts raw
        # control characters; an invalid escape keeps the text unchanged.
        try:
            return json.loads(f'"{raw}"', strict=False)
        except json.JSONDecodeError:
            return raw

    # Strategy 1: fenced ```json blocks, newest first.
    json_blocks = re.findall(r'```json\s*(.*?)\s*```', response_text, re.DOTALL)
    for block_text in reversed(json_blocks):
        try:
            data = json.loads(block_text)
        except json.JSONDecodeError:
            continue
        if isinstance(data, dict) and "项目名称" in data and is_valid(data):
            return data

    # Strategy 2: regex for an object holding the three target keys.
    pattern = (
        r'\{'
        r'\s*"项目名称"\s*:\s*"((?:[^"\\]|\\.)*)"\s*,'
        r'\s*"方案名称"\s*:\s*"((?:[^"\\]|\\.)*)"\s*,'
        r'\s*"专项方案专家评审意见回复表"\s*:\s*"((?:[^"\\]|\\.)*)"'
        r'\s*\}'
    )
    matches = re.findall(pattern, response_text, re.DOTALL)
    for pn, sn, eo in reversed(matches):
        if not any(kw in pn or kw in sn for kw in placeholder_keywords):
            return {
                "项目名称": unescape(pn).strip(),
                "方案名称": unescape(sn).strip(),
                "专项方案专家评审意见回复表": unescape(eo).strip()
            }

    # Strategy 3: brace-depth scanning as a last resort.
    text = re.sub(r'<think>.*?</think>', '', response_text, flags=re.DOTALL)
    json_objects = _extract_json_by_brace(text)
    for obj in json_objects:
        try:
            data = json.loads(obj)
        except json.JSONDecodeError:
            continue
        if isinstance(data, dict) and "项目名称" in data and is_valid(data):
            return data

    print(f"  [警告] 无法解析 JSON 响应,使用原始文本")
    return {
        "项目名称": "解析失败",
        "方案名称": "解析失败",
        "专项方案专家评审意见回复表": response_text
    }
+
+
+def _extract_json_by_brace(text: str) -> list:
+    """通过大括号深度匹配提取 JSON 对象列表。"""
+    brace_depth = 0
+    json_objects = []
+    start_idx = -1
+
+    for i, char in enumerate(text):
+        if char == '{':
+            if brace_depth == 0:
+                start_idx = i
+            brace_depth += 1
+        elif char == '}':
+            brace_depth -= 1
+            if brace_depth == 0 and start_idx != -1:
+                json_objects.append(text[start_idx:i+1])
+                start_idx = -1
+
+    return json_objects
+
+
 # ==================== 文件处理 ====================
 # ==================== 文件处理 ====================
 
 
 def read_md_files(root_dir: str) -> List[Dict[str, str]]:
 def read_md_files(root_dir: str) -> List[Dict[str, str]]:
     """
     """
     遍历文件夹,读取所有 md 文件内容
     遍历文件夹,读取所有 md 文件内容
     
     
-    目录结构要求:
+    支持两种目录结构:
+    结构1(旧):
         root_dir/
         root_dir/
         ├── folder_1/
         ├── folder_1/
         │   └── auto/
         │   └── auto/
         │       └── xxx.md
         │       └── xxx.md
+        └── ...
+    
+    结构2(新):
+        root_dir/
+        ├── folder_1/
+        │   └── xxx.md
         ├── folder_2/
         ├── folder_2/
-        │   └── auto/
+        │   └── subfolder/
         │       └── yyy.md
         │       └── yyy.md
         └── ...
         └── ...
     
     
+    搜索策略:
+        1. 优先在每个子文件夹的 auto/ 子目录中查找(兼容旧结构)
+        2. 如果没有 auto/,则在该子文件夹及其所有嵌套子文件夹中递归查找
+        3. 每个顶层子文件夹只取第一个找到的 md 文件
+    
     Args:
     Args:
         root_dir: 源文件夹根目录路径
         root_dir: 源文件夹根目录路径
     
     
@@ -326,20 +442,31 @@ def read_md_files(root_dir: str) -> List[Dict[str, str]]:
         if not folder_path.is_dir():
         if not folder_path.is_dir():
             continue
             continue
         
         
-        # 查找 auto 子目录
+        md_file = None
+        
+        # 策略1: 优先查找 auto/ 子目录(兼容旧结构)
         auto_dir = folder_path / "auto"
         auto_dir = folder_path / "auto"
-        if not auto_dir.exists() or not auto_dir.is_dir():
-            print(f"  [跳过] 未找到 auto 目录: {folder_path.name}")
-            continue
+        if auto_dir.exists() and auto_dir.is_dir():
+            md_files = list(auto_dir.glob("*.md"))
+            if md_files:
+                md_file = md_files[0]
+                print(f"  [已读取] {folder_path.name}/auto -> {md_file.name}")
+        
+        # 策略2: 如果没有 auto/ 或 auto/ 中没有 md,则在当前子文件夹中递归查找
+        if md_file is None:
+            # 使用 rglob 递归查找所有 .md 文件
+            all_md_files = list(folder_path.rglob("*.md"))
+            if all_md_files:
+                md_file = all_md_files[0]
+                relative_path = md_file.relative_to(folder_path)
+                print(f"  [已读取] {folder_path.name}/{relative_path} -> {md_file.name}")
         
         
-        # 查找 md 文件
-        md_files = list(auto_dir.glob("*.md"))
-        if not md_files:
-            print(f"  [跳过] auto 目录中无 md 文件: {folder_path.name}")
+        # 如果还是没找到,跳过
+        if md_file is None:
+            print(f"  [跳过] 未找到 md 文件: {folder_path.name}")
             continue
             continue
         
         
-        # 读取第一个 md 文件
-        md_file = md_files[0]
+        # 读取 md 文件
         try:
         try:
             content = md_file.read_text(encoding="utf-8")
             content = md_file.read_text(encoding="utf-8")
             md_contents.append({
             md_contents.append({
@@ -347,7 +474,6 @@ def read_md_files(root_dir: str) -> List[Dict[str, str]]:
                 "content": content,
                 "content": content,
                 "file_path": str(md_file)
                 "file_path": str(md_file)
             })
             })
-            print(f"  [已读取] {folder_path.name} -> {md_file.name}")
         except Exception as e:
         except Exception as e:
             print(f"  [错误] 读取文件失败 {md_file}: {e}")
             print(f"  [错误] 读取文件失败 {md_file}: {e}")
             continue
             continue
@@ -355,65 +481,6 @@ def read_md_files(root_dir: str) -> List[Dict[str, str]]:
     return md_contents
     return md_contents
 
 
 
 
-def parse_llm_response(response_text: str) -> Dict[str, str]:
-    """
-    解析大模型返回的 JSON 响应
-    
-    Args:
-        response_text: 大模型返回的原始文本
-    
-    Returns:
-        解析后的字典,包含提取的字段
-    """
-    def extract_json_objects(text):
-        brace_depth = 0
-        json_objects = []
-        start_idx = -1
-        
-        for i, char in enumerate(text):
-            if char == '{':
-                if brace_depth == 0:
-                    start_idx = i
-                brace_depth += 1
-            elif char == '}':
-                brace_depth -= 1
-                if brace_depth == 0 and start_idx != -1:
-                    json_objects.append(text[start_idx:i+1])
-                    start_idx = -1
-        
-        return json_objects
-    
-    json_objects = extract_json_objects(response_text)
-    
-    placeholder_keywords = ["提取到的", "或'", "...", "示例", "Extract", "Template"]
-    
-    for obj in json_objects:
-        try:
-            data = json.loads(obj)
-            if isinstance(data, dict) and "项目名称" in data:
-                project_name = data.get("项目名称", "")
-                plan_name = data.get("方案名称", "")
-                expert_opinion = data.get("专项方案专家评审意见回复表", "")
-                
-                is_placeholder = False
-                for keyword in placeholder_keywords:
-                    if keyword in str(project_name) or keyword in str(plan_name):
-                        is_placeholder = True
-                        break
-                
-                if not is_placeholder and len(str(expert_opinion)) > 50:
-                    return data
-        except json.JSONDecodeError:
-            continue
-    
-    print(f"  [警告] 无法解析 JSON 响应,使用原始文本")
-    return {
-        "项目名称": "解析失败",
-        "方案名称": "解析失败",
-        "专项方案专家评审意见回复表": response_text
-    }
-
-
 def extract_info_with_llm(content: str) -> Dict[str, str]:
 def extract_info_with_llm(content: str) -> Dict[str, str]:
     """
     """
     使用大模型从文档中提取信息
     使用大模型从文档中提取信息
@@ -583,13 +650,17 @@ def create_excel(data_rows: List[Dict[str, str]], output_file: str):
 def main():
 def main():
     """主函数"""
     """主函数"""
     # 解析命令行参数
     # 解析命令行参数
-    if len(sys.argv) < 3:
-        print("用法: python md2excel_extractor.py <源文件夹路径> <输出Excel路径>")
-        print("示例: python md2excel_extractor.py D:/专家意见/temp D:/汇总表.xlsx")
-        sys.exit(1)
-    
-    root_dir = sys.argv[1]
-    output_file = sys.argv[2]
+    if len(sys.argv) >= 3:
+        # 使用命令行传入的参数
+        root_dir = sys.argv[1]
+        output_file = sys.argv[2]
+        print("✅ 使用命令行传入的路径")
+    else:
+        # 使用默认路径
+        root_dir = DEFAULT_SOURCE_DIR
+        output_file = DEFAULT_OUTPUT_FILE
+        print("⚠️  未提供命令行参数,使用默认路径")
+        print(f"    如需自定义路径,请运行: python md2excel_extractor.py <源文件夹> <输出Excel>")
     
     
     # 验证源目录
     # 验证源目录
     if not os.path.isdir(root_dir):
     if not os.path.isdir(root_dir):