Przeglądaj źródła

fix:修复minerU转换脚本以及json解析

Meric 2 tygodni temu
rodzic
commit
a84d580059

+ 107 - 0
src/app/minerU/minerU.py

@@ -0,0 +1,107 @@
+#!/usr/bin/env python3
+import os
+import requests
+import zipfile
+import shutil
+from pathlib import Path
+from concurrent.futures import ThreadPoolExecutor, as_completed
+
+API_URL = "http://183.220.37.46:25428/file_parse"
+INPUT_DIR = Path(r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\公司集团评审意见说明\公司集团评审意见_input")
+OUTPUT_DIR = Path(r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\公司集团评审意见说明\公司集团评审意见_output")
+
+def parse_file(file_path):
+    filename = file_path.name
+    
+    try:
+        with open(file_path, 'rb') as f:
+            files = {
+                'files': (filename, f, 'application/pdf')
+            }
+            data = {
+                'return_md': 'true',
+                'response_format_zip': 'true',
+                'return_original_file': 'true',
+                'return_middle_json': 'true',
+                'return_content_list': 'true',
+                'return_images': 'true'
+            }
+            
+            print(f"Processing: {filename}")
+            response = requests.post(API_URL, files=files, data=data)
+            
+            if response.status_code == 200:
+                zip_filename = f"{file_path.stem}_result.zip"
+                zip_path = OUTPUT_DIR / zip_filename
+                extract_dir = OUTPUT_DIR / file_path.stem
+                
+                with open(zip_path, 'wb') as out_f:
+                    out_f.write(response.content)
+                
+                print(f"  Saved zip to: {zip_path}")
+                
+                extract_dir.mkdir(exist_ok=True)
+                with zipfile.ZipFile(zip_path, 'r') as zipf:
+                    zipf.extractall(extract_dir)
+                
+                nested_dir = extract_dir / file_path.stem
+                if nested_dir.exists() and nested_dir.is_dir():
+                    for item in nested_dir.iterdir():
+                        shutil.move(str(item), str(extract_dir / item.name))
+                    nested_dir.rmdir()
+                
+                os.remove(zip_path)
+                print(f"  Extracted to: {extract_dir}")
+                return (filename, True, None)
+            else:
+                error_msg = f"HTTP {response.status_code}: {response.text}"
+                print(f"  Error: {error_msg}")
+                return (filename, False, error_msg)
+    except Exception as e:
+        print(f"  Exception: {str(e)}")
+        return (filename, False, str(e))
+
+def main(max_workers=10):
+    INPUT_DIR.mkdir(exist_ok=True)
+    OUTPUT_DIR.mkdir(exist_ok=True)
+    
+    pdf_files = list(INPUT_DIR.glob("*.pdf"))
+    
+    if not pdf_files:
+        print("No PDF files found in input directory")
+        return
+    
+    print(f"Found {len(pdf_files)} PDF file(s)")
+    print(f"Processing with {max_workers} concurrent workers\n")
+    
+    with ThreadPoolExecutor(max_workers=max_workers) as executor:
+        futures = {executor.submit(parse_file, pdf_file): pdf_file for pdf_file in pdf_files}
+        
+        success_count = 0
+        fail_count = 0
+        failed_files = []
+        
+        for future in as_completed(futures):
+            filename, success, error = future.result()
+            if success:
+                success_count += 1
+            else:
+                fail_count += 1
+                failed_files.append((filename, error))
+    
+    print(f"\nDone! Success: {success_count}, Failed: {fail_count}")
+    
+    if failed_files:
+        print("\nFailed files:")
+        for filename, error in failed_files:
+            print(f"  - {filename}: {error}")
+
+if __name__ == "__main__":
+    import argparse
+    
+    parser = argparse.ArgumentParser(description='Parse PDF files using MineRU API')
+    parser.add_argument('-w', '--workers', type=int, default=10,
+                        help='Number of concurrent workers (default: 10)')
+    args = parser.parse_args()
+    
+    main(max_workers=args.workers)

+ 522 - 182
src/app/scripts/ceshi/03-施工方案筛选.py

@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 """
-评审意见PDF文件筛选脚本 - 二级筛选+断点续传版
+评审意见PDF文件筛选脚本 - 多进程并发版+测试模式
 
 功能说明:
     从raw/670目录下的数字编号子目录中筛选评审意见PDF文件。
@@ -21,6 +21,8 @@
     6. 专家评审输出到output/expert_review目录
     7. 公司/集团评审输出到output/company_review目录
     8. 支持断点续传,在temp目录缓存处理进度
+    9. 【新增】支持多进程并发筛选,提高效率
+    10.【新增】支持随机抽取测试模式,快速验证
 
 输入:
     - 源目录: raw/670/ (包含数字编号子目录,如1567、1569等)
@@ -35,6 +37,7 @@
 
 作者: Claude
 日期: 2026-04-21
+更新: 2026-05-08 - 增加多进程并发和测试模式
 """
 
 import pandas as pd
@@ -44,6 +47,8 @@ import os
 import shutil
 import re
 import warnings
+import multiprocessing as mp
+import random
 from pathlib import Path
 from datetime import datetime
 from typing import List, Dict, Tuple, Optional, Set
@@ -59,36 +64,211 @@ warnings.filterwarnings('ignore', category=Warning)
 # 1) 填绝对路径(如 E:/data/raw/670)则直接使用(Windows 建议用 / 或 \\)
 # 2) 填相对路径(如 ../../raw/670)则相对当前脚本目录解析
 SOURCE_DIR = r"E:\提供的原始文件\原始文件\全部的原始文档\未提取"
-EXPERT_OUTPUT_DIR = r"F:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\专家评审意见_记录"
-COMPANY_OUTPUT_DIR = r"F:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\公司集团评审意见说明"
+EXPERT_OUTPUT_DIR = r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\专家评审意见_记录"
+COMPANY_OUTPUT_DIR = r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\公司集团评审意见说明"
 TEMP_DIR = "temp"
 
 
+# ==================== 并发与测试配置 ====================
+# 多进程并发配置
+NUM_WORKERS = 4  # 并发进程数,建议设为CPU核心数(如CPU有8核则设为6-8)
+                   # 注意:每个工作进程内部还会为单个PDF创建子进程(超时控制)
+                   # 因此 NUM_WORKERS 不宜过大,避免进程过多导致系统资源耗尽
+
+# 测试模式配置
+TEST_MODE = False       # 是否启用测试模式:随机抽取少量目录快速测试
+TEST_SAMPLE_SIZE = 5    # 测试模式下随机抽取的目录数量
+                        # 测试完成后会输出结果并自动退出,不会进入二级筛选
+
+
 # 分批配置(仅用于统计显示,不创建子目录)
 BATCH_SIZE = 50  # 每批处理的目录数量(仅用于进度显示)
 
 # 关键词配置
 KEYWORDS = {
-    # 专家意见:必须命中“专家审查”相关表述(仅“评审/评估”不算专家审查)
+    # 专家意见:涵盖"审查"、"评审"、"论证"、"咨询"、"签字"等多种表述
     "expert": [
+        # === 专家审查系列(核心关键词)===
         "专家审查意见", "专家审查记录", "专家审查结论",
         "专家审查说明", "专家审查建议", "专家审查纪要", "专家审查报告",
         "专家审查审核表", "专家审查审查表",
+        
+        # === 专家评审系列(评审与审查并重)===
+        "专家评审意见", "专家评审记录", "专家评审结论",
+        "专家评审说明", "专家评审建议", "专家评审纪要", "专家评审报告",
+        "专家评审审核表", "专家评审表",
+        
+        # === 专家论证系列(常见于危大工程)===
+        "专家论证意见", "专家论证记录", "专家论证结论",
+        "专家论证说明", "专家论证建议", "专家论证纪要", "专家论证报告",
+        "专家论证审核表", "专家论证审查表",
+        
+        # === 专家组系列(多位专家集体意见)===
+        "专家组意见", "专家组审查意见", "专家组评审意见",
+        "专家组论证意见", "专家组建议", "专家组结论",
+        "专家组纪要", "专家组报告",
+        
+        # === 专家咨询系列(技术咨询类)===
+        "专家咨询意见", "专家咨询建议", "专家咨询记录",
+        "专家咨询结论", "专家咨询说明",
+        
+        # === 专家签字/签名系列(专家参与确认)===
+        "专家签字", "专家签名", "专家签章",
+        "专家签字表", "专家签名表", "专家签认",
+        "专家确认", "专家审核签字",
+        
+        # === 专家意见回复/修改系列(回复与整改)===
+        "专家意见回复", "专家意见修改回复", "专家意见整改回复",
+        "专家意见回复表", "专家意见修改表", "专家意见回复单",
+        
+        # === 专家意见通用表述(兜底关键词)===
+        "专家意见", "专家建议", "专家结论",
+        "专家名单", "专家签到表", "专家签到",
+        
+        # === 常见简写/变体 ===
+        "专家意见及回复", "专家意见及整改", "专家意见及修改",
+        "专家审查结论表", "专家评审结论表", "专家论证结论表",
+        "专家技术意见", "专家技术审查", "专家技术评审",
     ],
-    # 公司/集团:必须包含“公司”或“集团”主体表述
-    "company": ["公司评审意见", "集团评审意见", "公司审核意见", "集团审核意见", "公司审查意见", "集团审查意见"]  # 公司/集团关键词
+    
+    # 公司/集团:涵盖"公司"、"集团"、"企业"、"项目部"、"总包"、"监理"等多种主体
+    "company": [
+        # === 公司/集团系列(核心关键词)===
+        "公司评审意见", "集团评审意见", "公司审核意见", "集团审核意见", 
+        "公司审查意见", "集团审查意见",
+        "公司评审记录", "集团评审记录", "公司审核记录", "集团审核记录",
+        "公司审查记录", "集团审查记录",
+        "公司评审纪要", "集团评审纪要", "公司审核纪要", "集团审核纪要",
+        "公司审查纪要", "集团审查纪要",
+        "公司评审报告", "集团评审报告", "公司审核报告", "集团审核报告",
+        "公司审查报告", "集团审查报告",
+        
+        # === 企业系列(企业为主体)===
+        "企业评审意见", "企业审核意见", "企业审查意见",
+        "企业评审记录", "企业审核记录", "企业审查记录",
+        "企业评审纪要", "企业审核纪要", "企业审查纪要",
+        "企业技术负责人意见", "企业技术负责人审核",
+        
+        # === 项目部/项目系列(项目部为主体)===
+        "项目部评审意见", "项目部审核意见", "项目部审查意见",
+        "项目部评审记录", "项目部审核记录", "项目部审查记录",
+        "项目评审意见", "项目审核意见", "项目审查意见",
+        "项目经理意见", "项目经理审核",
+        
+        # === 总包/总承包系列(总包单位为主体)===
+        "总包评审意见", "总包审核意见", "总包审查意见",
+        "总承包评审意见", "总承包审核意见", "总承包审查意见",
+        "总包单位意见", "总承包单位意见",
+        
+        # === 内部评审系列(内部流程)===
+        "内部评审意见", "内部审核意见", "内部审查意见",
+        "内部评审记录", "内部审核记录", "内部审查记录",
+        "内部评审纪要", "内部审核纪要", "内部审查纪要",
+        "内部审批意见", "内部会签意见",
+        
+        # === 审批/会签系列(审批流程)===
+        "公司审批意见", "集团审批意见", "企业审批意见",
+        "公司会签意见", "集团会签意见", "企业会签意见",
+        "审批意见", "会签意见", "审批记录",
+        
+        # === 监理系列(监理单位意见)===
+        "监理评审意见", "监理审核意见", "监理审查意见",
+        "监理记录", "监理纪要", "监理报告",
+        "监理工程师意见", "总监理工程师意见",
+        
+        # === 常见简写/变体 ===
+        "公司意见", "集团意见", "企业意见",
+        "公司技术意见", "集团技术意见",
+        "施工方案评审意见", "施工方案审核意见", "施工方案审查意见",
+        "方案评审意见", "方案审核意见", "方案审查意见",
+        "评审会纪要", "审核会纪要", "审查会纪要",
+        "评审会议记录", "审核会议记录", "审查会议记录",
+    ]  
 }
 
 # 更宽松但可控的规则,用于提升OCR/版式噪声下的命中率
 KEYWORD_PATTERNS = {
     "expert": [
-        # 仅允许“审查”语义命中(避免把“专家评审/评估”误判为专家审查)
-        r"专家.{0,12}审查.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|审查表)",
-        r"审查.{0,10}专家.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|审查表)",
+        # === 专家审查系列 ===
+        r"专家.{0,12}审查.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|审查表|结论表)",
+        r"审查.{0,10}专家.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|审查表|结论表)",
+        
+        # === 专家评审系列 ===
+        r"专家.{0,12}评审.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|评审表|结论表)",
+        r"评审.{0,10}专家.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|评审表|结论表)",
+        
+        # === 专家论证系列(危大工程常见)===
+        r"专家.{0,12}论证.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|论证表|结论表)",
+        r"论证.{0,10}专家.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|论证表|结论表)",
+        
+        # === 专家组系列 ===
+        r"专家组.{0,10}(审查|评审|论证).{0,10}(意见|记录|结论|说明|建议|纪要|报告)",
+        r"专家组.{0,10}(意见|建议|结论)",
+        
+        # === 专家咨询系列 ===
+        r"专家.{0,10}咨询.{0,10}(意见|建议|记录|结论|说明)",
+        r"咨询.{0,8}专家.{0,10}(意见|建议|记录|结论|说明)",
+        
+        # === 专家签字/签名系列 ===
+        r"专家.{0,8}(签字|签名|签章|签认|确认)",
+        r"(签字|签名|签章).{0,8}专家",
+        
+        # === 专家意见回复/修改系列 ===
+        r"专家.{0,8}意见.{0,8}(回复|修改|整改).{0,8}(表|单|记录)",
+        r"(审查|评审|论证).{0,8}意见.{0,8}(回复|修改|整改).{0,8}(表|单|记录)",
+        r"(审查|评审|论证).{0,8}意见.{0,8}回复",
+        r"对.{0,10}专家.{0,10}意见.{0,10}回复",
+        
+        # === 专家通用表述(兜底)===
+        r"专家.{0,15}(意见|建议|结论)",
+        r"专家.{0,8}(名单|签到)",
+        r"专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+        
+        # === 常见简写变体 ===
+        r"专家.{0,8}(审查|评审|论证).{0,8}结论",
+        r"专家.{0,10}意见.{0,10}(及|和).{0,10}(回复|修改|整改)",
     ],
+    
     "company": [
-        r"(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录)",
-        r"(公司|集团).{0,10}施工方案.{0,10}(审核|审查|评审).{0,10}(意见|说明|记录)",
+        # === 公司/集团系列 ===
+        r"(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        r"(公司|集团).{0,10}施工方案.{0,10}(审核|审查|评审).{0,10}(意见|说明|记录|纪要|报告)",
+        r"(公司|集团).{0,10}(审批|会签).{0,10}意见",
+        r"(公司|集团).{0,10}技术.{0,8}(负责人|主管).{0,8}意见",
+        
+        # === 企业系列 ===
+        r"企业.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        r"企业.{0,10}技术.{0,8}(负责人|主管).{0,8}(意见|审核)",
+        
+        # === 项目部系列 ===
+        r"项目(部)?.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        r"项目(部)?.{0,10}经理.{0,8}(意见|审核)",
+        r"项目(部)?.{0,10}技术.{0,8}(负责人|主管).{0,8}(意见|审核)",
+        
+        # === 总包/总承包系列 ===
+        r"(总包|总承包).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        r"(总包|总承包).{0,10}单位.{0,8}意见",
+        
+        # === 内部评审系列 ===
+        r"内部.{0,10}(评审|审核|审查|审批|会签).{0,10}(意见|说明|记录|纪要|报告)",
+        
+        # === 监理系列 ===
+        r"监理.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        r"监理.{0,8}工程师.{0,8}意见",
+        r"总监理.{0,8}工程师.{0,8}意见",
+        
+        # === 施工方案评审系列 ===
+        r"施工方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        r"方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+        
+        # === 会议/纪要系列 ===
+        r"(评审|审核|审查).{0,8}会.{0,8}(纪要|记录)",
+        r"(评审|审核|审查).{0,8}会议.{0,8}(纪要|记录)",
+        r"(评审|审核|审查).{0,8}纪要",
+        
+        # === 通用兜底 ===
+        r"(公司|集团|企业).{0,8}意见",
+        r"(公司|集团|企业).{0,8}技术.{0,8}意见",
     ],
 }
 
@@ -141,14 +321,12 @@ def _extract_pdf_text_worker(pdf_path_str: str, max_pages: int, result_queue):
         result_queue.put({"ok": False, "error": str(e)})
 
 
-def extract_text_with_pages(pdf_path: Path, max_pages: int, timeout_seconds: int = 30) -> str:
+def extract_text_with_pages(pdf_path: Path, max_pages: int, timeout_seconds: int = 30, verbose: bool = True) -> str:
     """从PDF文件中提取文本内容(指定页数),带硬超时机制(子进程)"""
-    import multiprocessing as mp
-
     text = ""
     file_size_mb = pdf_path.stat().st_size / (1024 * 1024)
 
-    if file_size_mb > 50:
+    if verbose and file_size_mb > 50:
         print(f"\n      [大文件 {file_size_mb:.1f}MB,读取中...]", end="", flush=True)
 
     try:
@@ -165,24 +343,27 @@ def extract_text_with_pages(pdf_path: Path, max_pages: int, timeout_seconds: int
         if process.is_alive():
             process.terminate()
             process.join(timeout=2)
-            print(f" [超时跳过]", end="", flush=True)
+            if verbose:
+                print(f" [超时跳过]", end="", flush=True)
             return ""
 
         result = result_queue.get_nowait() if not result_queue.empty() else {"ok": False, "error": "子进程无返回结果"}
 
         if not result.get("ok"):
             error_msg = result.get("error", "")
-            if "PyCryptodome" in error_msg or "AES" in error_msg:
-                print(f" [加密PDF需PyCryptodome]", end="", flush=True)
-            elif "Password" in error_msg or "password" in error_msg:
-                print(f" [PDF加密需要密码]", end="", flush=True)
-            else:
-                print(f" [读取失败]", end="", flush=True)
+            if verbose:
+                if "PyCryptodome" in error_msg or "AES" in error_msg:
+                    print(f" [加密PDF需PyCryptodome]", end="", flush=True)
+                elif "Password" in error_msg or "password" in error_msg:
+                    print(f" [PDF加密需要密码]", end="", flush=True)
+                else:
+                    print(f" [读取失败]", end="", flush=True)
             return ""
 
         text = result.get("text", "")
     except Exception:
-        print(f" [读取错误]", end="", flush=True)
+        if verbose:
+            print(f" [读取错误]", end="", flush=True)
 
     return text
 
@@ -206,7 +387,7 @@ def check_pdf_contains_keywords_with_pages(pdf_path: Path, keywords: List[str],
 def extract_docx_text(docx_path: Path, max_pages: int) -> str:
     """提取DOCX文本(按段落近似页数限制)"""
     try:
-        # Word没有固定分页信息,这里用“每页约40段”进行近似截断,避免读取过慢。
+        # Word没有固定分页信息,这里用"每页约40段"进行近似截断,避免读取过慢。
         approx_max_paragraphs = max(1, max_pages * 40)
         doc = Document(str(docx_path))
         texts = []
@@ -220,15 +401,16 @@ def extract_docx_text(docx_path: Path, max_pages: int) -> str:
         return ""
 
 
-def check_file_contains_keywords_with_pages(file_path: Path, keywords: List[str], max_pages: int) -> Tuple[bool, str]:
+def check_file_contains_keywords_with_pages(file_path: Path, keywords: List[str], max_pages: int, verbose: bool = True) -> Tuple[bool, str]:
     """检查文件(PDF/DOCX)指定范围内是否包含任一关键词"""
     suffix = file_path.suffix.lower()
     if suffix == ".pdf":
-        text = extract_text_with_pages(file_path, max_pages=max_pages)
+        text = extract_text_with_pages(file_path, max_pages=max_pages, verbose=verbose)
     elif suffix == ".docx":
         text = extract_docx_text(file_path, max_pages=max_pages)
     elif suffix == ".doc":
-        print(" [DOC暂不支持,跳过]", end="", flush=True)
+        if verbose:
+            print(" [DOC暂不支持,跳过]", end="", flush=True)
         return False, ""
     else:
         return False, ""
@@ -285,7 +467,7 @@ def get_newest_file(files: List[Path]) -> Optional[Path]:
     return max(files, key=lambda f: get_file_creation_time(f))
 
 
-def process_single_directory_phase(dir_path: Path, phase: int, max_pages: int, review_type: str) -> Tuple[bool, str, Optional[Path], List[Path], str]:
+def process_single_directory_phase(dir_path: Path, phase: int, max_pages: int, review_type: str, verbose: bool = True) -> Tuple[bool, str, Optional[Path], List[Path], str]:
     """
     处理单个目录的文件筛选(指定阶段和评审类型)
 
@@ -294,6 +476,7 @@ def process_single_directory_phase(dir_path: Path, phase: int, max_pages: int, r
         phase: 阶段(1或2)
         max_pages: 检查的最大页数
         review_type: 评审类型 ('expert' 或 'company')
+        verbose: 是否打印详细日志
 
     Returns:
         (是否成功, 状态信息, 选中的文件路径, 所有包含关键词的文件列表, 匹配到的关键词)
@@ -314,20 +497,24 @@ def process_single_directory_phase(dir_path: Path, phase: int, max_pages: int, r
     matched_in_top5 = []
     matched_keyword_top5 = ""
     phase_str = f"【第{phase}阶段-{review_type}】"
-    print(f"\n  {phase_str} 目录: {dir_path.name} - 共{len(candidate_files)}个文件,检查前{max_pages}页,先检查Top5...")
+    if verbose:
+        print(f"\n  {phase_str} 目录: {dir_path.name} - 共{len(candidate_files)}个文件,检查前{max_pages}页,先检查Top5...")
 
     for file_path in top5_files:
         size_mb = get_file_size(file_path) / (1024 * 1024)
-        print(f"    检查Top5: {file_path.name[:30]}... (大小: {size_mb:.2f}MB)", end="", flush=True)
+        if verbose:
+            print(f"    检查Top5: {file_path.name[:30]}... (大小: {size_mb:.2f}MB)", end="", flush=True)
 
-        is_match, matched_kw = check_file_contains_keywords_with_pages(file_path, keywords, max_pages)
+        is_match, matched_kw = check_file_contains_keywords_with_pages(file_path, keywords, max_pages, verbose=verbose)
         if is_match:
-            print(f" -> ✓ 包含关键词[{matched_kw}]")
+            if verbose:
+                print(f" -> ✓ 包含关键词[{matched_kw}]")
             matched_in_top5.append(file_path)
             if not matched_keyword_top5:
                 matched_keyword_top5 = matched_kw
         else:
-            print(" -> ✗ 无关键词")
+            if verbose:
+                print(" -> ✗ 无关键词")
 
     # Top5中找到匹配
     if matched_in_top5:
@@ -336,7 +523,8 @@ def process_single_directory_phase(dir_path: Path, phase: int, max_pages: int, r
             return True, f"Top5中找到{len(matched_in_top5)}个匹配,选择最新", selected, matched_in_top5, matched_keyword_top5
         return True, "Top5中找到匹配文件", selected, matched_in_top5, matched_keyword_top5
 
-    print(f"    Top5未找到,扩展到其余{len(candidate_files) - len(top5_files)}个文件...")
+    if verbose:
+        print(f"    Top5未找到,扩展到其余{len(candidate_files) - len(top5_files)}个文件...")
 
     # 检查其余文件
     other_files = [f for f in candidate_files if f not in top5_files]
@@ -345,16 +533,19 @@ def process_single_directory_phase(dir_path: Path, phase: int, max_pages: int, r
 
     for file_path in other_files:
         size_mb = get_file_size(file_path) / (1024 * 1024)
-        print(f"    检查其他: {file_path.name[:30]}... (大小: {size_mb:.2f}MB)", end="", flush=True)
+        if verbose:
+            print(f"    检查其他: {file_path.name[:30]}... (大小: {size_mb:.2f}MB)", end="", flush=True)
 
-        is_match, matched_kw = check_file_contains_keywords_with_pages(file_path, keywords, max_pages)
+        is_match, matched_kw = check_file_contains_keywords_with_pages(file_path, keywords, max_pages, verbose=verbose)
         if is_match:
-            print(f" -> ✓ 包含关键词[{matched_kw}]")
+            if verbose:
+                print(f" -> ✓ 包含关键词[{matched_kw}]")
             matched_in_others.append(file_path)
             if not matched_keyword_others:
                 matched_keyword_others = matched_kw
         else:
-            print(" -> ✗ 无关键词")
+            if verbose:
+                print(" -> ✗ 无关键词")
 
     if matched_in_others:
         selected = get_newest_file(matched_in_others)
@@ -374,14 +565,14 @@ def copy_file_to_output(file_path: Path, output_dir: Path, new_name: str) -> Pat
 
 
 def process_review_type(dir_path: Path, phase: int, max_pages: int, review_type: str, 
-                        output_dir: Path, dir_id: str, stats: dict, results: list) -> Tuple[bool, Optional[Path]]:
+                        output_dir: Path, dir_id: str, verbose: bool = True) -> Tuple[bool, Optional[Path], Optional[Dict]]:
     """处理单个评审类型的筛选和输出
     
     Returns:
-        (是否成功, 选中的文件路径)
+        (是否成功, 选中的文件路径, 结果记录字典或None)
     """
     success, message, selected_file, all_matched, matched_kw = process_single_directory_phase(
-        dir_path, phase=phase, max_pages=max_pages, review_type=review_type
+        dir_path, phase=phase, max_pages=max_pages, review_type=review_type, verbose=verbose
     )
 
     if success and selected_file:
@@ -396,9 +587,10 @@ def process_review_type(dir_path: Path, phase: int, max_pages: int, review_type:
         new_filename = f"{dir_id}_{selected_file.name}"
         try:
             dest_path = copy_file_to_output(selected_file, output_dir, new_filename)
-            print(f"    ✅ [{review_type}] 已输出: {new_filename}")
+            if verbose:
+                print(f"    ✅ [{review_type}] 已输出: {new_filename}")
 
-            results.append({
+            result_record = {
                 '目录ID': dir_id,
                 '评审类型': review_type,
                 '阶段': f'第{phase}阶段',
@@ -413,11 +605,12 @@ def process_review_type(dir_path: Path, phase: int, max_pages: int, review_type:
                 '原路径': str(selected_file),
                 '目标路径': str(dest_path),
                 '处理时间': datetime.now().isoformat()
-            })
-            return True, selected_file
+            }
+            return True, selected_file, result_record
         except Exception as e:
-            print(f"    ❌ [{review_type}] 复制失败: {e}")
-            results.append({
+            if verbose:
+                print(f"    ❌ [{review_type}] 复制失败: {e}")
+            result_record = {
                 '目录ID': dir_id,
                 '评审类型': review_type,
                 '阶段': f'第{phase}阶段',
@@ -432,11 +625,12 @@ def process_review_type(dir_path: Path, phase: int, max_pages: int, review_type:
                 '原路径': str(selected_file),
                 '目标路径': '',
                 '处理时间': datetime.now().isoformat()
-            })
-            return False, None
+            }
+            return False, None, result_record
     else:
-        print(f"    ❌ [{review_type}] {message}")
-        return False, None
+        if verbose:
+            print(f"    ❌ [{review_type}] {message}")
+        return False, None, None
 
 
 def get_numeric_directories(base_dir: Path) -> List[Path]:
@@ -534,10 +728,175 @@ def resolve_config_path(path_value: str, script_dir: Path) -> Path:
     return (script_dir / path).resolve()
 
 
+# ==================== 多进程工作函数 ====================
+
+def process_directory_worker(args_tuple):
+    """
+    工作进程函数:处理单个目录的两种评审类型
+    
+    此函数在独立的工作进程中运行,同时处理专家评审和公司评审。
+    工作进程之间互不影响,各自独立复制文件到输出目录。
+    
+    Args:
+        args_tuple: (
+            dir_path_str,      # 目录路径字符串
+            phase,             # 阶段(1或2)
+            max_pages,         # 检查的最大页数
+            expert_output_dir_str,  # 专家评审输出目录
+            company_output_dir_str, # 公司评审输出目录
+            verbose            # 是否打印详细日志
+        )
+    
+    Returns:
+        dict: {
+            "dir_id": str,              # 目录ID
+            "expert_success": bool,     # 专家评审是否成功
+            "company_success": bool,    # 公司评审是否成功
+            "results": list,            # 结果记录列表(可能为空)
+        }
+    """
+    dir_path_str, phase, max_pages, expert_output_dir_str, company_output_dir_str, verbose = args_tuple
+    
+    dir_path = Path(dir_path_str)
+    expert_output_dir = Path(expert_output_dir_str)
+    company_output_dir = Path(company_output_dir_str)
+    dir_id = dir_path.name
+    
+    results = []
+    
+    # 处理专家评审
+    expert_success, expert_file, expert_result = process_review_type(
+        dir_path, phase=phase, max_pages=max_pages, 
+        review_type="expert", output_dir=expert_output_dir,
+        dir_id=dir_id, verbose=verbose
+    )
+    if expert_result:
+        results.append(expert_result)
+    
+    # 处理公司评审
+    company_success, company_file, company_result = process_review_type(
+        dir_path, phase=phase, max_pages=max_pages, 
+        review_type="company", output_dir=company_output_dir,
+        dir_id=dir_id, verbose=verbose
+    )
+    if company_result:
+        results.append(company_result)
+    
+    return {
+        "dir_id": dir_id,
+        "expert_success": expert_success,
+        "company_success": company_success,
+        "results": results,
+    }
+
+
+def run_phase_concurrently(dirs_to_process: List[Path], phase: int, max_pages: int,
+                           expert_output_dir: Path, company_output_dir: Path,
+                           temp_dir: Path, numeric_dirs: List[Path],
+                           stats: Dict, processed_dirs: Set, 
+                           phase1_no_match_expert: Set, phase1_no_match_company: Set,
+                           verbose: bool = False, cache_every: int = 10) -> Tuple[Dict, Set, Set]:
+    """
+    并发运行一个阶段的筛选
+    
+    Args:
+        dirs_to_process: 待处理的目录列表
+        phase: 阶段(1或2)
+        max_pages: 检查的最大页数
+        expert_output_dir: 专家评审输出目录
+        company_output_dir: 公司评审输出目录
+        temp_dir: 缓存目录
+        numeric_dirs: 所有数字目录(用于统计总数)
+        stats: 统计字典(会被修改)
+        processed_dirs: 已处理目录集合(会被修改)
+        phase1_no_match_expert: 一级未匹配专家评审的目录集合(会被修改)
+        phase1_no_match_company: 一级未匹配公司评审的目录集合(会被修改)
+        verbose: 工作进程是否打印详细日志
+        cache_every: 每处理多少个目录保存一次缓存
+    
+    Returns:
+        (stats, phase1_no_match_expert, phase1_no_match_company)
+    """
+    total = len(dirs_to_process)
+    completed = 0
+    all_results = []
+    
+    # 构建参数列表
+    args_list = []
+    for dir_path in dirs_to_process:
+        args_list.append((
+            str(dir_path),
+            phase,
+            max_pages,
+            str(expert_output_dir),
+            str(company_output_dir),
+            verbose
+        ))
+    
+    print(f"\n  启动 {NUM_WORKERS} 个并发进程处理 {total} 个目录...")
+    print(f"  工作模式: {'详细日志' if verbose else '静默模式(仅显示进度)'}\n")
+    
+    # 使用进程池并发处理
+    with mp.Pool(processes=NUM_WORKERS) as pool:
+        # imap_unordered 不保证顺序,但返回速度最快
+        for result in pool.imap_unordered(process_directory_worker, args_list):
+            dir_id = result["dir_id"]
+            
+            # 更新结果列表
+            all_results.extend(result["results"])
+            
+            # 更新统计
+            if result["expert_success"]:
+                stats["expert_success_count"] = stats.get("expert_success_count", 0) + 1
+                # 如果之前标记为未匹配,现在成功了,移除标记
+                phase1_no_match_expert.discard(dir_id)
+            else:
+                # 只有在一级筛选时才添加未匹配标记
+                if phase == 1:
+                    phase1_no_match_expert.add(dir_id)
+            
+            if result["company_success"]:
+                stats["company_success_count"] = stats.get("company_success_count", 0) + 1
+                phase1_no_match_company.discard(dir_id)
+            else:
+                if phase == 1:
+                    phase1_no_match_company.add(dir_id)
+            
+            # 标记为已处理
+            processed_dirs.add(dir_id)
+            completed += 1
+            
+            # 显示进度
+            progress = completed / total * 100
+            print(f"\r  进度: {completed}/{total} ({progress:.1f}%) | "
+                  f"专家成功: {stats.get('expert_success_count', 0)} | "
+                  f"公司成功: {stats.get('company_success_count', 0)} | "
+                  f"当前: {dir_id}", end="", flush=True)
+            
+            # 定期保存缓存
+            if completed % cache_every == 0 or completed == total:
+                cache_data = {
+                    "processed_dirs": list(processed_dirs),
+                    "phase1_no_match_expert": list(phase1_no_match_expert),
+                    "phase1_no_match_company": list(phase1_no_match_company),
+                    "results": all_results,
+                    "stats": stats,
+                    "phase": phase,
+                    "total_directories": len(numeric_dirs)
+                }
+                save_progress_cache(temp_dir, cache_data)
+    
+    print(f"\n\n  ✅ 阶段完成!处理 {completed} 个目录")
+    print(f"     专家评审成功: {stats.get('expert_success_count', 0)} 个")
+    print(f"     公司评审成功: {stats.get('company_success_count', 0)} 个")
+    
+    return stats, phase1_no_match_expert, phase1_no_match_company
+
+
 def main():
     """主函数"""
     print("=" * 70)
-    print("评审意见PDF筛选脚本 - 二级筛选+断点续传版")
+    print("评审意见PDF筛选脚本 - 多进程并发版+测试模式")
     print("=" * 70)
 
     # 按文件首部配置组装路径(不再按项目根目录拼接)
@@ -552,6 +911,8 @@ def main():
     print(f"  专家评审输出目录: {expert_output_dir}")
     print(f"  公司评审输出目录: {company_output_dir}")
     print(f"  缓存目录: {temp_dir}")
+    print(f"  并发进程数: {NUM_WORKERS}")
+    print(f"  测试模式: {'是(抽取5个目录)' if TEST_MODE else '否'}")
     print(f"  专家评审关键词: {KEYWORDS['expert']}")
     print(f"  公司评审关键词: {KEYWORDS['company']}")
     print(f"  一级筛选: 前{PHASE_1_PAGES}页")
@@ -575,23 +936,50 @@ def main():
         sys.exit(1)
 
     print(f"  找到 {len(numeric_dirs)} 个数字编号子目录")
-
-    # 加载缓存(断点续传)
-    print(f"\n【步骤 2/6】加载进度缓存...")
-    cache = load_progress_cache(temp_dir)
+    
+    # ==================== 测试模式:随机抽取 ====================
+    if TEST_MODE:
+        print(f"\n【测试模式】随机抽取 {TEST_SAMPLE_SIZE} 个目录进行测试...")
+        if len(numeric_dirs) <= TEST_SAMPLE_SIZE:
+            test_dirs = numeric_dirs
+            print(f"  目录总数不足 {TEST_SAMPLE_SIZE},测试全部 {len(numeric_dirs)} 个目录")
+        else:
+            # 使用固定随机种子,确保可复现
+            random.seed(42)
+            test_dirs = random.sample(numeric_dirs, TEST_SAMPLE_SIZE)
+            test_dirs.sort(key=lambda d: int(d.name))  # 按数字排序,方便查看
+        
+        numeric_dirs = test_dirs
+        print(f"  测试目录: {[d.name for d in test_dirs]}")
+        # 测试模式不加载缓存,不进入二级筛选
+        cache = {
+            "processed_dirs": [],
+            "phase1_no_match_expert": [],
+            "phase1_no_match_company": [],
+            "results": [],
+            "stats": {},
+            "phase": 1,
+            "last_update": None
+        }
+    else:
+        # 加载缓存(断点续传)
+        print(f"\n【步骤 2/6】加载进度缓存...")
+        cache = load_progress_cache(temp_dir)
+    
     processed_dirs = set(cache.get("processed_dirs", []))
     phase1_no_match_expert = set(cache.get("phase1_no_match_expert", []))
     phase1_no_match_company = set(cache.get("phase1_no_match_company", []))
     current_phase = cache.get("phase", 1)
+    all_results = cache.get("results", [])
 
-    if processed_dirs:
+    if processed_dirs and not TEST_MODE:
         print(f"  发现缓存:")
         print(f"    - 已处理: {len(processed_dirs)} 个目录")
         print(f"    - 专家评审一级未找到: {len(phase1_no_match_expert)} 个目录")
         print(f"    - 公司评审一级未找到: {len(phase1_no_match_company)} 个目录")
         print(f"    - 当前阶段: 第{current_phase}阶段")
     else:
-        print(f"  无缓存,将从头开始处理")
+        print(f"  {'无缓存(测试模式),将从头开始处理' if TEST_MODE else '无缓存,将从头开始处理'}")
 
     # ==================== 一级筛选 ====================
     if current_phase == 1:
@@ -610,8 +998,6 @@ def main():
             print(f"  公司评审输出到: {company_output_dir}")
             print()
 
-            results = cache.get("results", [])
-
             default_stats = {
                 "total_directories": len(numeric_dirs),
                 "expert_success_count": 0,
@@ -630,55 +1016,23 @@ def main():
                 if key not in stats:
                     stats[key] = value
 
-            expert_success_count = 0
-            company_success_count = 0
-            total_to_process = len(dirs_to_process)
-
-            for idx, dir_path in enumerate(dirs_to_process):
-                dir_id = dir_path.name
-                overall_idx = len(processed_dirs) + idx + 1
-
-                print(f"\n[{overall_idx}/{len(numeric_dirs)}] 当前目录: {dir_id}")
-                print_progress_bar(idx + 1, total_to_process)
-
-                # 处理专家评审
-                expert_success, expert_file = process_review_type(
-                    dir_path, phase=1, max_pages=PHASE_1_PAGES, 
-                    review_type="expert", output_dir=expert_output_dir,
-                    dir_id=dir_id, stats=stats, results=results
-                )
-                if expert_success:
-                    stats["expert_success_count"] += 1
-                    expert_success_count += 1
-                else:
-                    phase1_no_match_expert.add(dir_id)
-
-                # 处理公司评审
-                company_success, company_file = process_review_type(
-                    dir_path, phase=1, max_pages=PHASE_1_PAGES, 
-                    review_type="company", output_dir=company_output_dir,
-                    dir_id=dir_id, stats=stats, results=results
-                )
-                if company_success:
-                    stats["company_success_count"] += 1
-                    company_success_count += 1
-                else:
-                    phase1_no_match_company.add(dir_id)
-
-                processed_dirs.add(dir_id)
-
-                if (idx + 1) % 10 == 0 or idx == len(dirs_to_process) - 1:
-                    cache_data = {
-                        "processed_dirs": list(processed_dirs),
-                        "phase1_no_match_expert": list(phase1_no_match_expert),
-                        "phase1_no_match_company": list(phase1_no_match_company),
-                        "results": results,
-                        "stats": stats,
-                        "phase": 1,
-                        "total_directories": len(numeric_dirs)
-                    }
-                    save_progress_cache(temp_dir, cache_data)
-                    print(f"\n    💾 进度已缓存 (已处理 {len(processed_dirs)}/{len(numeric_dirs)} 个目录)")
+            # 并发处理一级筛选
+            # 在测试模式下使用详细日志(verbose=True),正式运行使用静默模式(verbose=False)
+            verbose_mode = TEST_MODE  # 测试模式打印详细日志,正式模式静默
+            stats, phase1_no_match_expert, phase1_no_match_company = run_phase_concurrently(
+                dirs_to_process, phase=1, max_pages=PHASE_1_PAGES,
+                expert_output_dir=expert_output_dir, company_output_dir=company_output_dir,
+                temp_dir=temp_dir, numeric_dirs=numeric_dirs,
+                stats=stats, processed_dirs=processed_dirs,
+                phase1_no_match_expert=phase1_no_match_expert,
+                phase1_no_match_company=phase1_no_match_company,
+                verbose=verbose_mode,
+                cache_every=10
+            )
+            
+            # 更新结果列表
+            cache = load_progress_cache(temp_dir)
+            all_results = cache.get("results", [])
 
             print(f"\n\n【一级筛选完成】")
             print(f"  专家评审成功: {stats['expert_success_count']} 个")
@@ -686,6 +1040,23 @@ def main():
             print(f"  专家评审未找到: {len(phase1_no_match_expert)} 个")
             print(f"  公司评审未找到: {len(phase1_no_match_company)} 个")
 
+            # Test mode: skip the phase-2 screening, save the final results, then return
+            if TEST_MODE:
+                print(f"\n{'='*70}")
+                print("【测试模式完成】")
+                print(f"  共测试 {len(dirs_to_process)} 个目录")
+                print(f"  专家评审成功: {stats['expert_success_count']} 个")
+                print(f"  公司评审成功: {stats['company_success_count']} 个")
+                print(f"  测试结果已保存到缓存,可查看输出目录确认文件")
+                print("="*70)
+                
+                # 测试模式也保存最终结果
+                _save_final_results(
+                    temp_dir, expert_output_dir, company_output_dir,
+                    numeric_dirs, all_results, stats, processed_dirs
+                )
+                return
+
             # 询问是否进行二级筛选
             total_no_match = len(phase1_no_match_expert.union(phase1_no_match_company))
             print(f"\n{'='*70}")
@@ -720,7 +1091,7 @@ def main():
                     "processed_dirs": list(processed_dirs),
                     "phase1_no_match_expert": list(phase1_no_match_expert),
                     "phase1_no_match_company": list(phase1_no_match_company),
-                    "results": results,
+                    "results": all_results,
                     "stats": stats,
                     "phase": 2,
                     "total_directories": len(numeric_dirs)
@@ -735,7 +1106,7 @@ def main():
 
         # 重新加载以获取最新状态
         cache = load_progress_cache(temp_dir)
-        results = cache.get("results", [])
+        all_results = cache.get("results", [])
         stats = cache.get("stats", {})
         phase1_no_match_expert = set(cache.get("phase1_no_match_expert", []))
         phase1_no_match_company = set(cache.get("phase1_no_match_company", []))
@@ -743,7 +1114,9 @@ def main():
         # 获取需要二级筛选的目录(专家评审或公司评审任一未找到)
         phase2_dirs_expert = [d for d in numeric_dirs if d.name in phase1_no_match_expert]
         phase2_dirs_company = [d for d in numeric_dirs if d.name in phase1_no_match_company]
-        all_phase2_dirs = set(phase2_dirs_expert + phase2_dirs_company)
+        all_phase2_dirs = list(set(phase2_dirs_expert + phase2_dirs_company))
+        # 按数字排序
+        all_phase2_dirs.sort(key=lambda d: int(d.name))
 
         if not all_phase2_dirs:
             print(f"\n  没有需要二级筛选的目录")
@@ -751,84 +1124,49 @@ def main():
             print(f"\n【步骤 4/6】二级筛选处理...")
             print(f"  专家评审需二级筛选: {len(phase2_dirs_expert)} 个目录")
             print(f"  公司评审需二级筛选: {len(phase2_dirs_company)} 个目录")
-
-            expert_phase2_success = 0
-            company_phase2_success = 0
-
-            # 处理专家评审二级筛选
-            if phase2_dirs_expert:
-                print(f"\n  --- 专家评审二级筛选 ---")
-                for idx, dir_path in enumerate(phase2_dirs_expert):
-                    dir_id = dir_path.name
-                    print(f"\n[{idx+1}/{len(phase2_dirs_expert)}] 专家评审二级筛选: {dir_id}")
-
-                    success, selected_file = process_review_type(
-                        dir_path, phase=2, max_pages=PHASE_2_PAGES, 
-                        review_type="expert", output_dir=expert_output_dir,
-                        dir_id=dir_id, stats=stats, results=results
-                    )
-                    if success:
-                        stats["expert_phase2_success_count"] = stats.get("expert_phase2_success_count", 0) + 1
-                        expert_phase2_success += 1
-                    phase1_no_match_expert.discard(dir_id)
-
-                    if (idx + 1) % 10 == 0 or idx == len(phase2_dirs_expert) - 1:
-                        cache_data = {
-                            "processed_dirs": list(processed_dirs),
-                            "phase1_no_match_expert": list(phase1_no_match_expert),
-                            "phase1_no_match_company": list(phase1_no_match_company),
-                            "results": results,
-                            "stats": stats,
-                            "phase": 2,
-                            "total_directories": len(numeric_dirs)
-                        }
-                        save_progress_cache(temp_dir, cache_data)
-
-            # 处理公司评审二级筛选
-            if phase2_dirs_company:
-                print(f"\n  --- 公司评审二级筛选 ---")
-                for idx, dir_path in enumerate(phase2_dirs_company):
-                    dir_id = dir_path.name
-                    print(f"\n[{idx+1}/{len(phase2_dirs_company)}] 公司评审二级筛选: {dir_id}")
-
-                    success, selected_file = process_review_type(
-                        dir_path, phase=2, max_pages=PHASE_2_PAGES, 
-                        review_type="company", output_dir=company_output_dir,
-                        dir_id=dir_id, stats=stats, results=results
-                    )
-                    if success:
-                        stats["company_phase2_success_count"] = stats.get("company_phase2_success_count", 0) + 1
-                        company_phase2_success += 1
-                    phase1_no_match_company.discard(dir_id)
-
-                    if (idx + 1) % 10 == 0 or idx == len(phase2_dirs_company) - 1:
-                        cache_data = {
-                            "processed_dirs": list(processed_dirs),
-                            "phase1_no_match_expert": list(phase1_no_match_expert),
-                            "phase1_no_match_company": list(phase1_no_match_company),
-                            "results": results,
-                            "stats": stats,
-                            "phase": 2,
-                            "total_directories": len(numeric_dirs)
-                        }
-                        save_progress_cache(temp_dir, cache_data)
+            print(f"  总计需二级筛选: {len(all_phase2_dirs)} 个目录")
+            
+            # 二级筛选也使用并发处理
+            # 注意:二级筛选时,之前已成功的目录不需要再处理
+            # 但由于 process_directory_worker 会同时处理两种类型,
+            # 已成功的类型会再次被处理(但结果相同,不会重复复制因为文件名相同会覆盖)
+            # 为了效率,我们只处理有未匹配的目录
+            
+            stats, phase1_no_match_expert, phase1_no_match_company = run_phase_concurrently(
+                all_phase2_dirs, phase=2, max_pages=PHASE_2_PAGES,
+                expert_output_dir=expert_output_dir, company_output_dir=company_output_dir,
+                temp_dir=temp_dir, numeric_dirs=numeric_dirs,
+                stats=stats, processed_dirs=processed_dirs,
+                phase1_no_match_expert=phase1_no_match_expert,
+                phase1_no_match_company=phase1_no_match_company,
+                verbose=False,  # 二级筛选使用静默模式
+                cache_every=10
+            )
+            
+            # 更新结果
+            cache = load_progress_cache(temp_dir)
+            all_results = cache.get("results", [])
 
             print(f"\n\n【二级筛选完成】")
-            print(f"  专家评审二级筛选成功: {expert_phase2_success} 个")
-            print(f"  公司评审二级筛选成功: {company_phase2_success} 个")
+            print(f"  专家评审二级筛选成功: {stats.get('expert_phase2_success_count', 0)} 个")
+            print(f"  公司评审二级筛选成功: {stats.get('company_phase2_success_count', 0)} 个")
 
         current_phase = 3
 
     # ==================== 保存最终结果 ====================
+    _save_final_results(
+        temp_dir, expert_output_dir, company_output_dir,
+        numeric_dirs, all_results, stats, processed_dirs
+    )
+
+
+def _save_final_results(temp_dir: Path, expert_output_dir: Path, company_output_dir: Path,
+                        numeric_dirs: List[Path], results: List[Dict], stats: Dict, processed_dirs: Set):
+    """保存最终结果(Excel、JSON统计、缺失目录ID等)"""
     print(f"\n\n【步骤 5/6】保存最终结果...")
     expert_output_dir.mkdir(parents=True, exist_ok=True)
     company_output_dir.mkdir(parents=True, exist_ok=True)
 
-    # 重新加载最新结果
-    cache = load_progress_cache(temp_dir)
-    results = cache.get("results", [])
-    stats = cache.get("stats", {})
-
     # 确保所有键存在
     default_keys = [
         "total_directories", "expert_success_count", "company_success_count",
@@ -854,7 +1192,7 @@ def main():
     company_phase1 = stats.get("company_success_count", 0)
     company_phase2 = stats.get("company_phase2_success_count", 0)
 
-    # 基于最终结果反推“未采集到”的目录ID,避免阶段缓存集合被覆盖导致不准确
+    # 基于最终结果反推"未采集到"的目录ID,避免阶段缓存集合被覆盖导致不准确
     all_dir_ids = {d.name for d in numeric_dirs}
     expert_success_ids = {
         str(r.get("目录ID"))
@@ -877,7 +1215,7 @@ def main():
         f.write("\n".join(company_missing_ids))
 
     final_stats = {
-        "start_time": cache.get("start_time", datetime.now().isoformat()),
+        "start_time": stats.get("start_time", datetime.now().isoformat()),
         "end_time": datetime.now().isoformat(),
         "total_directories": len(numeric_dirs),
         "processed_count": len(processed_dirs),
@@ -944,6 +1282,8 @@ def main():
 
 
 if __name__ == '__main__':
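+    # freeze_support() is needed for multiprocessing only when this script is frozen into a Windows executable; it has no effect otherwise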
+    mp.freeze_support()
     try:
         main()
     except KeyboardInterrupt:

+ 3112 - 0
src/app/scripts/ceshi/temp/评审筛选进度缓存_已完成.json

@@ -0,0 +1,3112 @@
+{
+  "processed_dirs": [
+    "1308",
+    "41",
+    "645",
+    "2217",
+    "2316",
+    "304",
+    "542",
+    "1725",
+    "434",
+    "1504",
+    "191",
+    "1520",
+    "1923",
+    "2091",
+    "3533",
+    "4708",
+    "1113",
+    "2023",
+    "86",
+    "1986",
+    "1633",
+    "879",
+    "230",
+    "864",
+    "1218",
+    "1597",
+    "1892",
+    "2581",
+    "2194",
+    "1448",
+    "1238",
+    "2653",
+    "18",
+    "3372",
+    "1927",
+    "1872",
+    "2233",
+    "2857",
+    "3143",
+    "1874",
+    "511",
+    "947",
+    "2483",
+    "3215",
+    "819",
+    "3861",
+    "2278",
+    "1386",
+    "2102",
+    "3667",
+    "3968",
+    "1882",
+    "2094",
+    "1035",
+    "2066",
+    "3333",
+    "1791",
+    "1202",
+    "218",
+    "1855",
+    "2056",
+    "2847",
+    "1810",
+    "2534",
+    "2724",
+    "4867",
+    "186",
+    "1672",
+    "2355",
+    "1350",
+    "1493",
+    "245",
+    "382",
+    "185",
+    "3119",
+    "1652",
+    "3294",
+    "970",
+    "933",
+    "2140",
+    "1443",
+    "3113",
+    "1511",
+    "5729",
+    "808",
+    "934",
+    "1513",
+    "471",
+    "5057",
+    "635",
+    "437",
+    "893",
+    "1730",
+    "804",
+    "1159",
+    "2148",
+    "1181",
+    "973",
+    "2609",
+    "2565",
+    "2718",
+    "3303",
+    "1427",
+    "1970",
+    "2049",
+    "94",
+    "1768",
+    "2321",
+    "911",
+    "965",
+    "164",
+    "2789",
+    "508",
+    "4019",
+    "1572",
+    "2163",
+    "835",
+    "855",
+    "254",
+    "113",
+    "1491",
+    "278",
+    "2050",
+    "2046",
+    "1755",
+    "1753",
+    "1919",
+    "4415",
+    "1548",
+    "2219",
+    "2634",
+    "357",
+    "3217",
+    "1563",
+    "615",
+    "476",
+    "1546",
+    "1582",
+    "1668",
+    "1964",
+    "1570",
+    "2083",
+    "2866",
+    "3289",
+    "2386",
+    "2076",
+    "3489",
+    "2973",
+    "2123",
+    "2639",
+    "3357",
+    "1253",
+    "2285",
+    "2132",
+    "2729",
+    "2460",
+    "3591",
+    "1070",
+    "654",
+    "1661",
+    "1922",
+    "473",
+    "3525",
+    "3026",
+    "1119",
+    "400",
+    "2222",
+    "1875",
+    "1027",
+    "2319",
+    "2092",
+    "1506",
+    "909",
+    "1916",
+    "5142",
+    "1184",
+    "3332",
+    "1910",
+    "1372",
+    "2229",
+    "1932",
+    "3695",
+    "743",
+    "2134",
+    "5728",
+    "1908",
+    "717",
+    "3398",
+    "269",
+    "123",
+    "2596",
+    "1011",
+    "1351",
+    "386",
+    "2291",
+    "2417",
+    "3417",
+    "1373",
+    "2055",
+    "1272",
+    "713",
+    "2277",
+    "2497",
+    "2980",
+    "1130",
+    "3305",
+    "1710",
+    "3463",
+    "1579",
+    "544",
+    "111",
+    "285",
+    "246",
+    "3101",
+    "872",
+    "2239",
+    "2373",
+    "2854",
+    "976",
+    "684",
+    "1999",
+    "2168",
+    "2697",
+    "853",
+    "641",
+    "1689",
+    "2560",
+    "908",
+    "2127",
+    "3236",
+    "1153",
+    "1258",
+    "1085",
+    "4568",
+    "2087",
+    "989",
+    "2044",
+    "3162",
+    "2764",
+    "1264",
+    "4409",
+    "3155",
+    "1567",
+    "4582",
+    "2418",
+    "2016",
+    "3093",
+    "2283",
+    "1691",
+    "1848",
+    "1965",
+    "996",
+    "1581",
+    "1842",
+    "3211",
+    "823",
+    "2183",
+    "957",
+    "1602",
+    "232",
+    "396",
+    "1369",
+    "1353",
+    "1291",
+    "1519",
+    "867",
+    "1779",
+    "3809",
+    "3679",
+    "3243",
+    "1120",
+    "2977",
+    "1726",
+    "198",
+    "2737",
+    "2115",
+    "994",
+    "3423",
+    "2051",
+    "3557",
+    "1556",
+    "267",
+    "156",
+    "1178",
+    "2619",
+    "3126",
+    "4071",
+    "861",
+    "1819",
+    "2462",
+    "2669",
+    "1025",
+    "5128",
+    "3660",
+    "2187",
+    "140",
+    "1368",
+    "512",
+    "5419",
+    "1127",
+    "605",
+    "1889",
+    "736",
+    "1482",
+    "2248",
+    "566",
+    "1269",
+    "2208",
+    "3174",
+    "3747",
+    "539",
+    "977",
+    "1453",
+    "290",
+    "497",
+    "1501",
+    "1058",
+    "2251",
+    "3204",
+    "5629",
+    "3545",
+    "1860",
+    "482",
+    "984",
+    "1414",
+    "1676",
+    "2230",
+    "504",
+    "3096",
+    "599",
+    "1784",
+    "1341",
+    "958",
+    "1797",
+    "2529",
+    "5423",
+    "1837",
+    "1067",
+    "2170",
+    "2047",
+    "376",
+    "1884",
+    "2772",
+    "3350",
+    "2551",
+    "408",
+    "1920",
+    "902",
+    "5376",
+    "3182",
+    "1596",
+    "1429",
+    "1728",
+    "1888",
+    "4900",
+    "705",
+    "5643",
+    "3137",
+    "2192",
+    "3515",
+    "810",
+    "541",
+    "2842",
+    "1337",
+    "1949",
+    "1685",
+    "1760",
+    "516",
+    "767",
+    "2342",
+    "1832",
+    "1891",
+    "2206",
+    "1162",
+    "1216",
+    "5690",
+    "1176",
+    "257",
+    "609",
+    "2125",
+    "2326",
+    "354",
+    "2165",
+    "346",
+    "863",
+    "1179",
+    "1595",
+    "1309",
+    "828",
+    "1045",
+    "2211",
+    "337",
+    "1624",
+    "4834",
+    "1828",
+    "1154",
+    "1354",
+    "671",
+    "922",
+    "1928",
+    "1188",
+    "120",
+    "1894",
+    "1953",
+    "2971",
+    "4864",
+    "3362",
+    "1466",
+    "1110",
+    "2261",
+    "1609",
+    "1877",
+    "978",
+    "833",
+    "895",
+    "955",
+    "2252",
+    "785",
+    "2305",
+    "1311",
+    "1646",
+    "1223",
+    "1068",
+    "3003",
+    "251",
+    "2011",
+    "580",
+    "280",
+    "1088",
+    "4082",
+    "334",
+    "692",
+    "4731",
+    "1961",
+    "2527",
+    "417",
+    "1534",
+    "1973",
+    "844",
+    "133",
+    "640",
+    "501",
+    "1903",
+    "2263",
+    "456",
+    "2817",
+    "1549",
+    "1126",
+    "3142",
+    "181",
+    "1823",
+    "366",
+    "1914",
+    "2760",
+    "1687",
+    "551",
+    "2297",
+    "4292",
+    "2228",
+    "67",
+    "207",
+    "988",
+    "1979",
+    "575",
+    "878",
+    "1569",
+    "2372",
+    "1794",
+    "1626",
+    "4283",
+    "783",
+    "1255",
+    "5131",
+    "2801",
+    "768",
+    "3340",
+    "3229",
+    "2071",
+    "1303",
+    "5408",
+    "1680",
+    "325",
+    "1662",
+    "950",
+    "2849",
+    "1762",
+    "3430",
+    "787",
+    "2743",
+    "2714",
+    "115",
+    "699",
+    "1880",
+    "3253",
+    "505",
+    "2151",
+    "2178",
+    "51",
+    "3098",
+    "604",
+    "135",
+    "1675",
+    "2504",
+    "147",
+    "1508",
+    "1780",
+    "1583",
+    "3696",
+    "1765",
+    "3488",
+    "939",
+    "1371",
+    "3728",
+    "733",
+    "2186",
+    "807",
+    "624",
+    "4355",
+    "328",
+    "4162",
+    "2172",
+    "4209",
+    "1312",
+    "2448",
+    "1698",
+    "3033",
+    "1062",
+    "2275",
+    "2949",
+    "1406",
+    "1518",
+    "523",
+    "1477",
+    "336",
+    "3052",
+    "2312",
+    "2113",
+    "422",
+    "343",
+    "2550",
+    "2800",
+    "2394",
+    "1525",
+    "926",
+    "2010",
+    "727",
+    "2160",
+    "259",
+    "870",
+    "1001",
+    "1087",
+    "203",
+    "737",
+    "1183",
+    "840",
+    "298",
+    "494",
+    "1846",
+    "1329",
+    "3718",
+    "3775",
+    "2738",
+    "1820",
+    "1320",
+    "1349",
+    "2176",
+    "2562",
+    "1134",
+    "562",
+    "1206",
+    "262",
+    "729",
+    "2875",
+    "3683",
+    "781",
+    "1840",
+    "3697",
+    "214",
+    "3045",
+    "1994",
+    "1621",
+    "1995",
+    "5095",
+    "3194",
+    "3279",
+    "194",
+    "2898",
+    "3344",
+    "2595",
+    "1924",
+    "2762",
+    "141",
+    "587",
+    "403",
+    "2666",
+    "709",
+    "905",
+    "4885",
+    "1237",
+    "2344",
+    "4686",
+    "1331",
+    "1426",
+    "758",
+    "1158",
+    "4280",
+    "1925",
+    "1586",
+    "1918",
+    "2166",
+    "3538",
+    "1742",
+    "3326",
+    "2215",
+    "3046",
+    "1066",
+    "2019",
+    "3584",
+    "3114",
+    "159",
+    "1362",
+    "201",
+    "1138",
+    "549",
+    "407",
+    "2450",
+    "3733",
+    "383",
+    "1271",
+    "2189",
+    "820",
+    "3507",
+    "1876",
+    "2108",
+    "1751",
+    "1464",
+    "921",
+    "1822",
+    "3037",
+    "814",
+    "1056",
+    "1003",
+    "1861"
+  ],
+  "phase1_no_match_expert": [
+    "1308",
+    "41",
+    "645",
+    "2217",
+    "2316",
+    "304",
+    "542",
+    "1725",
+    "434",
+    "1504",
+    "1923",
+    "3533",
+    "2023",
+    "1986",
+    "1633",
+    "879",
+    "230",
+    "864",
+    "1597",
+    "1892",
+    "2581",
+    "2194",
+    "1448",
+    "1238",
+    "2653",
+    "18",
+    "3372",
+    "1927",
+    "1872",
+    "2233",
+    "2857",
+    "3143",
+    "1874",
+    "511",
+    "947",
+    "3215",
+    "819",
+    "2278",
+    "1386",
+    "2102",
+    "3667",
+    "3968",
+    "1882",
+    "1035",
+    "2066",
+    "1791",
+    "1202",
+    "2056",
+    "2847",
+    "1810",
+    "2534",
+    "2724",
+    "4867",
+    "186",
+    "1672",
+    "2355",
+    "1350",
+    "1493",
+    "245",
+    "382",
+    "185",
+    "3119",
+    "3294",
+    "970",
+    "933",
+    "2140",
+    "1443",
+    "3113",
+    "1511",
+    "5729",
+    "808",
+    "934",
+    "1513",
+    "5057",
+    "635",
+    "437",
+    "893",
+    "804",
+    "1159",
+    "2148",
+    "1181",
+    "973",
+    "2565",
+    "2718",
+    "3303",
+    "1427",
+    "1970",
+    "2049",
+    "94",
+    "1768",
+    "2321",
+    "911",
+    "965",
+    "164",
+    "2789",
+    "4019",
+    "1572",
+    "2163",
+    "835",
+    "855",
+    "254",
+    "113",
+    "1491",
+    "278",
+    "2050",
+    "2046",
+    "1755",
+    "1753",
+    "1919",
+    "4415",
+    "1548",
+    "2219",
+    "2634",
+    "357",
+    "3217",
+    "1563",
+    "615",
+    "476",
+    "1546",
+    "1582",
+    "1668",
+    "1964",
+    "1570",
+    "2866",
+    "3289",
+    "2386",
+    "2076",
+    "3489",
+    "2973",
+    "2639",
+    "3357",
+    "1253",
+    "2285",
+    "2132",
+    "2729",
+    "3591",
+    "1070",
+    "654",
+    "1922",
+    "1661",
+    "473",
+    "3026",
+    "1119",
+    "400",
+    "1875",
+    "1027",
+    "2319",
+    "2092",
+    "1506",
+    "909",
+    "1916",
+    "5142",
+    "1184",
+    "1910",
+    "1372",
+    "1932",
+    "3695",
+    "743",
+    "5728",
+    "1908",
+    "717",
+    "3398",
+    "1011",
+    "1351",
+    "386",
+    "2291",
+    "2417",
+    "3417",
+    "1373",
+    "2055",
+    "1272",
+    "713",
+    "2277",
+    "2497",
+    "2980",
+    "1130",
+    "3305",
+    "3463",
+    "1579",
+    "544",
+    "111",
+    "285",
+    "246",
+    "872",
+    "2239",
+    "2854",
+    "976",
+    "684",
+    "1999",
+    "2168",
+    "2697",
+    "641",
+    "1689",
+    "2560",
+    "908",
+    "2127",
+    "3236",
+    "1153",
+    "1258",
+    "1085",
+    "3162",
+    "989",
+    "2044",
+    "2764",
+    "1264",
+    "4409",
+    "3155",
+    "1567",
+    "4582",
+    "2418",
+    "2016",
+    "3093",
+    "2283",
+    "1691",
+    "1848",
+    "1965",
+    "996",
+    "1581",
+    "3211",
+    "823",
+    "957",
+    "1602",
+    "232",
+    "396",
+    "1369",
+    "1353",
+    "1291",
+    "1519",
+    "867",
+    "1779",
+    "3809",
+    "3679",
+    "3243",
+    "1120",
+    "2977",
+    "1726",
+    "198",
+    "2737",
+    "2115",
+    "994",
+    "3423",
+    "2051",
+    "3557",
+    "1556",
+    "267",
+    "156",
+    "1178",
+    "2619",
+    "3126",
+    "4071",
+    "861",
+    "1819",
+    "2462",
+    "2669",
+    "1025",
+    "5128",
+    "3660",
+    "2187",
+    "140",
+    "1368",
+    "512",
+    "5419",
+    "1127",
+    "605",
+    "1889",
+    "736",
+    "1482",
+    "2248",
+    "566",
+    "1269",
+    "2208",
+    "3174",
+    "3747",
+    "539",
+    "977",
+    "1453",
+    "290",
+    "497",
+    "1501",
+    "1058",
+    "2251",
+    "3204",
+    "5629",
+    "3545",
+    "1860",
+    "482",
+    "984",
+    "1676",
+    "504",
+    "3096",
+    "599",
+    "1784",
+    "1341",
+    "958",
+    "1797",
+    "2529",
+    "5423",
+    "1837",
+    "1067",
+    "2170",
+    "2047",
+    "376",
+    "1884",
+    "2772",
+    "3350",
+    "2551",
+    "408",
+    "1920",
+    "902",
+    "5376",
+    "3182",
+    "1596",
+    "1429",
+    "1728",
+    "1888",
+    "4900",
+    "705",
+    "5643",
+    "3137",
+    "3515",
+    "810",
+    "541",
+    "2842",
+    "1337",
+    "1949",
+    "1685",
+    "1760",
+    "516",
+    "767",
+    "2342",
+    "1832",
+    "1891",
+    "2206",
+    "1162",
+    "1216",
+    "5690",
+    "257",
+    "609",
+    "354",
+    "2165",
+    "346",
+    "863",
+    "1179",
+    "1595",
+    "828",
+    "1045",
+    "2211",
+    "337",
+    "1624",
+    "4834",
+    "1154",
+    "1354",
+    "671",
+    "922",
+    "1928",
+    "1188",
+    "120",
+    "1953",
+    "2971",
+    "4864",
+    "3362",
+    "1466",
+    "1110",
+    "2261",
+    "1609",
+    "978",
+    "833",
+    "895",
+    "785",
+    "2305",
+    "1223",
+    "1068",
+    "3003",
+    "251",
+    "580",
+    "1088",
+    "334",
+    "692",
+    "4731",
+    "1961",
+    "2527",
+    "417",
+    "1973",
+    "844",
+    "133",
+    "640",
+    "501",
+    "2263",
+    "456",
+    "2817",
+    "1549",
+    "1126",
+    "3142",
+    "1823",
+    "366",
+    "1914",
+    "2760",
+    "1687",
+    "2297",
+    "4292",
+    "2228",
+    "988",
+    "207",
+    "1979",
+    "575",
+    "1569",
+    "2372",
+    "1794",
+    "4283",
+    "783",
+    "1255",
+    "5131",
+    "2801",
+    "768",
+    "3340",
+    "3229",
+    "2071",
+    "1303",
+    "5408",
+    "1680",
+    "325",
+    "1662",
+    "950",
+    "2849",
+    "2743",
+    "2714",
+    "115",
+    "699",
+    "1880",
+    "3253",
+    "505",
+    "2178",
+    "3098",
+    "604",
+    "135",
+    "1675",
+    "147",
+    "1508",
+    "1780",
+    "3696",
+    "3488",
+    "939",
+    "1371",
+    "3728",
+    "733",
+    "2186",
+    "807",
+    "624",
+    "4355",
+    "328",
+    "4162",
+    "2172",
+    "4209",
+    "2448",
+    "1698",
+    "3033",
+    "2275",
+    "2949",
+    "1406",
+    "1518",
+    "523",
+    "336",
+    "3052",
+    "2312",
+    "2113",
+    "343",
+    "2550",
+    "2800",
+    "2394",
+    "926",
+    "2160",
+    "259",
+    "1001",
+    "203",
+    "737",
+    "1183",
+    "840",
+    "298",
+    "494",
+    "1846",
+    "1329",
+    "3718",
+    "3775",
+    "1820",
+    "1320",
+    "1349",
+    "2562",
+    "1134",
+    "562",
+    "729",
+    "2875",
+    "3683",
+    "781",
+    "1840",
+    "3697",
+    "3045",
+    "1994",
+    "1621",
+    "1995",
+    "5095",
+    "3279",
+    "2898",
+    "3344",
+    "1924",
+    "2762",
+    "587",
+    "403",
+    "2666",
+    "709",
+    "905",
+    "4885",
+    "1237",
+    "2344",
+    "4686",
+    "758",
+    "1158",
+    "4280",
+    "1925",
+    "1586",
+    "1918",
+    "2166",
+    "1742",
+    "3326",
+    "2215",
+    "3046",
+    "1066",
+    "2019",
+    "3584",
+    "159",
+    "1362",
+    "201",
+    "1138",
+    "549",
+    "407",
+    "2450",
+    "3733",
+    "383",
+    "1271",
+    "2189",
+    "820",
+    "1876",
+    "2108",
+    "1464",
+    "921",
+    "1822",
+    "3037",
+    "814",
+    "1003",
+    "1861"
+  ],
+  "phase1_no_match_company": [
+    "1308",
+    "645",
+    "2217",
+    "2316",
+    "304",
+    "1725",
+    "434",
+    "1504",
+    "191",
+    "1923",
+    "3533",
+    "1113",
+    "2023",
+    "86",
+    "1986",
+    "1633",
+    "879",
+    "230",
+    "864",
+    "1892",
+    "2581",
+    "1448",
+    "1238",
+    "2653",
+    "18",
+    "1927",
+    "1872",
+    "2233",
+    "2857",
+    "3143",
+    "1874",
+    "511",
+    "947",
+    "3215",
+    "819",
+    "3861",
+    "2278",
+    "1386",
+    "2102",
+    "3667",
+    "3968",
+    "1882",
+    "1035",
+    "2066",
+    "1791",
+    "1202",
+    "2056",
+    "2847",
+    "1810",
+    "2534",
+    "2724",
+    "4867",
+    "186",
+    "1672",
+    "2355",
+    "1350",
+    "1493",
+    "245",
+    "382",
+    "3119",
+    "3294",
+    "1652",
+    "933",
+    "2140",
+    "1443",
+    "3113",
+    "1511",
+    "5729",
+    "934",
+    "1513",
+    "471",
+    "635",
+    "437",
+    "893",
+    "1730",
+    "804",
+    "1159",
+    "2148",
+    "1181",
+    "973",
+    "2565",
+    "2718",
+    "3303",
+    "1427",
+    "1970",
+    "2049",
+    "94",
+    "1768",
+    "2321",
+    "911",
+    "965",
+    "164",
+    "2789",
+    "4019",
+    "1572",
+    "2163",
+    "835",
+    "855",
+    "254",
+    "113",
+    "1491",
+    "278",
+    "2050",
+    "2046",
+    "1755",
+    "1753",
+    "1919",
+    "4415",
+    "1548",
+    "2219",
+    "357",
+    "3217",
+    "1563",
+    "615",
+    "1546",
+    "1582",
+    "2866",
+    "1668",
+    "1964",
+    "1570",
+    "3289",
+    "2386",
+    "2076",
+    "2973",
+    "2639",
+    "3357",
+    "1253",
+    "2285",
+    "2132",
+    "2729",
+    "3591",
+    "1070",
+    "654",
+    "1922",
+    "1661",
+    "473",
+    "3026",
+    "1119",
+    "400",
+    "1875",
+    "1027",
+    "2319",
+    "2092",
+    "1506",
+    "1916",
+    "5142",
+    "1184",
+    "1910",
+    "1372",
+    "1932",
+    "3695",
+    "743",
+    "5728",
+    "1908",
+    "717",
+    "3398",
+    "2596",
+    "1011",
+    "1351",
+    "386",
+    "2291",
+    "2417",
+    "3417",
+    "1373",
+    "2055",
+    "1272",
+    "713",
+    "2277",
+    "2497",
+    "2980",
+    "1130",
+    "3305",
+    "1710",
+    "3463",
+    "1579",
+    "544",
+    "111",
+    "285",
+    "246",
+    "2239",
+    "976",
+    "684",
+    "1999",
+    "2168",
+    "2697",
+    "853",
+    "641",
+    "1689",
+    "908",
+    "2127",
+    "3236",
+    "1153",
+    "1258",
+    "1085",
+    "989",
+    "2044",
+    "2764",
+    "4409",
+    "3155",
+    "1567",
+    "4582",
+    "2418",
+    "2016",
+    "3093",
+    "2283",
+    "1965",
+    "996",
+    "1581",
+    "1842",
+    "3211",
+    "823",
+    "2183",
+    "957",
+    "1602",
+    "232",
+    "396",
+    "1369",
+    "1353",
+    "1291",
+    "1519",
+    "867",
+    "1779",
+    "3809",
+    "3679",
+    "1120",
+    "2977",
+    "1726",
+    "198",
+    "2737",
+    "2115",
+    "994",
+    "2051",
+    "3557",
+    "1556",
+    "267",
+    "156",
+    "1178",
+    "2619",
+    "3126",
+    "4071",
+    "861",
+    "1819",
+    "2462",
+    "2669",
+    "1025",
+    "5128",
+    "3660",
+    "2187",
+    "140",
+    "1368",
+    "512",
+    "5419",
+    "1127",
+    "605",
+    "1889",
+    "736",
+    "1482",
+    "2248",
+    "566",
+    "1269",
+    "2208",
+    "3174",
+    "3747",
+    "539",
+    "1453",
+    "290",
+    "497",
+    "1501",
+    "1058",
+    "2251",
+    "3204",
+    "3545",
+    "1860",
+    "482",
+    "984",
+    "1676",
+    "504",
+    "3096",
+    "599",
+    "1784",
+    "1341",
+    "958",
+    "1797",
+    "2529",
+    "1837",
+    "1067",
+    "2170",
+    "2047",
+    "376",
+    "1884",
+    "2772",
+    "3350",
+    "2551",
+    "408",
+    "1920",
+    "902",
+    "5376",
+    "3182",
+    "1596",
+    "1429",
+    "1728",
+    "1888",
+    "4900",
+    "705",
+    "5643",
+    "3137",
+    "2192",
+    "3515",
+    "810",
+    "541",
+    "1949",
+    "1685",
+    "1760",
+    "516",
+    "767",
+    "2342",
+    "1832",
+    "1891",
+    "2206",
+    "1162",
+    "1216",
+    "5690",
+    "1176",
+    "257",
+    "609",
+    "2326",
+    "354",
+    "2165",
+    "346",
+    "863",
+    "1179",
+    "1595",
+    "828",
+    "1045",
+    "2211",
+    "337",
+    "1624",
+    "4834",
+    "671",
+    "922",
+    "1928",
+    "1188",
+    "120",
+    "1894",
+    "1953",
+    "2971",
+    "4864",
+    "3362",
+    "1466",
+    "1110",
+    "2261",
+    "1609",
+    "978",
+    "833",
+    "895",
+    "785",
+    "2305",
+    "1311",
+    "1646",
+    "1068",
+    "251",
+    "2011",
+    "580",
+    "1088",
+    "4082",
+    "334",
+    "692",
+    "4731",
+    "1961",
+    "2527",
+    "417",
+    "1973",
+    "844",
+    "133",
+    "640",
+    "501",
+    "1903",
+    "2263",
+    "2817",
+    "1549",
+    "1126",
+    "3142",
+    "1823",
+    "366",
+    "1914",
+    "2760",
+    "1687",
+    "2297",
+    "4292",
+    "2228",
+    "67",
+    "207",
+    "988",
+    "1979",
+    "575",
+    "1569",
+    "2372",
+    "1794",
+    "4283",
+    "783",
+    "1255",
+    "5131",
+    "2801",
+    "768",
+    "3340",
+    "3229",
+    "1303",
+    "5408",
+    "1680",
+    "325",
+    "1662",
+    "950",
+    "2849",
+    "1762",
+    "2743",
+    "2714",
+    "115",
+    "699",
+    "1880",
+    "3253",
+    "505",
+    "2178",
+    "604",
+    "135",
+    "147",
+    "1508",
+    "1780",
+    "3696",
+    "3488",
+    "1371",
+    "3728",
+    "2186",
+    "807",
+    "624",
+    "4355",
+    "328",
+    "4162",
+    "2172",
+    "4209",
+    "2448",
+    "1698",
+    "2275",
+    "1406",
+    "1518",
+    "523",
+    "336",
+    "3052",
+    "2312",
+    "2113",
+    "343",
+    "2550",
+    "2800",
+    "926",
+    "2160",
+    "259",
+    "870",
+    "1001",
+    "203",
+    "737",
+    "1183",
+    "840",
+    "298",
+    "494",
+    "1846",
+    "1329",
+    "3718",
+    "3775",
+    "1820",
+    "1320",
+    "2562",
+    "1134",
+    "562",
+    "2875",
+    "3683",
+    "781",
+    "1840",
+    "3697",
+    "3045",
+    "1994",
+    "1621",
+    "1995",
+    "5095",
+    "3279",
+    "194",
+    "2898",
+    "1924",
+    "2762",
+    "141",
+    "587",
+    "403",
+    "2666",
+    "709",
+    "905",
+    "4885",
+    "1237",
+    "2344",
+    "4686",
+    "1331",
+    "758",
+    "1158",
+    "4280",
+    "1925",
+    "1586",
+    "1918",
+    "2166",
+    "3538",
+    "1742",
+    "3326",
+    "2215",
+    "3046",
+    "1066",
+    "2019",
+    "3584",
+    "3114",
+    "159",
+    "1362",
+    "201",
+    "1138",
+    "549",
+    "407",
+    "3733",
+    "383",
+    "1271",
+    "2189",
+    "820",
+    "1876",
+    "2108",
+    "1464",
+    "921",
+    "1822",
+    "3037",
+    "814",
+    "1003",
+    "1861"
+  ],
+  "results": [
+    {
+      "目录ID": "86",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "fa7b03d3-ab8f-4b5f-b035-e60e130c59ab.docx",
+      "新文件名": "86_fa7b03d3-ab8f-4b5f-b035-e60e130c59ab.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 2.82,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\86\\fa7b03d3-ab8f-4b5f-b035-e60e130c59ab.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\86_fa7b03d3-ab8f-4b5f-b035-e60e130c59ab.docx",
+      "处理时间": "2026-05-08T14:27:04.824750"
+    },
+    {
+      "目录ID": "67",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "新文件名": "67_aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 2,
+      "文件大小_MB": 20.92,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\67\\aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\67_aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "处理时间": "2026-05-08T14:27:05.517946"
+    },
+    {
+      "目录ID": "141",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "379f98a3-5f65-46c4-b95e-af7b8facd607.docx",
+      "新文件名": "141_379f98a3-5f65-46c4-b95e-af7b8facd607.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\141\\379f98a3-5f65-46c4-b95e-af7b8facd607.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\141_379f98a3-5f65-46c4-b95e-af7b8facd607.docx",
+      "处理时间": "2026-05-08T14:27:06.296909"
+    },
+    {
+      "目录ID": "185",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "9b671d5d-3769-421b-8705-58de3e52eaec.docx",
+      "新文件名": "185_9b671d5d-3769-421b-8705-58de3e52eaec.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\185\\9b671d5d-3769-421b-8705-58de3e52eaec.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\185_9b671d5d-3769-421b-8705-58de3e52eaec.docx",
+      "处理时间": "2026-05-08T14:27:06.432373"
+    },
+    {
+      "目录ID": "191",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "0c021b64-b1ef-4c56-9015-eb4c770aaddc.docx",
+      "新文件名": "191_0c021b64-b1ef-4c56-9015-eb4c770aaddc.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "专家论证意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\191\\0c021b64-b1ef-4c56-9015-eb4c770aaddc.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\191_0c021b64-b1ef-4c56-9015-eb4c770aaddc.docx",
+      "处理时间": "2026-05-08T14:27:06.468253"
+    },
+    {
+      "目录ID": "194",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "c71e0949-24e6-418e-9378-627ab1642bf8.docx",
+      "新文件名": "194_c71e0949-24e6-418e-9378-627ab1642bf8.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}论证.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|论证表|结论表)",
+      "匹配文件数": 1,
+      "文件大小_MB": 10.82,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\194\\c71e0949-24e6-418e-9378-627ab1642bf8.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\194_c71e0949-24e6-418e-9378-627ab1642bf8.docx",
+      "处理时间": "2026-05-08T14:27:07.509957"
+    },
+    {
+      "目录ID": "41",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "新文件名": "41_4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}施工方案.{0,10}(审核|审查|评审).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\41\\4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\41_4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "处理时间": "2026-05-08T14:27:08.370059"
+    },
+    {
+      "目录ID": "456",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "4ee1d568-8613-4736-bcea-21a9aa11b4f3.docx",
+      "新文件名": "456_4ee1d568-8613-4736-bcea-21a9aa11b4f3.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\456\\4ee1d568-8613-4736-bcea-21a9aa11b4f3.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\456_4ee1d568-8613-4736-bcea-21a9aa11b4f3.docx",
+      "处理时间": "2026-05-08T14:27:08.542616"
+    },
+    {
+      "目录ID": "476",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "e85effb5-7715-4516-ac91-7ca89a59a1ce.docx",
+      "新文件名": "476_e85effb5-7715-4516-ac91-7ca89a59a1ce.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\476\\e85effb5-7715-4516-ac91-7ca89a59a1ce.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\476_e85effb5-7715-4516-ac91-7ca89a59a1ce.docx",
+      "处理时间": "2026-05-08T14:27:08.759055"
+    },
+    {
+      "目录ID": "542",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "fab7b33d-8698-4643-879f-4ae26376234a.docx",
+      "新文件名": "542_fab7b33d-8698-4643-879f-4ae26376234a.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "集团意见",
+      "匹配文件数": 3,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到3个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\542\\fab7b33d-8698-4643-879f-4ae26376234a.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\542_fab7b33d-8698-4643-879f-4ae26376234a.docx",
+      "处理时间": "2026-05-08T14:27:10.876412"
+    },
+    {
+      "目录ID": "729",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "74f72ca4-a16f-4cec-8747-e9863570fbdf.docx",
+      "新文件名": "729_74f72ca4-a16f-4cec-8747-e9863570fbdf.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\729\\74f72ca4-a16f-4cec-8747-e9863570fbdf.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\729_74f72ca4-a16f-4cec-8747-e9863570fbdf.docx",
+      "处理时间": "2026-05-08T14:27:18.574102"
+    },
+    {
+      "目录ID": "727",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "e037ac12-0b53-4d24-93f9-059f0b158bd9.docx",
+      "新文件名": "727_e037ac12-0b53-4d24-93f9-059f0b158bd9.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.27,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\727\\e037ac12-0b53-4d24-93f9-059f0b158bd9.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\727_e037ac12-0b53-4d24-93f9-059f0b158bd9.docx",
+      "处理时间": "2026-05-08T14:27:18.636469"
+    },
+    {
+      "目录ID": "727",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "cf741260-e560-446a-8c5f-5b0383f434ea.docx",
+      "新文件名": "727_cf741260-e560-446a-8c5f-5b0383f434ea.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:监理.{0,8}工程师.{0,8}意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 11.51,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\727\\cf741260-e560-446a-8c5f-5b0383f434ea.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\727_cf741260-e560-446a-8c5f-5b0383f434ea.docx",
+      "处理时间": "2026-05-08T14:27:19.317109"
+    },
+    {
+      "目录ID": "808",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "4895009d-e438-437d-8eb1-91c34974d0a3.docx",
+      "新文件名": "808_4895009d-e438-437d-8eb1-91c34974d0a3.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\808\\4895009d-e438-437d-8eb1-91c34974d0a3.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\808_4895009d-e438-437d-8eb1-91c34974d0a3.docx",
+      "处理时间": "2026-05-08T14:27:19.613821"
+    },
+    {
+      "目录ID": "471",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "3fbe428f-aa66-448f-8b97-e809830a7daf.docx",
+      "新文件名": "471_3fbe428f-aa66-448f-8b97-e809830a7daf.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\471\\3fbe428f-aa66-448f-8b97-e809830a7daf.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\471_3fbe428f-aa66-448f-8b97-e809830a7daf.docx",
+      "处理时间": "2026-05-08T14:27:19.152972"
+    },
+    {
+      "目录ID": "853",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "940ee19f-b37b-4d90-903a-051156b828eb.docx",
+      "新文件名": "853_940ee19f-b37b-4d90-903a-051156b828eb.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\853\\940ee19f-b37b-4d90-903a-051156b828eb.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\853_940ee19f-b37b-4d90-903a-051156b828eb.docx",
+      "处理时间": "2026-05-08T14:27:23.081409"
+    },
+    {
+      "目录ID": "870",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "新文件名": "870_aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 2,
+      "文件大小_MB": 20.92,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\870\\aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\870_aa4fede7-4c83-47e0-8e77-1f548b083523.docx",
+      "处理时间": "2026-05-08T14:27:24.690495"
+    },
+    {
+      "目录ID": "939",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "4cad6d56-f766-4f5f-8e0c-e98486e55ff4.docx",
+      "新文件名": "939_4cad6d56-f766-4f5f-8e0c-e98486e55ff4.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\939\\4cad6d56-f766-4f5f-8e0c-e98486e55ff4.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\939_4cad6d56-f766-4f5f-8e0c-e98486e55ff4.docx",
+      "处理时间": "2026-05-08T14:27:27.704113"
+    },
+    {
+      "目录ID": "872",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "da1b4798-6e44-413e-8d54-e8973d42ce5b.docx",
+      "新文件名": "872_da1b4798-6e44-413e-8d54-e8973d42ce5b.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司审核意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\872\\da1b4798-6e44-413e-8d54-e8973d42ce5b.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\872_da1b4798-6e44-413e-8d54-e8973d42ce5b.docx",
+      "处理时间": "2026-05-08T14:27:28.754247"
+    },
+    {
+      "目录ID": "909",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "a26a1c55-5362-4388-8d1d-d4c9796f76bc.docx",
+      "新文件名": "909_a26a1c55-5362-4388-8d1d-d4c9796f76bc.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\909\\a26a1c55-5362-4388-8d1d-d4c9796f76bc.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\909_a26a1c55-5362-4388-8d1d-d4c9796f76bc.docx",
+      "处理时间": "2026-05-08T14:27:28.780153"
+    },
+    {
+      "目录ID": "733",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "0b358a52-cfbd-4d52-8daa-df3afe0e471e.docx",
+      "新文件名": "733_0b358a52-cfbd-4d52-8daa-df3afe0e471e.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\733\\0b358a52-cfbd-4d52-8daa-df3afe0e471e.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\733_0b358a52-cfbd-4d52-8daa-df3afe0e471e.docx",
+      "处理时间": "2026-05-08T14:27:30.024402"
+    },
+    {
+      "目录ID": "977",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "6111ee38-04a3-43f5-835c-3ff931af3a04.docx",
+      "新文件名": "977_6111ee38-04a3-43f5-835c-3ff931af3a04.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "审批意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 2.04,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\977\\6111ee38-04a3-43f5-835c-3ff931af3a04.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\977_6111ee38-04a3-43f5-835c-3ff931af3a04.docx",
+      "处理时间": "2026-05-08T14:27:30.193398"
+    },
+    {
+      "目录ID": "1062",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "新文件名": "1062_e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "专家评审意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 13.47,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1062\\e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1062_e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "处理时间": "2026-05-08T14:27:32.873729"
+    },
+    {
+      "目录ID": "1062",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "新文件名": "1062_e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 13.47,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1062\\e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1062_e2e4d520-e022-46f4-8d52-1e0a8702540d.docx",
+      "处理时间": "2026-05-08T14:27:33.659688"
+    },
+    {
+      "目录ID": "1154",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "660902be-dcfe-43d5-b1f7-6fff7a458ebf.docx",
+      "新文件名": "1154_660902be-dcfe-43d5-b1f7-6fff7a458ebf.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "公司审查意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1154\\660902be-dcfe-43d5-b1f7-6fff7a458ebf.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1154_660902be-dcfe-43d5-b1f7-6fff7a458ebf.docx",
+      "处理时间": "2026-05-08T14:27:36.012508"
+    },
+    {
+      "目录ID": "970",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "871da903-719b-4305-b0a5-a94cb35bde17.docx",
+      "新文件名": "970_871da903-719b-4305-b0a5-a94cb35bde17.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:施工方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 2,
+      "文件大小_MB": 11.25,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\970\\871da903-719b-4305-b0a5-a94cb35bde17.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\970_871da903-719b-4305-b0a5-a94cb35bde17.docx",
+      "处理时间": "2026-05-08T14:27:36.373774"
+    },
+    {
+      "目录ID": "1113",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "7801c037-744c-4d8a-8526-4e29caa30fd1.docx",
+      "新文件名": "1113_7801c037-744c-4d8a-8526-4e29caa30fd1.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 5,
+      "文件大小_MB": 11.52,
+      "备注": "Top5中找到5个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1113\\7801c037-744c-4d8a-8526-4e29caa30fd1.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1113_7801c037-744c-4d8a-8526-4e29caa30fd1.docx",
+      "处理时间": "2026-05-08T14:27:35.007795"
+    },
+    {
+      "目录ID": "1176",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "b137f78d-629c-4bb6-86ec-36b68571c695.docx",
+      "新文件名": "1176_b137f78d-629c-4bb6-86ec-36b68571c695.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 2,
+      "文件大小_MB": 7.24,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1176\\b137f78d-629c-4bb6-86ec-36b68571c695.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1176_b137f78d-629c-4bb6-86ec-36b68571c695.docx",
+      "处理时间": "2026-05-08T14:27:38.798052"
+    },
+    {
+      "目录ID": "1264",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "2a150abc-ec6f-4113-bece-cd16cee63e9a.docx",
+      "新文件名": "1264_2a150abc-ec6f-4113-bece-cd16cee63e9a.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1264\\2a150abc-ec6f-4113-bece-cd16cee63e9a.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1264_2a150abc-ec6f-4113-bece-cd16cee63e9a.docx",
+      "处理时间": "2026-05-08T14:27:40.634598"
+    },
+    {
+      "目录ID": "1223",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "a0cf3cec-d6a6-415b-93c7-c02b59acf772.docx",
+      "新文件名": "1223_a0cf3cec-d6a6-415b-93c7-c02b59acf772.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.03,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1223\\a0cf3cec-d6a6-415b-93c7-c02b59acf772.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1223_a0cf3cec-d6a6-415b-93c7-c02b59acf772.docx",
+      "处理时间": "2026-05-08T14:27:40.965237"
+    },
+    {
+      "目录ID": "1311",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "2aba74b7-8771-43f3-8ac3-4a8e9233a389.docx",
+      "新文件名": "1311_2aba74b7-8771-43f3-8ac3-4a8e9233a389.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "专家评审意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 22.55,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1311\\2aba74b7-8771-43f3-8ac3-4a8e9233a389.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1311_2aba74b7-8771-43f3-8ac3-4a8e9233a389.docx",
+      "处理时间": "2026-05-08T14:27:42.326491"
+    },
+    {
+      "目录ID": "955",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "6e88478a-ad2d-4a23-ac67-9e7643afbb74.docx",
+      "新文件名": "955_6e88478a-ad2d-4a23-ac67-9e7643afbb74.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家组.{0,10}(意见|建议|结论)",
+      "匹配文件数": 1,
+      "文件大小_MB": 18.15,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\955\\6e88478a-ad2d-4a23-ac67-9e7643afbb74.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\955_6e88478a-ad2d-4a23-ac67-9e7643afbb74.docx",
+      "处理时间": "2026-05-08T14:27:35.846050"
+    },
+    {
+      "目录ID": "955",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "1d011fa9-ee92-4772-b666-62da6ffa410d.docx",
+      "新文件名": "955_1d011fa9-ee92-4772-b666-62da6ffa410d.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\955\\1d011fa9-ee92-4772-b666-62da6ffa410d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\955_1d011fa9-ee92-4772-b666-62da6ffa410d.docx",
+      "处理时间": "2026-05-08T14:27:42.905355"
+    },
+    {
+      "目录ID": "1337",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "56816b99-95e9-44b2-86ed-27ee94d7cd30.docx",
+      "新文件名": "1337_56816b99-95e9-44b2-86ed-27ee94d7cd30.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1337\\56816b99-95e9-44b2-86ed-27ee94d7cd30.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1337_56816b99-95e9-44b2-86ed-27ee94d7cd30.docx",
+      "处理时间": "2026-05-08T14:27:44.047197"
+    },
+    {
+      "目录ID": "1349",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "95956489-50ed-4496-b689-5158d2f9e680.docx",
+      "新文件名": "1349_95956489-50ed-4496-b689-5158d2f9e680.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1349\\95956489-50ed-4496-b689-5158d2f9e680.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1349_95956489-50ed-4496-b689-5158d2f9e680.docx",
+      "处理时间": "2026-05-08T14:27:44.102835"
+    },
+    {
+      "目录ID": "1331",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "c4ffb667-c37a-49e4-b854-a390aeb291ee.docx",
+      "新文件名": "1331_c4ffb667-c37a-49e4-b854-a390aeb291ee.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 6.98,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1331\\c4ffb667-c37a-49e4-b854-a390aeb291ee.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1331_c4ffb667-c37a-49e4-b854-a390aeb291ee.docx",
+      "处理时间": "2026-05-08T14:27:43.885736"
+    },
+    {
+      "目录ID": "1354",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "ebec760d-9957-4a36-9643-a98621954c8d.docx",
+      "新文件名": "1354_ebec760d-9957-4a36-9643-a98621954c8d.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 2.43,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1354\\ebec760d-9957-4a36-9643-a98621954c8d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1354_ebec760d-9957-4a36-9643-a98621954c8d.docx",
+      "处理时间": "2026-05-08T14:27:44.486106"
+    },
+    {
+      "目录ID": "1597",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "0c160278-043b-4922-be31-13dff19db638.docx",
+      "新文件名": "1597_0c160278-043b-4922-be31-13dff19db638.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:监理.{0,8}工程师.{0,8}意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 3.44,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1597\\0c160278-043b-4922-be31-13dff19db638.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1597_0c160278-043b-4922-be31-13dff19db638.docx",
+      "处理时间": "2026-05-08T14:27:49.124299"
+    },
+    {
+      "目录ID": "1652",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "a0155673-204d-422f-b041-b26b793c6e6c.docx",
+      "新文件名": "1652_a0155673-204d-422f-b041-b26b793c6e6c.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1652\\a0155673-204d-422f-b041-b26b793c6e6c.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1652_a0155673-204d-422f-b041-b26b793c6e6c.docx",
+      "处理时间": "2026-05-08T14:27:49.341312"
+    },
+    {
+      "目录ID": "1675",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "f8036631-9696-4325-889a-2289734bbb5a.docx",
+      "新文件名": "1675_f8036631-9696-4325-889a-2289734bbb5a.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:监理.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 32.87,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1675\\f8036631-9696-4325-889a-2289734bbb5a.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1675_f8036631-9696-4325-889a-2289734bbb5a.docx",
+      "处理时间": "2026-05-08T14:27:51.890210"
+    },
+    {
+      "目录ID": "1691",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "b840eaf5-1873-443f-abdb-03a1aacd2621.docx",
+      "新文件名": "1691_b840eaf5-1873-443f-abdb-03a1aacd2621.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1691\\b840eaf5-1873-443f-abdb-03a1aacd2621.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1691_b840eaf5-1873-443f-abdb-03a1aacd2621.docx",
+      "处理时间": "2026-05-08T14:27:52.029211"
+    },
+    {
+      "目录ID": "1646",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "dc2767f0-d41f-4750-856a-de4a835b5f23.docx",
+      "新文件名": "1646_dc2767f0-d41f-4750-856a-de4a835b5f23.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 2,
+      "文件大小_MB": 19.49,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1646\\dc2767f0-d41f-4750-856a-de4a835b5f23.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1646_dc2767f0-d41f-4750-856a-de4a835b5f23.docx",
+      "处理时间": "2026-05-08T14:27:52.789087"
+    },
+    {
+      "目录ID": "1710",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "0250ed38-15c6-47d4-93d1-39f2573e9c81.docx",
+      "新文件名": "1710_0250ed38-15c6-47d4-93d1-39f2573e9c81.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 3,
+      "文件大小_MB": 4.14,
+      "备注": "其余文件中找到3个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1710\\0250ed38-15c6-47d4-93d1-39f2573e9c81.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1710_0250ed38-15c6-47d4-93d1-39f2573e9c81.docx",
+      "处理时间": "2026-05-08T14:27:54.638292"
+    },
+    {
+      "目录ID": "1762",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "8f77f3a0-880d-4592-a6e1-f745417164b7.docx",
+      "新文件名": "1762_8f77f3a0-880d-4592-a6e1-f745417164b7.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家组.{0,10}(意见|建议|结论)",
+      "匹配文件数": 2,
+      "文件大小_MB": 23.07,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1762\\8f77f3a0-880d-4592-a6e1-f745417164b7.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1762_8f77f3a0-880d-4592-a6e1-f745417164b7.docx",
+      "处理时间": "2026-05-08T14:27:54.600420"
+    },
+    {
+      "目录ID": "1842",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "32822734-ab2d-4e38-90cd-c0b38866ce61.docx",
+      "新文件名": "1842_32822734-ab2d-4e38-90cd-c0b38866ce61.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 12.87,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1842\\32822734-ab2d-4e38-90cd-c0b38866ce61.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1842_32822734-ab2d-4e38-90cd-c0b38866ce61.docx",
+      "处理时间": "2026-05-08T14:27:56.726759"
+    },
+    {
+      "目录ID": "1848",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "24fbdb2a-ed2f-4277-877a-fbc5302adde4.docx",
+      "新文件名": "1848_24fbdb2a-ed2f-4277-877a-fbc5302adde4.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:项目(部)?.{0,10}经理.{0,8}(意见|审核)",
+      "匹配文件数": 1,
+      "文件大小_MB": 17.7,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1848\\24fbdb2a-ed2f-4277-877a-fbc5302adde4.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\1848_24fbdb2a-ed2f-4277-877a-fbc5302adde4.docx",
+      "处理时间": "2026-05-08T14:27:57.522650"
+    },
+    {
+      "目录ID": "1894",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "c2e32c2a-fff0-4187-8927-dd391699b952.docx",
+      "新文件名": "1894_c2e32c2a-fff0-4187-8927-dd391699b952.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 2,
+      "文件大小_MB": 3.85,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1894\\c2e32c2a-fff0-4187-8927-dd391699b952.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1894_c2e32c2a-fff0-4187-8927-dd391699b952.docx",
+      "处理时间": "2026-05-08T14:27:59.221342"
+    },
+    {
+      "目录ID": "1903",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "e38ebf2c-aa31-4239-9c7c-9b7ecc318c76.docx",
+      "新文件名": "1903_e38ebf2c-aa31-4239-9c7c-9b7ecc318c76.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 5.14,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1903\\e38ebf2c-aa31-4239-9c7c-9b7ecc318c76.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1903_e38ebf2c-aa31-4239-9c7c-9b7ecc318c76.docx",
+      "处理时间": "2026-05-08T14:28:00.069387"
+    },
+    {
+      "目录ID": "1730",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "c190d775-5767-4726-b955-683b5e10ae11.docx",
+      "新文件名": "1730_c190d775-5767-4726-b955-683b5e10ae11.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 3,
+      "文件大小_MB": 6.65,
+      "备注": "Top5中找到3个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\1730\\c190d775-5767-4726-b955-683b5e10ae11.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\1730_c190d775-5767-4726-b955-683b5e10ae11.docx",
+      "处理时间": "2026-05-08T14:28:01.528296"
+    },
+    {
+      "目录ID": "2010",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "新文件名": "2010_0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 3,
+      "文件大小_MB": 10.33,
+      "备注": "Top5中找到3个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2010\\0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2010_0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "处理时间": "2026-05-08T14:28:04.917286"
+    },
+    {
+      "目录ID": "2010",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "新文件名": "2010_0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "监理工程师意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 10.33,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2010\\0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2010_0256f642-242f-4ebd-9380-c5097befd97a.docx",
+      "处理时间": "2026-05-08T14:28:06.975536"
+    },
+    {
+      "目录ID": "2071",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "7c0f1d82-964c-48ac-9aba-45f8e93fe396.docx",
+      "新文件名": "2071_7c0f1d82-964c-48ac-9aba-45f8e93fe396.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "施工方案审查意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.15,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2071\\7c0f1d82-964c-48ac-9aba-45f8e93fe396.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2071_7c0f1d82-964c-48ac-9aba-45f8e93fe396.docx",
+      "处理时间": "2026-05-08T14:28:09.610377"
+    },
+    {
+      "目录ID": "2183",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "b64b7bac-afb4-4828-a980-17fff5c1f69d.docx",
+      "新文件名": "2183_b64b7bac-afb4-4828-a980-17fff5c1f69d.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 16.24,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2183\\b64b7bac-afb4-4828-a980-17fff5c1f69d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2183_b64b7bac-afb4-4828-a980-17fff5c1f69d.docx",
+      "处理时间": "2026-05-08T14:28:13.453085"
+    },
+    {
+      "目录ID": "2192",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "8411669a-2fd0-4878-a82a-d269f9ed771c.docx",
+      "新文件名": "2192_8411669a-2fd0-4878-a82a-d269f9ed771c.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,15}(意见|建议|结论)",
+      "匹配文件数": 1,
+      "文件大小_MB": 22.17,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2192\\8411669a-2fd0-4878-a82a-d269f9ed771c.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2192_8411669a-2fd0-4878-a82a-d269f9ed771c.docx",
+      "处理时间": "2026-05-08T14:28:14.617328"
+    },
+    {
+      "目录ID": "2194",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "3288eb4e-4ee6-4096-9d0e-5ae5f845250d.docx",
+      "新文件名": "2194_3288eb4e-4ee6-4096-9d0e-5ae5f845250d.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:(公司|集团|企业).{0,8}意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 14.5,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2194\\3288eb4e-4ee6-4096-9d0e-5ae5f845250d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2194_3288eb4e-4ee6-4096-9d0e-5ae5f845250d.docx",
+      "处理时间": "2026-05-08T14:28:17.825814"
+    },
+    {
+      "目录ID": "2011",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "c0a79b78-7be1-4a76-8fff-848c8b6c66ee.docx",
+      "新文件名": "2011_c0a79b78-7be1-4a76-8fff-848c8b6c66ee.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 3,
+      "文件大小_MB": 39.3,
+      "备注": "Top5中找到3个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2011\\c0a79b78-7be1-4a76-8fff-848c8b6c66ee.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2011_c0a79b78-7be1-4a76-8fff-848c8b6c66ee.docx",
+      "处理时间": "2026-05-08T14:28:16.592242"
+    },
+    {
+      "目录ID": "2230",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "00b6f7b2-5db2-417d-ad8c-706d07796fae.docx",
+      "新文件名": "2230_00b6f7b2-5db2-417d-ad8c-706d07796fae.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 2,
+      "文件大小_MB": 16.58,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2230\\00b6f7b2-5db2-417d-ad8c-706d07796fae.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2230_00b6f7b2-5db2-417d-ad8c-706d07796fae.docx",
+      "处理时间": "2026-05-08T14:28:20.528441"
+    },
+    {
+      "目录ID": "2230",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "7f5c9e5d-590f-4901-865c-3763f3f31d2a.docx",
+      "新文件名": "2230_7f5c9e5d-590f-4901-865c-3763f3f31d2a.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "方案审核意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.41,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2230\\7f5c9e5d-590f-4901-865c-3763f3f31d2a.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2230_7f5c9e5d-590f-4901-865c-3763f3f31d2a.docx",
+      "处理时间": "2026-05-08T14:28:21.913457"
+    },
+    {
+      "目录ID": "2394",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "aa53a0c0-84ea-419e-8c26-d42d59bbcf4c.docx",
+      "新文件名": "2394_aa53a0c0-84ea-419e-8c26-d42d59bbcf4c.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "集团审核意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2394\\aa53a0c0-84ea-419e-8c26-d42d59bbcf4c.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2394_aa53a0c0-84ea-419e-8c26-d42d59bbcf4c.docx",
+      "处理时间": "2026-05-08T14:28:21.994392"
+    },
+    {
+      "目录ID": "2450",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "852e2060-4c92-4b5a-8303-560b8092a34f.docx",
+      "新文件名": "2450_852e2060-4c92-4b5a-8303-560b8092a34f.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2450\\852e2060-4c92-4b5a-8303-560b8092a34f.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2450_852e2060-4c92-4b5a-8303-560b8092a34f.docx",
+      "处理时间": "2026-05-08T14:28:23.384989"
+    },
+    {
+      "目录ID": "2326",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "c73358a6-3fb3-4865-b9a0-1e6a3dc66096.docx",
+      "新文件名": "2326_c73358a6-3fb3-4865-b9a0-1e6a3dc66096.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 29.24,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2326\\c73358a6-3fb3-4865-b9a0-1e6a3dc66096.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2326_c73358a6-3fb3-4865-b9a0-1e6a3dc66096.docx",
+      "处理时间": "2026-05-08T14:28:24.382512"
+    },
+    {
+      "目录ID": "2460",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "5f2ed26f-94e7-4c9d-b7e2-dafb83b838f3.docx",
+      "新文件名": "2460_5f2ed26f-94e7-4c9d-b7e2-dafb83b838f3.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 4.16,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2460\\5f2ed26f-94e7-4c9d-b7e2-dafb83b838f3.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2460_5f2ed26f-94e7-4c9d-b7e2-dafb83b838f3.docx",
+      "处理时间": "2026-05-08T14:28:25.660123"
+    },
+    {
+      "目录ID": "2460",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "95482385-1244-4912-adeb-87a2cdceb67b.docx",
+      "新文件名": "2460_95482385-1244-4912-adeb-87a2cdceb67b.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.04,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2460\\95482385-1244-4912-adeb-87a2cdceb67b.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2460_95482385-1244-4912-adeb-87a2cdceb67b.docx",
+      "处理时间": "2026-05-08T14:28:26.040871"
+    },
+    {
+      "目录ID": "2560",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "63bc600e-e455-49a0-b66e-1de0e2816365.docx",
+      "新文件名": "2560_63bc600e-e455-49a0-b66e-1de0e2816365.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.13,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2560\\63bc600e-e455-49a0-b66e-1de0e2816365.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2560_63bc600e-e455-49a0-b66e-1de0e2816365.docx",
+      "处理时间": "2026-05-08T14:28:29.492848"
+    },
+    {
+      "目录ID": "2634",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "0c796ce6-2d35-4af7-896b-080b91f7be40.docx",
+      "新文件名": "2634_0c796ce6-2d35-4af7-896b-080b91f7be40.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "监理报告",
+      "匹配文件数": 2,
+      "文件大小_MB": 6.69,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2634\\0c796ce6-2d35-4af7-896b-080b91f7be40.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2634_0c796ce6-2d35-4af7-896b-080b91f7be40.docx",
+      "处理时间": "2026-05-08T14:28:40.347670"
+    },
+    {
+      "目录ID": "2842",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "cdd8f19f-902e-46f8-a664-f467ff884fe8.docx",
+      "新文件名": "2842_cdd8f19f-902e-46f8-a664-f467ff884fe8.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2842\\cdd8f19f-902e-46f8-a664-f467ff884fe8.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2842_cdd8f19f-902e-46f8-a664-f467ff884fe8.docx",
+      "处理时间": "2026-05-08T14:28:59.973065"
+    },
+    {
+      "目录ID": "2949",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "86a6cfc3-d360-4c5f-a459-849bf4b82e00.docx",
+      "新文件名": "2949_86a6cfc3-d360-4c5f-a459-849bf4b82e00.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "监理报告",
+      "匹配文件数": 2,
+      "文件大小_MB": 9.71,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2949\\86a6cfc3-d360-4c5f-a459-849bf4b82e00.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2949_86a6cfc3-d360-4c5f-a459-849bf4b82e00.docx",
+      "处理时间": "2026-05-08T14:29:03.482890"
+    },
+    {
+      "目录ID": "3003",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "55ac6a29-a201-4c6f-8757-c9ba190bc62d.docx",
+      "新文件名": "3003_55ac6a29-a201-4c6f-8757-c9ba190bc62d.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3003\\55ac6a29-a201-4c6f-8757-c9ba190bc62d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3003_55ac6a29-a201-4c6f-8757-c9ba190bc62d.docx",
+      "处理时间": "2026-05-08T14:29:03.597363"
+    },
+    {
+      "目录ID": "3033",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "4832d88e-ecfe-49c9-8e1d-adf4a1bf7024.docx",
+      "新文件名": "3033_4832d88e-ecfe-49c9-8e1d-adf4a1bf7024.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "公司审批意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3033\\4832d88e-ecfe-49c9-8e1d-adf4a1bf7024.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3033_4832d88e-ecfe-49c9-8e1d-adf4a1bf7024.docx",
+      "处理时间": "2026-05-08T14:29:03.671116"
+    },
+    {
+      "目录ID": "2854",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "54831051-5fbf-4426-b07d-6e3f81732817.docx",
+      "新文件名": "2854_54831051-5fbf-4426-b07d-6e3f81732817.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 4,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到4个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2854\\54831051-5fbf-4426-b07d-6e3f81732817.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\2854_54831051-5fbf-4426-b07d-6e3f81732817.docx",
+      "处理时间": "2026-05-08T14:29:06.127389"
+    },
+    {
+      "目录ID": "3114",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "fff55f1a-7b12-48e4-9a63-b93f594a623b.docx",
+      "新文件名": "3114_fff55f1a-7b12-48e4-9a63-b93f594a623b.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3114\\fff55f1a-7b12-48e4-9a63-b93f594a623b.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\3114_fff55f1a-7b12-48e4-9a63-b93f594a623b.docx",
+      "处理时间": "2026-05-08T14:29:09.040052"
+    },
+    {
+      "目录ID": "3098",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "86ce393a-9466-483a-a4ad-af0b63bdd727.docx",
+      "新文件名": "3098_86ce393a-9466-483a-a4ad-af0b63bdd727.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "监理工程师意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 6.66,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3098\\86ce393a-9466-483a-a4ad-af0b63bdd727.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3098_86ce393a-9466-483a-a4ad-af0b63bdd727.docx",
+      "处理时间": "2026-05-08T14:29:11.344767"
+    },
+    {
+      "目录ID": "3162",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "7178af81-5074-461e-96e2-28b145c8aabc.docx",
+      "新文件名": "3162_7178af81-5074-461e-96e2-28b145c8aabc.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 7,
+      "文件大小_MB": 0.04,
+      "备注": "其余文件中找到7个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3162\\7178af81-5074-461e-96e2-28b145c8aabc.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3162_7178af81-5074-461e-96e2-28b145c8aabc.docx",
+      "处理时间": "2026-05-08T14:29:13.771839"
+    },
+    {
+      "目录ID": "3243",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "115f2f35-e472-4ac8-a275-82480eebff06.docx",
+      "新文件名": "3243_115f2f35-e472-4ac8-a275-82480eebff06.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:项目(部)?.{0,10}技术.{0,8}(负责人|主管).{0,8}(意见|审核)",
+      "匹配文件数": 1,
+      "文件大小_MB": 7.51,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3243\\115f2f35-e472-4ac8-a275-82480eebff06.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3243_115f2f35-e472-4ac8-a275-82480eebff06.docx",
+      "处理时间": "2026-05-08T14:29:17.159641"
+    },
+    {
+      "目录ID": "2596",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "fba526f7-d36a-4412-9f23-e8ffb4d9aaeb.docx",
+      "新文件名": "2596_fba526f7-d36a-4412-9f23-e8ffb4d9aaeb.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,15}(意见|建议|结论)",
+      "匹配文件数": 1,
+      "文件大小_MB": 9.49,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\2596\\fba526f7-d36a-4412-9f23-e8ffb4d9aaeb.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\2596_fba526f7-d36a-4412-9f23-e8ffb4d9aaeb.docx",
+      "处理时间": "2026-05-08T14:29:19.666173"
+    },
+    {
+      "目录ID": "3344",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "fd75910f-194e-4bef-9966-254ee85f11d1.docx",
+      "新文件名": "3344_fd75910f-194e-4bef-9966-254ee85f11d1.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "公司审查意见",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3344\\fd75910f-194e-4bef-9966-254ee85f11d1.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3344_fd75910f-194e-4bef-9966-254ee85f11d1.docx",
+      "处理时间": "2026-05-08T14:29:32.530896"
+    },
+    {
+      "目录ID": "3423",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "ccfb0457-3109-4c88-a140-58bdb595155e.docx",
+      "新文件名": "3423_ccfb0457-3109-4c88-a140-58bdb595155e.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "集团审核意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.07,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3423\\ccfb0457-3109-4c88-a140-58bdb595155e.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3423_ccfb0457-3109-4c88-a140-58bdb595155e.docx",
+      "处理时间": "2026-05-08T14:29:32.957967"
+    },
+    {
+      "目录ID": "3372",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "6c1f69a7-6ce8-4dfc-86c7-dacdf4c90332.docx",
+      "新文件名": "3372_6c1f69a7-6ce8-4dfc-86c7-dacdf4c90332.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "监理报告",
+      "匹配文件数": 3,
+      "文件大小_MB": 9.35,
+      "备注": "Top5中找到3个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3372\\6c1f69a7-6ce8-4dfc-86c7-dacdf4c90332.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3372_6c1f69a7-6ce8-4dfc-86c7-dacdf4c90332.docx",
+      "处理时间": "2026-05-08T14:29:34.917038"
+    },
+    {
+      "目录ID": "3489",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "新文件名": "3489_4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}施工方案.{0,10}(审核|审查|评审).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 2,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到2个匹配,选择最新",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3489\\4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\3489_4b7ecff2-6392-430b-8128-4c7772462365.docx",
+      "处理时间": "2026-05-08T14:29:41.204484"
+    },
+    {
+      "目录ID": "3538",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "9014c392-6ceb-4fc5-a5b8-81cdd9f52793.docx",
+      "新文件名": "3538_9014c392-6ceb-4fc5-a5b8-81cdd9f52793.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 23.34,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3538\\9014c392-6ceb-4fc5-a5b8-81cdd9f52793.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\3538_9014c392-6ceb-4fc5-a5b8-81cdd9f52793.docx",
+      "处理时间": "2026-05-08T14:29:36.899296"
+    },
+    {
+      "目录ID": "4082",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "0d5d4d45-69e9-4ca1-b513-5bfafaefd11d.docx",
+      "新文件名": "4082_0d5d4d45-69e9-4ca1-b513-5bfafaefd11d.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:专家.{0,12}论证.{0,12}(意见|记录|结论|说明|建议|纪要|报告|审核表|论证表|结论表)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\4082\\0d5d4d45-69e9-4ca1-b513-5bfafaefd11d.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\4082_0d5d4d45-69e9-4ca1-b513-5bfafaefd11d.docx",
+      "处理时间": "2026-05-08T14:29:44.841417"
+    },
+    {
+      "目录ID": "3861",
+      "评审类型": "expert",
+      "阶段": "第2阶段",
+      "原文件名": "b8b8352b-977f-4830-b28d-a8235c77b81e.docx",
+      "新文件名": "3861_b8b8352b-977f-4830-b28d-a8235c77b81e.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:专家.{0,12}(技术.{0,4})?(审查|评审|论证)",
+      "匹配文件数": 1,
+      "文件大小_MB": 30.43,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\3861\\b8b8352b-977f-4830-b28d-a8235c77b81e.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\专家评审意见_记录\\3861_b8b8352b-977f-4830-b28d-a8235c77b81e.docx",
+      "处理时间": "2026-05-08T14:29:46.472271"
+    },
+    {
+      "目录ID": "5057",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "52ab633d-ddc7-4851-9565-b3e6be30c255.docx",
+      "新文件名": "5057_52ab633d-ddc7-4851-9565-b3e6be30c255.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "施工方案审查意见",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.22,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\5057\\52ab633d-ddc7-4851-9565-b3e6be30c255.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\5057_52ab633d-ddc7-4851-9565-b3e6be30c255.docx",
+      "处理时间": "2026-05-08T14:29:52.416618"
+    },
+    {
+      "目录ID": "5423",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "bab16352-6031-4195-a85b-39a880809cde.docx",
+      "新文件名": "5423_bab16352-6031-4195-a85b-39a880809cde.docx",
+      "状态": "成功",
+      "匹配来源": "其他文件",
+      "匹配关键词": "模式命中:(公司|集团).{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.02,
+      "备注": "其余文件中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\5423\\bab16352-6031-4195-a85b-39a880809cde.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\5423_bab16352-6031-4195-a85b-39a880809cde.docx",
+      "处理时间": "2026-05-08T14:29:52.819688"
+    },
+    {
+      "目录ID": "5629",
+      "评审类型": "company",
+      "阶段": "第2阶段",
+      "原文件名": "2eb1f5a0-2628-4bc8-a000-00c98d69ddd8.docx",
+      "新文件名": "5629_2eb1f5a0-2628-4bc8-a000-00c98d69ddd8.docx",
+      "状态": "成功",
+      "匹配来源": "Top5",
+      "匹配关键词": "模式命中:施工方案.{0,10}(评审|审核|审查).{0,10}(意见|说明|记录|纪要|报告)",
+      "匹配文件数": 1,
+      "文件大小_MB": 0.01,
+      "备注": "Top5中找到匹配文件",
+      "原路径": "E:\\提供的原始文件\\原始文件\\全部的原始文档\\未提取\\5629\\2eb1f5a0-2628-4bc8-a000-00c98d69ddd8.docx",
+      "目标路径": "E:\\提供的原始文件\\原始文件\\PDF分类结果_服务器MinerU版\\公司集团评审意见说明\\5629_2eb1f5a0-2628-4bc8-a000-00c98d69ddd8.docx",
+      "处理时间": "2026-05-08T14:29:52.896714"
+    }
+  ],
+  "stats": {
+    "total_directories": 643,
+    "expert_success_count": 120,
+    "company_success_count": 151,
+    "expert_phase2_success_count": 0,
+    "company_phase2_success_count": 0,
+    "expert_top5_match_count": 0,
+    "company_top5_match_count": 0,
+    "expert_others_match_count": 0,
+    "company_others_match_count": 0,
+    "expert_no_match_count": 0,
+    "company_no_match_count": 0
+  },
+  "phase": 2,
+  "total_directories": 643,
+  "last_update": "2026-05-08T14:29:55.981151"
+}

+ 771 - 0
src/app/scripts/md2excel_extractor_company.py

@@ -0,0 +1,771 @@
+#!/usr/bin/env python3
+"""
+md2excel_company: Markdown 公司集团评审意见文档批量提取工具
+
+功能说明:
+    遍历文件夹中的 Markdown 文档,使用大模型语义理解提取项目名称、
+    方案名称和公司集团评审意见,写入 Excel 汇总表。
+
+用法:
+    # 方式1:使用默认路径(无需参数)
+    python md2excel_extractor_company.py
+    
+    # 方式2:自定义路径
+    python md2excel_extractor_company.py <源文件夹路径> <输出Excel路径>
+
+示例:
+    python md2excel_extractor_company.py D:/公司集团评审意见/temp D:/汇总表.xlsx
+
+目录结构要求:
+    支持两种结构,自动识别:
+    
+    结构1(旧):
+    源文件夹/
+    ├── 子文件夹1/
+    │   └── auto/
+    │       └── xxx.md
+    ├── 子文件夹2/
+    │   └── auto/
+    │       └── yyy.md
+    └── ...
+    
+    结构2(新):
+    源文件夹/
+    ├── 子文件夹1/
+    │   └── xxx.md
+    ├── 子文件夹2/
+    │   └── subfolder/
+    │       └── yyy.md
+    └── ...
+"""
+
+import os
+import sys
+import json
+import time
+import re
+import requests
+from pathlib import Path
+from typing import List, Dict, Optional, Any
+from dataclasses import dataclass
+
+from openpyxl import Workbook, load_workbook
+from openpyxl.styles import Font, Alignment, Border, Side, PatternFill
+
+
+# ==================== 配置区域 ====================
+
+
+# ==================== LLM API 配置 ====================
+# 本地部署的大模型 API 配置
+LLM_API_URL = "http://183.220.37.46:25423/v1/chat/completions"
+LLM_API_KEY = "sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615"
+LLM_MODEL = "/model/Qwen3.5-122B-A10B"
+LLM_TEMPERATURE = 0.0      # 信息提取任务建议用 0,确保结果稳定可复现
+LLM_MAX_TOKENS = 8192      # 公司集团评审意见说明可能很长,建议设为 8192 或更大(原 512 可能不够)
+LLM_TIMEOUT = 120          # API 请求超时时间(秒)
+
+# Excel 列配置
+EXCEL_HEADERS = ["文件名称", "项目名称", "方案名称", "公司集团评审意见说明"]
+
+# 列宽配置
+COLUMN_WIDTHS = {
+    'A': 45,  # 文件名称
+    'B': 50,  # 项目名称
+    'C': 55,  # 方案名称
+    'D': 120, # 公司集团评审意见说明
+}
+
+# 数据行高度
+ROW_HEIGHT = 180
+
+# API 请求间隔(秒)
+API_DELAY = 0.5
+
+# 单文件最大读取字符数(控制 token 消耗)
+MAX_CONTENT_LENGTH = 12000
+
+# 默认路径配置(当不传入命令行参数时使用)
+DEFAULT_SOURCE_DIR = r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\公司集团评审意见说明\公司集团评审意见5.11ouput"
+DEFAULT_OUTPUT_FILE = r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\公司集团评审意见说明\公司集团评审意见汇总表5.11.xlsx"
+
+
+# ==================== 数据模型 ====================
+
+@dataclass
+class ExtractedInfo:
+    """Container for the information extracted from one document."""
+    # NOTE(review): the four fields appear to line up with the four
+    # EXCEL_HEADERS columns, in order — confirm against the writer code.
+    file_name: str
+    project_name: str
+    plan_name: str
+    company_opinion: str
+
+
+# ==================== 大模型调用实现 ====================
+
+def call_llm_api(prompt: str) -> str:
+    """
+    调用本地部署的大模型 API 进行文本理解和信息提取
+    
+    API 端点: http://183.220.37.46:25423/v1/chat/completions
+    模型: /model/Qwen3.5-122B-A10B
+    
+    Args:
+        prompt: 完整的提示词文本(已包含待分析的文档内容)
+    
+    Returns:
+        大模型返回的文本结果(应为 JSON 格式字符串)
+    
+    Raises:
+        requests.RequestException: HTTP 请求失败
+        json.JSONDecodeError: 响应 JSON 解析失败
+        KeyError: 响应格式不符合预期
+    """
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {LLM_API_KEY}"
+    }
+    
+    payload = {
+        "model": LLM_MODEL,
+        "messages": [{"role": "user", "content": prompt}],
+        "temperature": LLM_TEMPERATURE,
+        "max_tokens": LLM_MAX_TOKENS
+    }
+    
+    try:
+        response = requests.post(
+            LLM_API_URL,
+            headers=headers,
+            json=payload,
+            timeout=LLM_TIMEOUT
+        )
+        response.raise_for_status()
+        
+        result = response.json()
+        
+        # 解析 OpenAI 兼容格式的响应
+        # 格式: {"choices": [{"message": {"content": "..."}}]}
+        if "choices" not in result or not result["choices"]:
+            raise KeyError(f"响应中未找到 'choices' 字段: {result.keys()}")
+        
+        message = result["choices"][0].get("message", {})
+        content = message.get("content", "").strip()
+        
+        if not content:
+            raise ValueError("模型返回内容为空")
+        
+        return content
+    
+    except requests.exceptions.ConnectionError as e:
+        raise ConnectionError(
+            f"无法连接到本地 LLM 服务 ({LLM_API_URL}),请确认服务已启动。\n"
+            f"原始错误: {e}"
+        )
+    except requests.exceptions.Timeout:
+        raise TimeoutError(
+            f"请求本地 LLM 服务超时 (>{LLM_TIMEOUT}秒),请检查模型是否过载或增大 LLM_TIMEOUT 配置。"
+        )
+    except requests.exceptions.HTTPError as e:
+        raise RuntimeError(
+            f"LLM API 返回 HTTP 错误: {e.response.status_code}\n"
+            f"响应内容: {e.response.text[:500]}"
+        )
+
+
+# ==================== 提示词模板 ====================
+
+def build_extraction_prompt(content: str) -> str:
+    """
+    构建用于大模型信息提取的详细提示词
+    
+    此提示词经过精心设计,包含:
+    - 角色设定: 让模型理解其作为文档分析专家的身份
+    - 任务说明: 明确需要提取的三个核心字段
+    - 提取规则: 详细的字段定位和推断规则
+    - 输出格式: 严格的 JSON 格式要求
+    - 容错处理: 信息缺失时的标注规范
+    - 示例说明: 帮助模型理解期望的输出形式
+    
+    Args:
+        content: Markdown 文档的原始内容
+    
+    Returns:
+        完整的提示词文本
+    """
+    
+    # 截取内容,避免超出模型上下文长度
+    truncated_content = content[:MAX_CONTENT_LENGTH]
+    if len(content) > MAX_CONTENT_LENGTH:
+        truncated_content += "\n\n... [文档内容已截断,剩余部分省略]"
+    
+    prompt = f"""你是一位资深的工程文档分析专家,擅长从施工方案评审意见文档中提取结构化信息。
+
+## 任务说明
+
+请仔细阅读以下 Markdown 格式的施工方案公司集团评审意见文档,从中提取三个关键字段的信息。
+
+## 提取字段及规则
+
+### 1. 项目名称
+**定义**: 该施工方案所对应的工程项目名称。
+
+**提取规则**(按优先级排序):
+- 优先从文档中的表格字段提取,查找包含以下关键词的单元格:
+  * "项目名称"
+  * "工程名称"
+  * "工程全称"
+  * "建设项目名称"
+  * "标段名称"
+  
+- 如果表格中没有明确字段,从文档标题、页眉或正文开头部分语义推断。
+  通常项目名称会出现在文档的显著位置,格式如:
+  * "XX高速公路XX标段"
+  * "XX大桥工程"
+  * "XX隧道工程"
+  * "XX合同段"
+
+- 如果确实无法确定,标注为"未明确"。
+
+### 2. 方案名称
+**定义**: 该文档所涉及的专项施工方案名称。
+
+**提取规则**(按优先级排序):
+- 优先从文档中的表格字段提取,查找包含以下关键词的单元格:
+  * "方案名称"
+  * "专项方案名称"
+  * "危险性较大分项工程名称"
+  * "分部分项工程名称"
+  * "施工方案名称"
+  
+- 如果表格中没有明确字段,从文档标题中推断。
+  方案名称通常包含以下关键词:
+  * "施工方案"
+  * "专项方案"
+  * "施工组织设计"
+  * "安全专项方案"
+  * "技术方案"
+
+- 注意区分"项目名称"和"方案名称":
+  * 项目名称:宏观的工程名称(如"XX高速公路")
+  * 方案名称:具体的施工方案(如"XX大桥桩基施工方案")
+
+- 如果确实无法确定,标注为"未明确"。
+
+### 3. 公司集团评审意见说明
+**定义**: 整合后的公司集团评审意见及修改回复内容。
+
+**提取规则**(按优先级排序):
+- 从以下命名的章节或表格中提取:
+  * "公司集团评审意见"
+  * "公司集团审查意见"
+  * "公司集团审核意见"
+  * "公司集团论证意见"
+  * "公司/集团意见及回复"
+  * "审查意见及修改回复"
+  * "审核意见及修改情况"
+  * "意见与建议"
+  * "公司集团评审会议纪要"
+  * "公司集团评审报告"
+
+- 内容整合要求:
+  * 将"公司集团评审意见/审查意见/审核意见"与"修改回复/修改情况/回复说明"进行配对整合
+  * 保留原始的公司集团评审意见原文
+  * 保留对应的修改回复或整改措施
+  * 如果有多位评审人员的意见,按顺序列出
+  * 如果公司集团评审意见与回复分散在文档不同位置,需要将它们关联起来
+
+- 格式要求:
+  * 使用清晰的编号列出每条公司集团评审意见及其回复
+  * 保留关键的专业术语和数据
+  * 如果原文有表格形式,转换为文本描述
+  * 每条意见格式建议:"意见X: [公司集团评审原文意见] -> 回复: [施工单位回复内容]"
+
+- 如果确实无法提取到公司集团评审意见内容,标注为"未明确"。
+
+## 输出格式要求
+
+必须以严格的 JSON 格式返回,不要包含任何其他解释文字:
+
+```json
+{{
+  "项目名称": "提取到的项目名称或'未明确'",
+  "方案名称": "提取到的方案名称或'未明确'",
+  "公司集团评审意见说明": "整合后的公司集团评审意见与回复内容,或'未明确'"
+}}
+```
+
+## 注意事项
+
+1. **语义理解优先**: 不要依赖固定的正则表达式,而是通过理解文档内容的语义来提取信息。
+2. **容错处理**: 即使文档格式不标准、表格缺失或字段名称不同,也要尝试从上下文中推断。
+3. **信息整合**: 对于分散在文档各处的公司集团评审意见和回复,需要整合成完整的记录。
+4. **不要编造**: 如果某项信息确实无法从文档中确定,必须标注为"未明确",严禁编造或猜测。
+5. **保持简洁**: 公司集团评审意见说明的内容可以适当精简,但要保留核心观点和关键数据。
+
+## 待分析文档
+
+```markdown
+{truncated_content}
+```
+
+请直接返回 JSON 格式的提取结果:"""
+    
+    return prompt
+
+
+# ==================== 文件处理 ====================
+
+def read_md_files(root_dir: str) -> List[Dict[str, str]]:
+    """
+    Collect one Markdown document from every top-level sub-folder of ``root_dir``.
+
+    Two directory layouts are supported.
+
+    Layout 1 (legacy)::
+
+        root_dir/
+        ├── folder_1/
+        │   └── auto/
+        │       └── xxx.md
+        └── ...
+
+    Layout 2 (current)::
+
+        root_dir/
+        ├── folder_1/
+        │   └── xxx.md
+        ├── folder_2/
+        │   └── subfolder/
+        │       └── yyy.md
+        └── ...
+
+    Search strategy, applied to each top-level sub-folder:
+        1. Look for ``*.md`` files directly inside ``auto/`` (legacy layout).
+        2. If that yields nothing, search the sub-folder recursively.
+        3. Keep exactly one file: the shallowest one, ties broken by path
+           order, so the choice does not depend on filesystem listing order.
+
+    Args:
+        root_dir: Path of the root source folder.
+
+    Returns:
+        A list of dictionaries, one per sub-folder that contained a readable
+        Markdown file, each holding:
+        - file_name: name of the top-level sub-folder
+        - content: text of the selected md file (decoded as UTF-8)
+        - file_path: path of the selected md file
+
+    Raises:
+        FileNotFoundError: If ``root_dir`` does not exist.
+    """
+    def pick_md_file(candidates):
+        """Return the shallowest, then lexicographically first, regular file (or None)."""
+        # is_file() filters out directories that merely happen to end in ".md".
+        files = [path for path in candidates if path.is_file()]
+        if not files:
+            return None
+        return min(files, key=lambda path: (len(path.parts), str(path)))
+
+    md_contents = []
+    root_path = Path(root_dir)
+
+    if not root_path.exists():
+        raise FileNotFoundError(f"源文件夹不存在: {root_dir}")
+
+    # Visit the top-level sub-folders in a stable order.
+    for folder_path in sorted(root_path.iterdir()):
+        if not folder_path.is_dir():
+            continue
+
+        md_file = None
+
+        # Strategy 1: prefer the auto/ sub-directory (legacy layout).
+        auto_dir = folder_path / "auto"
+        if auto_dir.is_dir():
+            md_file = pick_md_file(auto_dir.glob("*.md"))
+            if md_file is not None:
+                print(f"  [已读取] {folder_path.name}/auto -> {md_file.name}")
+
+        # Strategy 2: no auto/ (or no md inside it) -> recursive search.
+        if md_file is None:
+            md_file = pick_md_file(folder_path.rglob("*.md"))
+            if md_file is not None:
+                relative_path = md_file.relative_to(folder_path)
+                print(f"  [已读取] {folder_path.name}/{relative_path} -> {md_file.name}")
+
+        # Nothing found in this sub-folder: report and move on.
+        if md_file is None:
+            print(f"  [跳过] 未找到 md 文件: {folder_path.name}")
+            continue
+
+        # Read the selected file; a failure skips only this sub-folder.
+        try:
+            content = md_file.read_text(encoding="utf-8")
+        except Exception as e:
+            print(f"  [错误] 读取文件失败 {md_file}: {e}")
+            continue
+
+        md_contents.append({
+            "file_name": folder_path.name,
+            "content": content,
+            "file_path": str(md_file)
+        })
+
+    return md_contents
+
+
+def parse_llm_response(response_text: str) -> Dict[str, str]:
+    """
+    Parse the JSON answer returned by the LLM.
+
+    Three strategies are tried in order; the first usable result wins:
+        1. ```json fenced blocks, scanned from the last one backwards.
+        2. A regex for an object holding exactly the three target keys in
+           order. This tolerates raw line breaks inside string values, which
+           strict JSON parsing rejects.
+        3. Brace-depth extraction of candidate objects after removing
+           ``<think>...</think>`` sections.
+
+    A candidate is rejected when its 项目名称 or 方案名称 value still contains
+    placeholder wording from the prompt template.
+
+    Args:
+        response_text: Raw text returned by the LLM.
+
+    Returns:
+        The parsed dictionary. If nothing can be parsed, a dictionary whose
+        name fields are "解析失败" and whose opinion field carries the raw
+        response text.
+    """
+    placeholder_keywords = ["提取到的", "或'", "...", "示例", "Extract", "Template"]
+
+    def is_valid(data: dict) -> bool:
+        pn = data.get("项目名称", "")
+        sn = data.get("方案名称", "")
+        return not any(kw in str(pn) or kw in str(sn) for kw in placeholder_keywords)
+
+    def decode_json_string(raw: str) -> str:
+        """Decode JSON escapes (\\n, \\", \\uXXXX) in a captured string body."""
+        try:
+            # strict=False accepts raw control characters such as line breaks.
+            return json.loads(f'"{raw}"', strict=False)
+        except json.JSONDecodeError:
+            # Not a valid JSON string body (e.g. an unknown escape): keep as-is.
+            return raw
+
+    # Strategy 1: fenced ```json blocks, last one first.
+    json_blocks = re.findall(r'```json\s*(.*?)\s*```', response_text, re.DOTALL)
+    for block_text in reversed(json_blocks):
+        try:
+            data = json.loads(block_text)
+            if isinstance(data, dict) and "项目名称" in data and is_valid(data):
+                return data
+        except json.JSONDecodeError:
+            continue
+
+    # Strategy 2: regex for an object containing the three target keys.
+    pattern = (
+        r'\{'
+        r'\s*"项目名称"\s*:\s*"((?:[^"\\]|\\.)*)"\s*,'
+        r'\s*"方案名称"\s*:\s*"((?:[^"\\]|\\.)*)"\s*,'
+        r'\s*"公司集团评审意见说明"\s*:\s*"((?:[^"\\]|\\.)*)"'
+        r'\s*\}'
+    )
+    matches = re.findall(pattern, response_text, re.DOTALL)
+    # Scan from the end and apply the same placeholder filter as the other
+    # strategies, so an echoed prompt template is never returned as a result.
+    for pn, sn, eo in reversed(matches):
+        candidate = {
+            "项目名称": decode_json_string(pn).strip(),
+            "方案名称": decode_json_string(sn).strip(),
+            "公司集团评审意见说明": decode_json_string(eo).strip()
+        }
+        if is_valid(candidate):
+            return candidate
+
+    # Strategy 3: brace-depth matching (last resort).
+    text = re.sub(r'<think>.*?</think>', '', response_text, flags=re.DOTALL)
+    json_objects = _extract_json_by_brace_company(text)
+
+    for obj in json_objects:
+        try:
+            data = json.loads(obj)
+            if isinstance(data, dict) and "项目名称" in data and is_valid(data):
+                return data
+        except json.JSONDecodeError:
+            continue
+
+    print(f"  [警告] 无法解析 JSON 响应,使用原始文本")
+    return {
+        "项目名称": "解析失败",
+        "方案名称": "解析失败",
+        "公司集团评审意见说明": response_text
+    }
+
+
+def _extract_json_by_brace_company(text: str) -> list:
+    """
+    Extract top-level ``{...}`` spans from ``text`` by tracking brace depth.
+
+    The spans are returned in order of appearance and are only candidates:
+    the caller must still try to parse each one as JSON. Braces inside string
+    literals are counted like any other brace.
+
+    Args:
+        text: Text to scan.
+
+    Returns:
+        A list of substrings, each starting at a depth-0 ``{`` and ending at
+        its matching ``}``. Unclosed spans are dropped.
+    """
+    depth = 0
+    span_start = -1
+    spans = []
+
+    for pos, char in enumerate(text):
+        if char == '{':
+            if depth == 0:
+                span_start = pos
+            depth += 1
+        elif char == '}':
+            if depth == 0:
+                # Stray closing brace outside any object: ignore it. Letting
+                # the depth go negative would hide every later object.
+                continue
+            depth -= 1
+            if depth == 0:
+                spans.append(text[span_start:pos + 1])
+                span_start = -1
+
+    return spans
+
+
+def extract_info_with_llm(content: str) -> Dict[str, str]:
+    """
+    Extract the three target fields from a Markdown document via the LLM.
+
+    Builds the prompt, calls the LLM, parses the reply and normalises each
+    field to a string. Failures while calling the LLM or parsing its reply are
+    reported in the returned fields instead of being raised.
+
+    Args:
+        content: Markdown document text.
+
+    Returns:
+        A dictionary with the keys 项目名称, 方案名称 and 公司集团评审意见说明.
+        A missing or null field becomes "未明确"; on failure each field holds
+        an "提取失败: ..." message.
+    """
+    def as_text(value) -> str:
+        """Normalise a parsed JSON value to the string written to Excel."""
+        if value is None:
+            return "未明确"
+        if isinstance(value, str):
+            return value.strip()
+        # The model sometimes returns a list/number/object instead of a string;
+        # serialise it rather than failing the whole extraction on .strip().
+        return json.dumps(value, ensure_ascii=False)
+
+    prompt = build_extraction_prompt(content)
+
+    try:
+        response_text = call_llm_api(prompt)
+        extracted = parse_llm_response(response_text)
+
+        # Make sure every required field is present and is a string.
+        return {
+            "项目名称": as_text(extracted.get("项目名称")),
+            "方案名称": as_text(extracted.get("方案名称")),
+            "公司集团评审意见说明": as_text(extracted.get("公司集团评审意见说明"))
+        }
+
+    except Exception as e:
+        print(f"  [错误] LLM 提取失败: {e}")
+        return {
+            "项目名称": f"提取失败: {str(e)[:50]}",
+            "方案名称": f"提取失败: {str(e)[:50]}",
+            "公司集团评审意见说明": f"提取失败: {str(e)}"
+        }
+
+
+# ==================== Excel 生成 ====================
+
+def _init_excel_styles(ws):
+    """
+    Write the header row and apply the sheet-level layout.
+
+    Row 1 receives the titles from EXCEL_HEADERS with a solid 4472C4 fill,
+    white bold 12-pt font and centred, wrapped alignment. Column widths come
+    from COLUMN_WIDTHS, and the pane is frozen at A2.
+
+    Args:
+        ws: Worksheet to initialise.
+    """
+    # Header appearance, shared by every header cell.
+    fill = PatternFill(start_color="4472C4", end_color="4472C4", fill_type="solid")
+    font = Font(color="FFFFFF", bold=True, size=12)
+    alignment = Alignment(horizontal="center", vertical="center", wrap_text=True)
+
+    for column_index, title in enumerate(EXCEL_HEADERS, start=1):
+        header_cell = ws.cell(row=1, column=column_index)
+        header_cell.value = title
+        header_cell.fill = fill
+        header_cell.font = font
+        header_cell.alignment = alignment
+
+    # Column widths.
+    for column_letter in COLUMN_WIDTHS:
+        ws.column_dimensions[column_letter].width = COLUMN_WIDTHS[column_letter]
+
+    # Freeze the first row.
+    ws.freeze_panes = 'A2'
+
+
+def _apply_row_style(ws, row_num: int):
+    """
+    Style one data row: thin borders, top-aligned wrapped text, fixed height.
+
+    Args:
+        ws: Worksheet containing the row.
+        row_num: 1-based index of the row to style.
+    """
+    edge = Side(style='thin')
+    border = Border(left=edge, right=edge, top=edge, bottom=edge)
+
+    column_count = len(EXCEL_HEADERS)
+    for column_index in range(1, column_count + 1):
+        target = ws.cell(row=row_num, column=column_index)
+        target.border = border
+        target.alignment = Alignment(vertical="top", wrap_text=True)
+
+    ws.row_dimensions[row_num].height = ROW_HEIGHT
+
+
+def append_to_excel(row_data: Dict[str, str], output_file: str):
+    """
+    Append a single record to the Excel file.
+
+    If the file does not exist, a new workbook with the header row is created;
+    otherwise the record is added after the last row. The workbook is saved
+    after every call so that finished records survive an interruption.
+
+    The save goes to a temporary sibling file which then replaces the target,
+    so an interruption in the middle of a save cannot corrupt the rows that
+    were already stored.
+
+    Args:
+        row_data: Record to write, keyed by 文件名称, 项目名称, 方案名称 and
+            公司集团评审意见说明; missing keys are written as empty strings.
+        output_file: Path of the output Excel file.
+    """
+    import os  # local import: only the atomic replace below needs it
+
+    output_path = Path(output_file)
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    if output_path.exists():
+        # Existing file: load it and append after the last row.
+        wb = load_workbook(output_file)
+        ws = wb.active
+        next_row = ws.max_row + 1
+    else:
+        # No file yet: create a workbook with the styled header row.
+        wb = Workbook()
+        ws = wb.active
+        ws.title = "公司集团评审意见汇总"
+        _init_excel_styles(ws)
+        next_row = 2
+
+    # Write the record.
+    ws.append([
+        row_data.get("文件名称", ""),
+        row_data.get("项目名称", ""),
+        row_data.get("方案名称", ""),
+        row_data.get("公司集团评审意见说明", "")
+    ])
+
+    # Style the new row.
+    _apply_row_style(ws, next_row)
+
+    # Save to a temporary file first, then swap it into place.
+    tmp_path = output_path.with_name(output_path.name + ".tmp")
+    try:
+        wb.save(str(tmp_path))
+        os.replace(str(tmp_path), str(output_path))
+    finally:
+        # After a successful replace the temp file is gone; after a failure,
+        # remove the partial file so it does not linger next to the output.
+        try:
+            tmp_path.unlink()
+        except FileNotFoundError:
+            pass
+
+
+def create_excel(data_rows: List[Dict[str, str]], output_file: str):
+    """
+    Build a formatted Excel file from scratch (full rewrite of all rows).
+
+    The header row and sheet layout come from ``_init_excel_styles`` and every
+    data row is styled by ``_apply_row_style``. The parent directory of
+    ``output_file`` is created when missing.
+
+    Args:
+        data_rows: Records to write, one dictionary per row.
+        output_file: Path of the output Excel file.
+    """
+    target = Path(output_file)
+    target.parent.mkdir(parents=True, exist_ok=True)
+
+    workbook = Workbook()
+    sheet = workbook.active
+    sheet.title = "公司集团评审意见汇总"
+
+    # Header row and sheet-level layout.
+    _init_excel_styles(sheet)
+
+    # One worksheet row per record, columns in header order.
+    column_keys = ("文件名称", "项目名称", "方案名称", "公司集团评审意见说明")
+    for record in data_rows:
+        sheet.append([record.get(key, "") for key in column_keys])
+
+    # Style every data row (row 1 is the header).
+    last_row = sheet.max_row
+    for row_index in range(2, last_row + 1):
+        _apply_row_style(sheet, row_index)
+
+    workbook.save(output_file)
+
+    print(f"\n✅ 已成功保存到: {output_file}")
+    print(f"📊 共写入 {len(data_rows)} 条记录")
+
+
+# ==================== 主流程 ====================
+
+def main():
+    """
+    Command-line entry point.
+
+    Steps:
+        1. Resolve the source folder and output workbook from ``sys.argv``
+           (falling back to DEFAULT_SOURCE_DIR / DEFAULT_OUTPUT_FILE).
+        2. Read one Markdown file per sub-folder.
+        3. Extract the fields of each document with the LLM and append the
+           record to the workbook right away.
+
+    Folder names already present in column 1 of an existing workbook are
+    skipped, which lets an interrupted run be resumed. The process exits with
+    status 1 when the source folder is missing, unreadable or contains no
+    Markdown file.
+    """
+    # Resolve paths from the command line, or fall back to the defaults.
+    if len(sys.argv) >= 3:
+        root_dir = sys.argv[1]
+        output_file = sys.argv[2]
+        print("✅ 使用命令行传入的路径")
+    else:
+        root_dir = DEFAULT_SOURCE_DIR
+        output_file = DEFAULT_OUTPUT_FILE
+        print("⚠️  未提供命令行参数,使用默认路径")
+        print(f"    如需自定义路径,请运行: python md2excel_extractor_company.py <源文件夹> <输出Excel>")
+
+    # Validate the source directory.
+    if not os.path.isdir(root_dir):
+        print(f"错误: 源文件夹不存在: {root_dir}")
+        sys.exit(1)
+
+    print("=" * 70)
+    print("Markdown 公司集团评审意见文档批量提取工具")
+    print("=" * 70)
+    print(f"\n📁 源文件夹: {root_dir}")
+    print(f"📄 输出文件: {output_file}")
+
+    # Step 1: read the md files.
+    print(f"\n【步骤 1/3】扫描并读取 Markdown 文件...")
+    try:
+        md_contents = read_md_files(root_dir)
+    except Exception as e:
+        print(f"错误: 读取文件失败: {e}")
+        sys.exit(1)
+
+    if not md_contents:
+        print("未找到任何有效的 md 文件,请检查目录结构")
+        sys.exit(1)
+
+    print(f"\n✅ 共找到 {len(md_contents)} 个有效文档")
+
+    # Step 2: extract with the LLM.
+    print(f"\n【步骤 2/3】使用大模型提取信息...")
+    print(f"  LLM 端点: {LLM_API_URL}")
+    print(f"  模型: {LLM_MODEL}")
+    print(f"  Temperature: {LLM_TEMPERATURE} | Max tokens: {LLM_MAX_TOKENS}")
+    print(f"  💡 每处理完一个文件会立即追加写入 Excel,支持断点续传\n")
+
+    # Resume support: collect the folder names already stored in the workbook.
+    output_path = Path(output_file)
+    processed_files = set()
+    if output_path.exists():
+        try:
+            wb = load_workbook(output_file)
+            ws = wb.active
+            # Column 1 holds the folder name; row 1 is the header.
+            for row in ws.iter_rows(min_row=2, values_only=True):
+                if row and row[0]:
+                    processed_files.add(row[0])
+            print(f"  📋 检测到已有进度,已处理 {len(processed_files)} 个文件,将跳过这些文件")
+        except Exception as e:
+            # Do not hide the problem: with an unreadable workbook nothing can
+            # be skipped, and the user should know why.
+            print(f"  ⚠️  无法读取已有进度文件,将不跳过任何文件: {e}")
+
+    total = len(md_contents)
+    processed_count = 0
+
+    for i, item in enumerate(md_contents, 1):
+        file_name = item['file_name']
+
+        # Skip documents handled by a previous run.
+        if file_name in processed_files:
+            print(f"[{i}/{total}] ⏭️  跳过已处理: {file_name}")
+            continue
+
+        print(f"[{i}/{total}] 正在处理: {file_name}")
+
+        # Extraction and writing are guarded separately so that a write
+        # failure is neither retried blindly nor allowed to abort the batch.
+        try:
+            extracted = extract_info_with_llm(item['content'])
+            row_data = {
+                "文件名称": file_name,
+                "项目名称": extracted["项目名称"],
+                "方案名称": extracted["方案名称"],
+                "公司集团评审意见说明": extracted["公司集团评审意见说明"]
+            }
+            extraction_ok = True
+        except Exception as e:
+            print(f"  ❌ 处理失败: {e}")
+            row_data = {
+                "文件名称": file_name,
+                "项目名称": "处理异常",
+                "方案名称": "处理异常",
+                "公司集团评审意见说明": f"处理异常: {str(e)}"
+            }
+            extraction_ok = False
+
+        # Append immediately so finished work survives an interruption.
+        try:
+            append_to_excel(row_data, output_file)
+        except Exception as e:
+            print(f"  ❌ 写入 Excel 失败: {e}")
+        else:
+            processed_count += 1
+            if extraction_ok:
+                print(f"  ✅ 提取完成并已写入 Excel")
+
+        # Pause between API calls to avoid sending requests too quickly.
+        if i < total:
+            time.sleep(API_DELAY)
+
+    # Step 3: summary (rows were already written incrementally above).
+    print(f"\n【步骤 3/3】生成 Excel 汇总表...")
+    print(f"  本次新处理: {processed_count} 个文件")
+    print(f"  总计写入: {len(processed_files) + processed_count} 个文件")
+
+    print("\n" + "=" * 70)
+    print("🎉 处理完成!")
+    print("=" * 70)
+
+
+if __name__ == "__main__":
+    main()

+ 159 - 88
src/app/scripts/md2excel_extractor.py → src/app/scripts/md2excel_extractor_expert.py

@@ -7,12 +7,19 @@ md2excel: Markdown 专家意见文档批量提取工具
     方案名称和专家意见,写入 Excel 汇总表。
 
 用法:
+    # 方式1:使用默认路径(无需参数)
+    python md2excel_extractor.py
+    
+    # 方式2:自定义路径
     python md2excel_extractor.py <源文件夹路径> <输出Excel路径>
 
 示例:
     python md2excel_extractor.py D:/专家意见/temp D:/汇总表.xlsx
 
 目录结构要求:
+    支持两种结构,自动识别:
+    
+    结构1(旧):
     源文件夹/
     ├── 子文件夹1/
     │   └── auto/
@@ -21,6 +28,15 @@ md2excel: Markdown 专家意见文档批量提取工具
     │   └── auto/
     │       └── yyy.md
     └── ...
+    
+    结构2(新):
+    源文件夹/
+    ├── 子文件夹1/
+    │   └── xxx.md
+    ├── 子文件夹2/
+    │   └── subfolder/
+    │       └── yyy.md
+    └── ...
 """
 
 import os
@@ -39,6 +55,16 @@ from openpyxl.styles import Font, Alignment, Border, Side, PatternFill
 
 # ==================== 配置区域 ====================
 
+
+# ==================== LLM API 配置 ====================
+# 本地部署的大模型 API 配置
+LLM_API_URL = "http://183.220.37.46:25423/v1/chat/completions"
+LLM_API_KEY = "sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615"
+LLM_MODEL = "/model/Qwen3.5-122B-A10B"
+LLM_TEMPERATURE = 0.0      # 信息提取任务建议用 0,确保结果稳定可复现
+LLM_MAX_TOKENS = 8192      # 专家意见回复可能很长,建议设为 8192 或更大(原 512 可能不够)
+LLM_TIMEOUT = 120          # API 请求超时时间(秒)
+
 # Excel 列配置
 EXCEL_HEADERS = ["文件名称", "项目名称", "方案名称", "专项方案专家评审意见回复表"]
 
@@ -59,14 +85,9 @@ API_DELAY = 0.5
 # 单文件最大读取字符数(控制 token 消耗)
 MAX_CONTENT_LENGTH = 12000
 
-# ==================== LLM API 配置 ====================
-# 本地部署的大模型 API 配置
-LLM_API_URL = "http://localhost:25423/v1/chat/completions"
-LLM_API_KEY = "sk_prod_SELVoIV1d3gku28koH_ONg8L_B2cQis__71f55615"
-LLM_MODEL = "/model/Qwen3.5-122B-A10B"
-LLM_TEMPERATURE = 0.0      # 信息提取任务建议用 0,确保结果稳定可复现
-LLM_MAX_TOKENS = 8192      # 专家意见回复可能很长,建议设为 8192 或更大(原 512 可能不够)
-LLM_TIMEOUT = 120          # API 请求超时时间(秒)
+# 默认路径配置(当不传入命令行参数时使用)
+DEFAULT_SOURCE_DIR = r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\专家评审意见_记录\专家意见5.11output"
+DEFAULT_OUTPUT_FILE = r"E:\提供的原始文件\原始文件\PDF分类结果_服务器MinerU版\专家评审意见_记录\专家意见汇总表5.11.xlsx"
 
 
 # ==================== 数据模型 ====================
@@ -86,7 +107,7 @@ def call_llm_api(prompt: str) -> str:
     """
     调用本地部署的大模型 API 进行文本理解和信息提取
     
-    API 端点: http://localhost:25423/v1/chat/completions
+    API 端点: http://183.220.37.46:25423/v1/chat/completions
     模型: /model/Qwen3.5-122B-A10B
     
     Args:
@@ -291,22 +312,117 @@ def build_extraction_prompt(content: str) -> str:
     return prompt
 
 
+def parse_llm_response(response_text: str) -> Dict[str, str]:
+    """
+    Parse the JSON answer returned by the LLM.
+
+    Output structure of the Qwen3 model:
+    1. A thinking section (which may contain JSON examples)
+    2. The final answer: a ```json fenced block
+
+    Three strategies are tried in order; the first usable result wins:
+        1. ```json fenced blocks, scanned from the last one backwards.
+        2. A regex for an object holding exactly the three target keys in
+           order. This tolerates raw line breaks inside string values, which
+           strict JSON parsing rejects.
+        3. Brace-depth extraction of candidate objects after removing
+           ``<think>...</think>`` sections.
+
+    A candidate is rejected when its 项目名称 or 方案名称 value still contains
+    placeholder wording from the prompt template.
+
+    Args:
+        response_text: Raw text returned by the LLM.
+
+    Returns:
+        The parsed dictionary. If nothing can be parsed, a dictionary whose
+        name fields are "解析失败" and whose opinion field carries the raw
+        response text.
+    """
+    placeholder_keywords = ["提取到的", "或'", "...", "示例", "Extract", "Template"]
+
+    def is_valid(data: dict) -> bool:
+        pn = data.get("项目名称", "")
+        sn = data.get("方案名称", "")
+        return not any(kw in str(pn) or kw in str(sn) for kw in placeholder_keywords)
+
+    def decode_json_string(raw: str) -> str:
+        """Decode JSON escapes (\\n, \\", \\uXXXX) in a captured string body."""
+        try:
+            # strict=False accepts raw control characters such as line breaks.
+            return json.loads(f'"{raw}"', strict=False)
+        except json.JSONDecodeError:
+            # Not a valid JSON string body (e.g. an unknown escape): keep as-is.
+            return raw
+
+    # Strategy 1: fenced ```json blocks, last one first.
+    json_blocks = re.findall(r'```json\s*(.*?)\s*```', response_text, re.DOTALL)
+    for block_text in reversed(json_blocks):
+        try:
+            data = json.loads(block_text)
+            if isinstance(data, dict) and "项目名称" in data and is_valid(data):
+                return data
+        except json.JSONDecodeError:
+            continue
+
+    # Strategy 2: regex for an object containing the three target keys.
+    pattern = (
+        r'\{'
+        r'\s*"项目名称"\s*:\s*"((?:[^"\\]|\\.)*)"\s*,'
+        r'\s*"方案名称"\s*:\s*"((?:[^"\\]|\\.)*)"\s*,'
+        r'\s*"专项方案专家评审意见回复表"\s*:\s*"((?:[^"\\]|\\.)*)"'
+        r'\s*\}'
+    )
+    matches = re.findall(pattern, response_text, re.DOTALL)
+    for pn, sn, eo in reversed(matches):
+        if not any(kw in pn or kw in sn for kw in placeholder_keywords):
+            # The captures are raw JSON string bodies; decode their escapes so
+            # the result matches what json.loads yields in the other strategies.
+            return {
+                "项目名称": decode_json_string(pn).strip(),
+                "方案名称": decode_json_string(sn).strip(),
+                "专项方案专家评审意见回复表": decode_json_string(eo).strip()
+            }
+
+    # Strategy 3: brace-depth matching (last resort).
+    text = re.sub(r'<think>.*?</think>', '', response_text, flags=re.DOTALL)
+    json_objects = _extract_json_by_brace(text)
+    for obj in json_objects:
+        try:
+            data = json.loads(obj)
+            if isinstance(data, dict) and "项目名称" in data and is_valid(data):
+                return data
+        except json.JSONDecodeError:
+            continue
+
+    print(f"  [警告] 无法解析 JSON 响应,使用原始文本")
+    return {
+        "项目名称": "解析失败",
+        "方案名称": "解析失败",
+        "专项方案专家评审意见回复表": response_text
+    }
+
+
+def _extract_json_by_brace(text: str) -> list:
+    """
+    Extract top-level ``{...}`` spans from ``text`` by tracking brace depth.
+
+    The spans are returned in order of appearance and are only candidates:
+    the caller must still try to parse each one as JSON. Braces inside string
+    literals are counted like any other brace.
+
+    Args:
+        text: Text to scan.
+
+    Returns:
+        A list of substrings, each starting at a depth-0 ``{`` and ending at
+        its matching ``}``. Unclosed spans are dropped.
+    """
+    depth = 0
+    span_start = -1
+    spans = []
+
+    for pos, char in enumerate(text):
+        if char == '{':
+            if depth == 0:
+                span_start = pos
+            depth += 1
+        elif char == '}':
+            if depth == 0:
+                # Stray closing brace outside any object: ignore it. Letting
+                # the depth go negative would hide every later object.
+                continue
+            depth -= 1
+            if depth == 0:
+                spans.append(text[span_start:pos + 1])
+                span_start = -1
+
+    return spans
+
+
 # ==================== 文件处理 ====================
 
 def read_md_files(root_dir: str) -> List[Dict[str, str]]:
     """
     遍历文件夹,读取所有 md 文件内容
     
-    目录结构要求:
+    支持两种目录结构:
+    结构1(旧):
         root_dir/
         ├── folder_1/
         │   └── auto/
         │       └── xxx.md
+        └── ...
+    
+    结构2(新):
+        root_dir/
+        ├── folder_1/
+        │   └── xxx.md
         ├── folder_2/
-        │   └── auto/
+        │   └── subfolder/
         │       └── yyy.md
         └── ...
     
+    搜索策略:
+        1. 优先在每个子文件夹的 auto/ 子目录中查找(兼容旧结构)
+        2. 如果没有 auto/,则在该子文件夹及其所有嵌套子文件夹中递归查找
+        3. 每个顶层子文件夹只取第一个找到的 md 文件
+    
     Args:
         root_dir: 源文件夹根目录路径
     
@@ -326,20 +442,31 @@ def read_md_files(root_dir: str) -> List[Dict[str, str]]:
         if not folder_path.is_dir():
             continue
         
-        # 查找 auto 子目录
+        md_file = None
+        
+        # 策略1: 优先查找 auto/ 子目录(兼容旧结构)
         auto_dir = folder_path / "auto"
-        if not auto_dir.exists() or not auto_dir.is_dir():
-            print(f"  [跳过] 未找到 auto 目录: {folder_path.name}")
-            continue
+        if auto_dir.exists() and auto_dir.is_dir():
+            md_files = list(auto_dir.glob("*.md"))
+            if md_files:
+                md_file = md_files[0]
+                print(f"  [已读取] {folder_path.name}/auto -> {md_file.name}")
+        
+        # 策略2: 如果没有 auto/ 或 auto/ 中没有 md,则在当前子文件夹中递归查找
+        if md_file is None:
+            # 使用 rglob 递归查找所有 .md 文件
+            all_md_files = list(folder_path.rglob("*.md"))
+            if all_md_files:
+                md_file = all_md_files[0]
+                relative_path = md_file.relative_to(folder_path)
+                print(f"  [已读取] {folder_path.name}/{relative_path} -> {md_file.name}")
         
-        # 查找 md 文件
-        md_files = list(auto_dir.glob("*.md"))
-        if not md_files:
-            print(f"  [跳过] auto 目录中无 md 文件: {folder_path.name}")
+        # 如果还是没找到,跳过
+        if md_file is None:
+            print(f"  [跳过] 未找到 md 文件: {folder_path.name}")
             continue
         
-        # 读取第一个 md 文件
-        md_file = md_files[0]
+        # 读取 md 文件
         try:
             content = md_file.read_text(encoding="utf-8")
             md_contents.append({
@@ -347,7 +474,6 @@ def read_md_files(root_dir: str) -> List[Dict[str, str]]:
                 "content": content,
                 "file_path": str(md_file)
             })
-            print(f"  [已读取] {folder_path.name} -> {md_file.name}")
         except Exception as e:
             print(f"  [错误] 读取文件失败 {md_file}: {e}")
             continue
@@ -355,65 +481,6 @@ def read_md_files(root_dir: str) -> List[Dict[str, str]]:
     return md_contents
 
 
-def parse_llm_response(response_text: str) -> Dict[str, str]:
-    """
-    解析大模型返回的 JSON 响应
-    
-    Args:
-        response_text: 大模型返回的原始文本
-    
-    Returns:
-        解析后的字典,包含提取的字段
-    """
-    def extract_json_objects(text):
-        brace_depth = 0
-        json_objects = []
-        start_idx = -1
-        
-        for i, char in enumerate(text):
-            if char == '{':
-                if brace_depth == 0:
-                    start_idx = i
-                brace_depth += 1
-            elif char == '}':
-                brace_depth -= 1
-                if brace_depth == 0 and start_idx != -1:
-                    json_objects.append(text[start_idx:i+1])
-                    start_idx = -1
-        
-        return json_objects
-    
-    json_objects = extract_json_objects(response_text)
-    
-    placeholder_keywords = ["提取到的", "或'", "...", "示例", "Extract", "Template"]
-    
-    for obj in json_objects:
-        try:
-            data = json.loads(obj)
-            if isinstance(data, dict) and "项目名称" in data:
-                project_name = data.get("项目名称", "")
-                plan_name = data.get("方案名称", "")
-                expert_opinion = data.get("专项方案专家评审意见回复表", "")
-                
-                is_placeholder = False
-                for keyword in placeholder_keywords:
-                    if keyword in str(project_name) or keyword in str(plan_name):
-                        is_placeholder = True
-                        break
-                
-                if not is_placeholder and len(str(expert_opinion)) > 50:
-                    return data
-        except json.JSONDecodeError:
-            continue
-    
-    print(f"  [警告] 无法解析 JSON 响应,使用原始文本")
-    return {
-        "项目名称": "解析失败",
-        "方案名称": "解析失败",
-        "专项方案专家评审意见回复表": response_text
-    }
-
-
 def extract_info_with_llm(content: str) -> Dict[str, str]:
     """
     使用大模型从文档中提取信息
@@ -583,13 +650,17 @@ def create_excel(data_rows: List[Dict[str, str]], output_file: str):
 def main():
     """主函数"""
     # 解析命令行参数
-    if len(sys.argv) < 3:
-        print("用法: python md2excel_extractor.py <源文件夹路径> <输出Excel路径>")
-        print("示例: python md2excel_extractor.py D:/专家意见/temp D:/汇总表.xlsx")
-        sys.exit(1)
-    
-    root_dir = sys.argv[1]
-    output_file = sys.argv[2]
+    if len(sys.argv) >= 3:
+        # 使用命令行传入的参数
+        root_dir = sys.argv[1]
+        output_file = sys.argv[2]
+        print("✅ 使用命令行传入的路径")
+    else:
+        # 使用默认路径
+        root_dir = DEFAULT_SOURCE_DIR
+        output_file = DEFAULT_OUTPUT_FILE
+        print("⚠️  未提供命令行参数,使用默认路径")
+        print(f"    如需自定义路径,请运行: python md2excel_extractor.py <源文件夹> <输出Excel>")
     
     # 验证源目录
     if not os.path.isdir(root_dir):