|
|
@@ -0,0 +1,63 @@
|
|
|
+import fitz # PyMuPDF
|
|
|
+import os
|
|
|
+
|
|
|
def pdf_to_images(pdf_path, output_root_dir, zoom=2):
    """Render every page of one PDF to a PNG inside a folder named after the PDF.

    The output folder is ``output_root_dir/<pdf name without extension>`` and is
    created if missing. Pages are written as ``NNN_<pdf name>.png`` where
    ``NNN`` is the 1-based page number zero-padded to three digits.

    Args:
        pdf_path: Path of the PDF file to convert.
        output_root_dir: Directory under which the per-PDF folder is created.
        zoom: Scale factor applied to both axes when rendering each page.

    Returns:
        None. If ``pdf_path`` is not an existing file, an error is printed and
        nothing is created.
    """
    # isfile (not exists): a directory path must not be handed to fitz.open.
    if not os.path.isfile(pdf_path):
        print(f"错误：文件 {pdf_path} 不存在！")
        return

    # PDF file name without its extension; used for the folder and image names.
    pdf_filename = os.path.splitext(os.path.basename(pdf_path))[0]

    # One folder per PDF; exist_ok avoids the check-then-create race.
    output_dir = os.path.join(output_root_dir, pdf_filename)
    os.makedirs(output_dir, exist_ok=True)

    # The render matrix does not depend on the page, so build it once.
    mat = fitz.Matrix(zoom, zoom)

    # The context manager closes the document even if rendering or saving fails.
    with fitz.open(pdf_path) as pdf:
        total_pages = pdf.page_count
        print(f"\n📄 处理：{pdf_filename}，总页数：{total_pages}")

        for page_number, page in enumerate(pdf, start=1):
            pix = page.get_pixmap(matrix=mat)
            # Image name pattern: 001_<pdf name>.png
            img_path = os.path.join(
                output_dir, f"{page_number:03d}_{pdf_filename}.png"
            )
            pix.save(img_path)

    print(f"✅ 完成：{pdf_filename} → 图片保存在：{output_dir}")
|
|
|
+
|
|
|
def batch_convert_pdf_folder(pdf_folder, output_root_dir, zoom=2):
    """Convert every PDF found directly inside ``pdf_folder`` to images.

    Entries whose name ends with ``.pdf`` (case-insensitive) are passed, one by
    one, to ``pdf_to_images`` together with ``output_root_dir`` and ``zoom``.
    Subfolders are not searched.

    Args:
        pdf_folder: Directory that holds the PDF files.
        output_root_dir: Output root forwarded to ``pdf_to_images``.
        zoom: Scale factor forwarded to ``pdf_to_images``.

    Returns:
        None. If ``pdf_folder`` is not a directory, an error is printed and
        nothing is converted.
    """
    if os.path.isdir(pdf_folder):
        # Keep only entries with a .pdf suffix, in os.listdir order.
        pdf_names = [
            entry
            for entry in os.listdir(pdf_folder)
            if entry.lower().endswith(".pdf")
        ]
        for entry in pdf_names:
            source_path = os.path.join(pdf_folder, entry)
            pdf_to_images(source_path, output_root_dir, zoom=zoom)
    else:
        print(f"错误：PDF 文件夹不存在：{pdf_folder}")
        return
|
|
|
+
|
|
|
# ============ Only the two paths below need to be edited ============
if __name__ == "__main__":
    # Folder that holds all the PDF files to convert.
    PDF_FOLDER = r"F:\第二阶段编制依据及施工方案数据治理-20260206\标题未标注过\pdf"

    # Output root; each PDF gets a folder of the same name created here.
    OUTPUT_ROOT = r"F:\第二阶段编制依据及施工方案数据治理-20260206\标题未标注过\image"

    # Run the batch conversion.
    batch_convert_pdf_folder(
        pdf_folder=PDF_FOLDER,
        output_root_dir=OUTPUT_ROOT,
        zoom=2,
    )
|