Convert PDFs to Images
作者:XD / 发表: 2024年3月31日 21:47 / 更新: 2024年3月31日 21:47 / 编程笔记 / 阅读量:703
Use Python to convert PDF documents into images, page by page.
from pdf2image import convert_from_path
import os
def convert_pdf_to_images(pdf_path, output_folder):
    """Render every page of a PDF to a JPEG file inside ``output_folder``.

    Pages are saved in document order as ``page_1.jpg``, ``page_2.jpg``, ...

    Args:
        pdf_path: Path of the PDF file to convert.
        output_folder: Directory that receives the page images. It is
            created, including missing parent directories, when absent.
    """
    # exist_ok=True replaces a separate os.path.exists() check, which could
    # raise FileExistsError if the directory appeared between the check and
    # the creation.
    os.makedirs(output_folder, exist_ok=True)

    # pdf2image returns one image object per page of the document.
    images = convert_from_path(pdf_path)
    for page_number, image in enumerate(images, start=1):
        image_path = os.path.join(output_folder, f"page_{page_number}.jpg")
        image.save(image_path, 'JPEG')
def process_all_pdfs(pdf_folder):
    """Convert every PDF found under ``pdf_folder``, searching recursively.

    For each PDF, the page images go to a folder located next to the PDF
    and named after the PDF file without its extension.

    Args:
        pdf_folder: Root directory to walk for ``.pdf`` files.
    """
    for current_dir, _subdirs, filenames in os.walk(pdf_folder):
        for filename in filenames:
            # The extension match is case-insensitive (".PDF" also counts).
            if not filename.lower().endswith('.pdf'):
                continue
            stem, _extension = os.path.splitext(filename)
            source_pdf = os.path.join(current_dir, filename)
            target_dir = os.path.join(current_dir, stem)
            convert_pdf_to_images(source_pdf, target_dir)
# Root directory that is searched recursively for PDF files; edit before running.
pdf_folder = '/your_folder_path/'

# Start the batch conversion only when this file is executed as a script, so
# importing it for its functions does not trigger a conversion run.
if __name__ == "__main__":
    process_all_pdfs(pdf_folder)