Image2Text: Automating Document Layout Analysis with Python and LayoutParser
作者:XD / 发表: 2024年3月31日 21:59 / 更新: 2024年3月31日 21:59 / 编程笔记 / 阅读量:886
Image2Text: Automating Document Layout Analysis with Python and LayoutParser
import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np
def to_serializable(obj):
    """Convert NumPy values into plain Python objects that ``json`` can encode.

    NumPy scalars (floating, integer and boolean) become ``float``, ``int``
    and ``bool``; arrays become nested lists. Dicts, lists and tuples are
    walked recursively so NumPy values nested inside them are converted too.
    Anything else is returned unchanged.

    Args:
        obj: The value to convert.

    Returns:
        The converted value, or ``obj`` itself when no conversion applies.
    """
    # Match the abstract scalar bases so every width (float16, int64, ...) is
    # covered, not just float32/float64.
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # Containers: convert the values they hold (dict keys are left as-is).
    if isinstance(obj, dict):
        return {key: to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [to_serializable(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(to_serializable(item) for item in obj)
    return obj
def process_image(image_path, model):
    """Detect the layout of one image and return it as JSON-ready dicts.

    Args:
        image_path: Path of the image file to read with OpenCV.
        model: Object exposing ``detect(image)`` that returns an iterable of
            layout elements, each providing ``to_dict()``.

    Returns:
        A list with one dict per detected element; every value has been
        passed through ``to_serializable``.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None rather than raising, so
    # fail here with the offending path instead of a confusing TypeError below.
    if image is None:
        raise ValueError(f"Could not read image: {image_path!r}")

    # OpenCV loads channels in BGR order; reverse the last axis to get RGB.
    image = image[..., ::-1]

    layout = model.detect(image)

    # Turn each layout element into a dict whose values are JSON-encodable.
    return [
        {key: to_serializable(value) for key, value in element.to_dict().items()}
        for element in layout
    ]
def save_layout_to_json(layout_data, json_path):
    """Write ``layout_data`` to ``json_path`` as JSON.

    The file is opened in text write mode, so existing content is replaced.

    Args:
        layout_data: JSON-encodable data to store.
        json_path: Destination file path.
    """
    with open(json_path, 'w') as out_file:
        json.dump(obj=layout_data, fp=out_file)
# Build the layout detector once at module level; process_folder reads this
# global when it calls process_image.
model = lp.PaddleDetectionLayoutModel(
    config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
    # Class index -> human-readable label used for detected elements.
    label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"},
    # NOTE(review): presumably the minimum detection score to keep - confirm
    # against the layoutparser documentation.
    threshold=0.5,
    enforce_cpu=False,
    enable_mkldnn=True,
)
def process_folder(folder_path):
    """Run layout detection on every ``.jpg`` file under ``folder_path``.

    The tree is walked recursively with ``os.walk``. For each file whose
    lower-cased name ends in ``.jpg``, the layout returned by
    ``process_image`` (using the module-level ``model``) is saved next to the
    image, with the image extension replaced by ``.json``.

    Args:
        folder_path: Root directory to scan.
    """
    for current_dir, _subdirs, filenames in os.walk(folder_path):
        for filename in filenames:
            # Skip anything that is not a JPG (extension match ignores case).
            if not filename.lower().endswith('.jpg'):
                continue

            image_path = os.path.join(current_dir, filename)
            detected = process_image(image_path, model)

            # Same directory and base name as the image, with a .json suffix.
            stem, _extension = os.path.splitext(image_path)
            save_layout_to_json(detected, stem + '.json')
# Script entry: replace the placeholder below with the root directory of the
# images to analyse, then run the batch over that tree.
folder_path = "/your_folder_path/"
process_folder(folder_path)
相关标签