EADST

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

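The script below walks a folder recursively, runs a PubLayNet layout-detection model (via LayoutParser's PaddleDetection backend) on every JPG image it finds, and saves the detected layout elements as a JSON file next to each image.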

import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np

def to_serializable(obj):
    """Convert NumPy values, including nested ones, to plain Python objects.

    Args:
        obj: Any value. NumPy scalars, NumPy arrays, and dicts, lists, or
            tuples containing them are converted; anything else is returned
            unchanged.

    Returns:
        A built-in Python scalar for a NumPy scalar, a (nested) list for an
        ``np.ndarray``, a new dict/list/tuple with converted members for
        containers, and ``obj`` itself otherwise.
    """
    if isinstance(obj, np.generic):
        # np.generic is the base of every NumPy scalar type, so this also
        # covers integers, booleans and float16, which ``json`` cannot
        # encode, instead of only float32/float64.
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        # Recurse so NumPy values stored inside nested structures are
        # converted as well.
        return {key: to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [to_serializable(value) for value in obj]
    if isinstance(obj, tuple):
        return tuple(to_serializable(value) for value in obj)
    return obj

def process_image(image_path, model):
    """Detect the layout of one image and return it as JSON-ready dicts.

    Args:
        image_path: Path of the image file to load with OpenCV.
        model: Layout detector exposing ``detect(image)``; every element of
            the returned layout must provide ``to_dict()``.

    Returns:
        A list with one dict per detected layout element, where each value
        has been passed through ``to_serializable``.

    Raises:
        OSError: If OpenCV could not read or decode ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than with an opaque
        # "'NoneType' object is not subscriptable" on the next line.
        raise OSError(f"Could not read image: {image_path}")
    image = image[..., ::-1]  # Convert from BGR to RGB

    # Use the model to detect layout
    layout = model.detect(image)

    # Convert every layout element to a dict whose values are serializable
    return [
        {key: to_serializable(value) for key, value in block.to_dict().items()}
        for block in layout
    ]

def save_layout_to_json(layout_data, json_path):
    """Write ``layout_data`` to ``json_path`` as a JSON document.

    Args:
        layout_data: JSON-encodable data (here: the list of layout dicts).
        json_path: Destination file path; an existing file is overwritten.

    Raises:
        TypeError: If ``layout_data`` contains a value ``json`` cannot encode.
        ValueError: If ``layout_data`` contains a circular reference.
        OSError: If the destination file cannot be opened or written.
    """
    # Encode before opening the file, so an encoding error is raised before
    # the destination is created or truncated and no partial file is left.
    payload = json.dumps(layout_data)
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json_file.write(payload)

# Build the layout detector once at module level; process_folder() reads it
# through the global name ``model``.
_MODEL_CONFIG_PATH = "lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config"
# Maps the integer keys 0-4 to the layout category names used in the output.
_MODEL_LABEL_MAP = {0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}

# NOTE(review): threshold is presumably the minimum detection confidence, and
# enable_mkldnn presumably only matters for CPU inference - confirm against
# the LayoutParser documentation.
model = lp.PaddleDetectionLayoutModel(
    config_path=_MODEL_CONFIG_PATH,
    threshold=0.5,
    label_map=_MODEL_LABEL_MAP,
    enforce_cpu=False,
    enable_mkldnn=True,
)

def process_folder(folder_path, extensions=('.jpg', '.jpeg')):
    """Run layout detection on every matching image below ``folder_path``.

    The folder is walked recursively. Each matching image is passed to
    ``process_image`` together with the module-level ``model``, and the
    result is written by ``save_layout_to_json`` to a ``.json`` file that
    shares the image's path and base name.

    Args:
        folder_path: Root directory to search.
        extensions: File-name suffix, or iterable of suffixes, that marks a
            file as an image to process; matching is case-insensitive.
            Defaults to both common JPEG suffixes. Images that differ only
            in suffix (``a.jpg`` / ``a.jpeg``) share one output file, so the
            later one overwrites the earlier result.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith accepts a tuple, so one call checks every suffix.
    suffixes = tuple(ext.lower() for ext in extensions)

    # Iterate through all files and subfolders in the folder
    for root, _dirs, files in os.walk(folder_path):
        for file_name in files:
            if not file_name.lower().endswith(suffixes):
                continue

            file_path = os.path.join(root, file_name)
            layout_data = process_image(file_path, model)  # Process the image

            # Save the layout next to the image, swapping the suffix for .json
            json_path = os.path.splitext(file_path)[0] + '.json'
            save_layout_to_json(layout_data, json_path)


# Script entry: run layout detection over one folder tree.
# '/your_folder_path/' is a placeholder; replace it with the directory that
# contains the images to process.
# NOTE(review): these statements are at module top level, so the whole folder
# is processed whenever this file is executed or imported.
folder_path = '/your_folder_path/'
process_folder(folder_path)
相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
Password NLP TTS MD5 C++ Llama Pandas diffusers torchinfo Algorithm Video Sklearn Conda 签证 PIP FP64 Card TSV tqdm OpenCV SAM 多进程 FP16 TensorFlow QWEN Django scipy Web Random VSCode LeetCode 阿里云 Excel 版权 Datetime 证件照 HaggingFace AI LLM Numpy Tensor Land Website Google InvalidArgumentError Streamlit RGB Distillation SPIE CAM Transformers GIT Jupyter OCR RAR Template Michelin Jetson 搞笑 Plotly CSV UI Docker GPTQ Tiktoken uwsgi FP8 Crawler Ubuntu SQLite Zip Plate Bert Disk PyTorch PDB 财报 CC Logo WAN git-lfs Ptyhon Image2Text ModelScope Hotel YOLO CEIR Magnet Review Pillow UNIX Pickle Food Firewall COCO Qwen2.5 SQL Paper Safetensors Hilton mmap uWSGI 多线程 Nginx ONNX Windows 报税 GGML DeepStream ChatGPT Quantization CLAP Hungarian LLAMA v2ray CUDA Augmentation Claude PyCharm XGBoost tar CTC Bin Statistics Python PDF Bitcoin Baidu Breakpoint Permission Diagram logger Qwen 公式 Tracking BF16 Paddle Base64 git Cloudreve TensorRT WebCrawler FastAPI Miniforge Domain Qwen2 Quantize API Color Shortcut Git Freesound transformers GPT4 Pytorch Knowledge LaTeX printf Math OpenAI 音频 Linux Data JSON FP32 腾讯云 Mixtral 飞书 Proxy Use Vim NameSilo 继承 GoogLeNet 算法题 Translation BTC Heatmap LoRA 关于博主 BeautifulSoup Dataset EXCEL hf Animate Anaconda Attention Vmess FlashAttention HuggingFace XML Input DeepSeek Github Markdown VGG-16 VPN v0.dev IndexTTS2 ResNet-50 域名 Interview 净利润 llama.cpp Gemma Bipartite CV Clash NLTK SVR
站点统计

本站现有博文311篇,共被浏览742141

本站已经建立2381天!

热门文章
文章归档
回到顶部