EADST

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np

def to_serializable(obj):
    """Convert NumPy values into plain Python objects that ``json`` can encode.

    Args:
        obj: Any value. NumPy arrays and NumPy scalars (floats, integers,
            booleans, ...) are converted; dicts, lists and tuples are walked
            recursively so NumPy values nested inside them are converted too.

    Returns:
        ``obj.tolist()`` for arrays, the matching built-in scalar for NumPy
        scalars, a rebuilt container for dict/list/tuple, and ``obj`` itself
        for everything else.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    # np.generic is the common base of every NumPy scalar type, so this covers
    # np.int64, np.bool_, np.float16, ... and not only float32/float64.
    if isinstance(obj, np.generic):
        return obj.item()
    if isinstance(obj, dict):
        return {key: to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [to_serializable(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(to_serializable(item) for item in obj)
    return obj

def process_image(image_path, model):
    """Detect the layout of one image and return it as JSON-friendly dicts.

    Args:
        image_path: Path of the image file to load with ``cv2.imread``.
        model: Object exposing ``detect(image)`` whose result is iterable and
            yields items with a ``to_dict()`` method.

    Returns:
        A list with one dict per detected layout element; every value has
        been passed through ``to_serializable``.

    Raises:
        ValueError: If the image cannot be read from ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread signals an unreadable/missing file by returning None rather
    # than raising, so fail here with a message that names the file.
    if image is None:
        raise ValueError(f"Could not read image: {image_path!r}")
    image = image[..., ::-1]  # Convert from BGR to RGB

    # Use the model to detect layout
    layout = model.detect(image)

    # Convert every layout object to a dict of serializable values
    return [
        {key: to_serializable(value) for key, value in block.to_dict().items()}
        for block in layout
    ]

def save_layout_to_json(layout_data, json_path):
    """Serialize ``layout_data`` as JSON into the file at ``json_path``.

    The file is opened in text write mode, so an existing file is overwritten.
    """
    with open(json_path, 'w') as handle:
        json.dump(obj=layout_data, fp=handle)

# Load the model
# Built once at module level; process_folder() reads this global and passes it
# to process_image() for every image it handles.
model = lp.PaddleDetectionLayoutModel(
    config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
    # NOTE(review): presumably the minimum confidence for a detection to be
    # kept -- confirm against the LayoutParser documentation.
    threshold=0.5,
    # Maps the model's integer class ids to readable label names.
    label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"},
    enforce_cpu=False,
    enable_mkldnn=True
)

def process_folder(folder_path, extensions=('.jpg',)):
    """Run layout detection on every matching image under ``folder_path``.

    The folder is walked recursively. For each matching file the layout is
    computed with ``process_image`` (using the module-level ``model``) and
    saved next to the image, with the image's extension replaced by ``.json``.

    Args:
        folder_path: Root directory to walk.
        extensions: File-name suffix, or iterable of suffixes, to process.
            Matching is case-insensitive. Defaults to ``('.jpg',)``.
    """
    # Accept a single suffix string as a convenience.
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith takes a tuple, so one call checks all suffixes.
    suffixes = tuple(ext.lower() for ext in extensions)

    # Iterate through all files and subfolders in the folder
    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            if not file.lower().endswith(suffixes):
                continue
            file_path = os.path.join(root, file)
            layout_data = process_image(file_path, model)  # Process the image

            # Create JSON file path
            json_path = os.path.splitext(file_path)[0] + '.json'
            save_layout_to_json(layout_data, json_path)  # Save layout data as JSON


# Specify the folder path to process
# NOTE(review): '/your_folder_path/' looks like a placeholder -- replace it
# with a real directory before running.
folder_path = '/your_folder_path/'
# Called at module level, so the folder is processed as soon as this file runs.
process_folder(folder_path)
相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
图形思考法 printf SVR Proxy Excel Domain Dataset llama.cpp PDF Data Agent XGBoost Mixtral Logo git RAR TTS git-lfs Plate 阿里云 News tar Git 多进程 VPN CAM Datetime Base64 Python Breakpoint 域名 LoRA 腾讯云 Algorithm Hotel v0.dev CEIR Pillow Distillation Cloudreve Heatmap Attention 公式 Paddle Shortcut 顶会 API YOLO FlashAttention LeetCode GIT Transformers Ubuntu PDB EXCEL LaTeX Plotly hf MD5 diffusers Review Tensor C++ COCO Miniforge GGML FP16 CLAP Statistics uwsgi Quantize Input ModelScope Bert Use QWEN Streamlit 音频 v2ray Card Google Animate VSCode GPT4 LLM OCR SQL Sklearn uWSGI 论文速读 DeepSeek OpenCV XML Qwen2.5 NameSilo Video FP64 HaggingFace 关于博主 第一性原理 Tiktoken Michelin AI CSV Qwen2 TensorFlow 净利润 Nginx Anaconda GPTQ Random Template Augmentation BTC 签证 Bipartite ONNX Safetensors icon TSV Disk NLP Claude Math CTC Paper Pandas FP32 UNIX 图标 Clash tqdm LLAMA PIP 递归学习法 IndexTTS2 CC scipy PyCharm 算法题 Hilton Magnet 证件照 JSON InvalidArgumentError 搞笑 logger 继承 Color 财报 强化学习 DeepStream Food FastAPI Jetson Firewall SPIE torchinfo Permission BeautifulSoup SQLite WebCrawler Vmess Knowledge Web SAM Baidu Diagram TensorRT Tracking NLTK 飞书 Search 报税 PyTorch Password HuggingFace Pytorch Numpy Zip Conda CV VGG-16 Markdown Freesound Interview 多线程 OpenAI Rebuttal Github Land Windows CUDA 云服务器 Translation Pickle Ptyhon FP8 Django Crawler mmap Qwen WAN Linux ChatGPT 论文 Website UI 版权 BF16 Llama transformers Bitcoin Docker Image2Text ResNet-50 Vim Jupyter GoogLeNet Bin RGB Gemma Quantization Hungarian
站点统计

本站现有博文328篇,共被浏览846182

本站已经建立2552天!

热门文章
文章归档
回到顶部