EADST

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np

def to_serializable(obj):
    """Convert NumPy values into plain Python objects that ``json`` can encode.

    Args:
        obj: Any value. NumPy scalars and arrays are converted; dicts, lists
            and tuples are walked recursively so NumPy values nested inside
            them are converted as well. Anything else is returned unchanged.

    Returns:
        A built-in ``bool``/``int``/``float`` for NumPy boolean, integer and
        floating scalars, a (nested) list for ``np.ndarray``, a rebuilt
        container for dict/list/tuple, otherwise ``obj`` itself.
    """
    # np.bool_ is not an np.integer subclass, so it needs its own branch.
    if isinstance(obj, np.bool_):
        return bool(obj)
    # Covers every integer width (int8 ... uint64), which json cannot encode.
    if isinstance(obj, np.integer):
        return int(obj)
    # Covers every float width, not only float32/float64.
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {key: to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [to_serializable(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(to_serializable(item) for item in obj)
    return obj

def process_image(image_path, model):
    """Detect the layout of one image and return it as JSON-friendly dicts.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        model: Object exposing ``detect(image)`` that returns an iterable of
            layout elements, each of which provides ``to_dict()``.

    Returns:
        A list with one dict per detected element. Every value of each dict
        has been passed through ``to_serializable``.

    Raises:
        ValueError: If ``cv2.imread`` could not read the image (it returns
            ``None`` instead of raising for missing or undecodable files).
    """
    image = cv2.imread(image_path)
    if image is None:
        # Fail with the offending path instead of an opaque
        # "'NoneType' object is not subscriptable" on the next line.
        raise ValueError(f"Could not read image: {image_path!r}")

    # Reverse the last axis: OpenCV loads BGR, the model is given RGB.
    image = image[..., ::-1]

    # Use the model to detect layout
    layout = model.detect(image)

    # Convert each layout element's dict so its values are JSON serializable.
    return [
        {key: to_serializable(value) for key, value in element.to_dict().items()}
        for element in layout
    ]

def save_layout_to_json(layout_data, json_path):
    """Write ``layout_data`` to ``json_path`` as JSON, replacing any existing file.

    Args:
        layout_data: JSON-serializable object (here, the list of layout dicts).
        json_path: Destination file path; opened in text write mode.
    """
    with open(json_path, 'w') as out_file:
        encoded = json.dumps(layout_data)
        out_file.write(encoded)

# Load the model once at module level; process_folder() below passes this same
# instance to process_image() for every file it handles.
# The config path selects a PubLayNet configuration and label_map names the
# five class ids (Text, Title, List, Table, Figure).
# NOTE(review): threshold is presumably the minimum detection score, and
# enforce_cpu / enable_mkldnn presumably select the execution backend —
# confirm against the layoutparser documentation.
model = lp.PaddleDetectionLayoutModel(
    config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
    threshold=0.5,
    label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"},
    enforce_cpu=False,
    enable_mkldnn=True
)

def process_folder(folder_path, extensions=('.jpg',)):
    """Run layout detection on every matching image under ``folder_path``.

    The folder is walked recursively. For each matching file the layout is
    detected with the module-level ``model`` and written next to the image as
    ``<image name without extension>.json``.

    Args:
        folder_path: Root directory to walk.
        extensions: File suffix, or iterable of suffixes, to process. Matching
            is case-insensitive. Defaults to ``('.jpg',)``; pass e.g.
            ``('.jpg', '.jpeg', '.png')`` to cover more formats.
    """
    # Accept a single suffix string as well as an iterable of suffixes.
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith takes a tuple, so one call tests all suffixes.
    suffixes = tuple(ext.lower() for ext in extensions)

    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            if not file.lower().endswith(suffixes):
                continue

            file_path = os.path.join(root, file)
            layout_data = process_image(file_path, model)  # Process the image

            # The JSON file shares the image's path, with the extension swapped.
            json_path = os.path.splitext(file_path)[0] + '.json'
            save_layout_to_json(layout_data, json_path)  # Save layout data as JSON


# Specify the folder path to process.
# NOTE: '/your_folder_path/' appears to be a placeholder — replace it with the
# directory that holds the images.
# This call sits at module level, so it runs whenever the file is executed.
folder_path = '/your_folder_path/'
process_folder(folder_path)
相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
logger Tracking scipy EXCEL GPT4 财报 PDF Firewall Shortcut Hilton DeepSeek OCR Magnet Color SAM Bert Plate 域名 SQLite Tiktoken FlashAttention Mixtral PIP 多进程 IndexTTS2 PyCharm 阿里云 Dataset Password Land Pytorch Github HuggingFace XML Cloudreve Miniforge 净利润 Safetensors Distillation AI BF16 Domain UNIX tqdm Sklearn LaTeX 搞笑 UI TTS hf Heatmap FP16 Augmentation Food CTC Vmess uwsgi Crawler 腾讯云 RAR PyTorch LoRA v0.dev Zip torchinfo git-lfs Vim Bipartite NLTK Python 签证 Plotly Claude MD5 ModelScope VSCode diffusers VGG-16 Streamlit 继承 Pandas Ptyhon XGBoost FastAPI 关于博主 VPN Anaconda llama.cpp Use 音频 CAM TensorFlow Gemma Qwen2 PDB tar Bitcoin BTC CSV 报税 CLAP Qwen2.5 GIT Web 飞书 API GoogLeNet Paper Django Algorithm Image2Text Markdown Review Math Baidu FP64 YOLO v2ray Attention OpenCV OpenAI LLM SVR CEIR LLAMA printf WebCrawler Nginx GGML DeepStream Docker Hungarian ChatGPT Base64 Website Jetson Diagram 多线程 JSON LeetCode Animate Google uWSGI Excel Freesound Windows transformers RGB Hotel Git Llama QWEN Numpy Template CV Tensor Quantize TSV Card 公式 Bin Breakpoint Random SQL BeautifulSoup Disk Michelin Video C++ NameSilo Conda Clash Data HaggingFace Jupyter mmap Pickle CC TensorRT Input Qwen Permission 版权 WAN InvalidArgumentError Transformers Paddle Pillow COCO git Knowledge FP32 Linux CUDA 算法题 ResNet-50 Statistics ONNX Interview Datetime GPTQ Quantization Ubuntu Logo SPIE 证件照 NLP Proxy FP8 Translation
站点统计

本站现有博文309篇,共被浏览731993

本站已经建立2368天!

热门文章
文章归档
回到顶部