EADST

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np

def to_serializable(obj):
    """Convert a NumPy value into something ``json`` can encode.

    Args:
        obj: Any value. NumPy booleans, integers, floats and arrays are
            converted; every other value is passed through untouched.

    Returns:
        A built-in ``bool``/``int``/``float`` for the corresponding NumPy
        scalar, a (possibly nested) list for ``np.ndarray``, or ``obj``
        itself for any other type.
    """
    # Check the abstract scalar families rather than specific widths so that
    # np.int64, np.float16, np.bool_, ... are handled too; the json module
    # rejects those with a TypeError if they are left as NumPy objects.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        # tolist() also converts the array elements to built-in scalars.
        return obj.tolist()
    return obj

def process_image(image_path, model):
    """Detect the layout of one image and return it as JSON-ready dicts.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        model: Layout detector exposing ``detect(image)``; the result must be
            iterable and each item must provide ``to_dict()``.

    Returns:
        A list with one dict per detected layout object. Every value of each
        dict has been passed through ``to_serializable``.

    Raises:
        ValueError: If ``cv2.imread`` could not load ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; fail here with a message that names the file rather
    # than with "'NoneType' object is not subscriptable" on the next line.
    if image is None:
        raise ValueError(f"Could not read image: {image_path!r}")
    image = image[..., ::-1]  # Reverse the last axis: BGR -> RGB

    # Use the model to detect layout
    layout = model.detect(image)

    # Convert each layout object to a dict whose values json can encode
    layout_data = []
    for block in layout:
        block_dict = block.to_dict()
        layout_data.append(
            {key: to_serializable(value) for key, value in block_dict.items()}
        )

    return layout_data

def save_layout_to_json(layout_data, json_path):
    """Write ``layout_data`` as JSON to the file at ``json_path``.

    The file is opened in text write mode, so any existing content is
    replaced.
    """
    with open(json_path, mode='w') as out_file:
        json.dump(layout_data, fp=out_file)

# Build the layout detector once at module level; process_folder passes this
# instance to process_image for every file it handles.
model = lp.PaddleDetectionLayoutModel(
    config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
    # Maps the detector's integer class ids to readable category names.
    label_map={
        0: "Text",
        1: "Title",
        2: "List",
        3: "Table",
        4: "Figure",
    },
    # NOTE(review): presumably the minimum detection confidence -- confirm
    # against the installed layoutparser version.
    threshold=0.5,
    enable_mkldnn=True,
    enforce_cpu=False,
)

def process_folder(folder_path, extensions=('.jpg', '.jpeg')):
    """Run layout detection on every matching image under ``folder_path``.

    The folder is walked recursively with ``os.walk``. For each matching file
    the layout returned by ``process_image`` (using the module-level
    ``model``) is saved next to the image, with the image's extension
    replaced by ``.json``.

    Args:
        folder_path: Root directory to scan.
        extensions: File-name suffix, or iterable of suffixes, to process.
            Matching is case-insensitive. Defaults to both common JPEG
            spellings; the original implementation matched only ``.jpg``.
            Note that files differing only in extension (``a.jpg`` and
            ``a.jpeg``) map to the same ``.json`` output path.

    Returns:
        None.
    """
    # Accept a single suffix as well as a collection of them.
    if isinstance(extensions, str):
        extensions = (extensions,)
    # File names are lower-cased before matching, so lower-case the suffixes
    # too; str.endswith requires a tuple when given several candidates.
    suffixes = tuple(ext.lower() for ext in extensions)

    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            if not file.lower().endswith(suffixes):
                continue

            file_path = os.path.join(root, file)
            layout_data = process_image(file_path, model)

            # Write the result beside the image: <name>.<ext> -> <name>.json
            json_path = os.path.splitext(file_path)[0] + '.json'
            save_layout_to_json(layout_data, json_path)


# Specify the folder path to process.
# '/your_folder_path/' is a placeholder; replace it with the directory that
# holds the images. These statements sit at module top level, so the whole
# folder is processed as soon as the file is executed or imported.
folder_path = '/your_folder_path/'
process_folder(folder_path)
相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
Git FlashAttention transformers 图标 Vim QWEN LLAMA Google NLP Ubuntu Hungarian Streamlit Pandas LoRA InvalidArgumentError Qwen2.5 Jetson Zip Sklearn Excel GPT4 Diagram ChatGPT TSV C++ Bin Password Use uWSGI tqdm Domain Github XML Quantization CTC v0.dev Random printf Conda uwsgi Base64 SQLite Linux Clash Rebuttal WAN ModelScope 强化学习 Bert Statistics YOLO NameSilo git-lfs Interview CV Tiktoken UNIX Permission Math News Template 关于博主 云服务器 签证 DeepStream Firewall FP32 Paddle Color API 顶会 Animate OCR Pytorch PyCharm Qwen2 diffusers 腾讯云 Baidu Input UI Bitcoin Tensor Plotly Claude FP16 HuggingFace icon scipy Dataset Pillow 版权 Data Michelin Hilton CSV WebCrawler 搞笑 递归学习法 Crawler Distillation Land MD5 CUDA GoogLeNet Paper Python NLTK PyTorch SQL VPN RAR v2ray Vmess Heatmap Proxy Agent 多线程 PDF CEIR ResNet-50 JSON Datetime Knowledge Food 音频 Logo tar Magnet Transformers EXCEL Jupyter DeepSeek BF16 Card Search Markdown Attention Breakpoint AI Anaconda GGML Miniforge XGBoost Ptyhon Bipartite Mixtral Website Django 证件照 BeautifulSoup COCO Gemma CLAP VSCode Nginx ONNX SVR Numpy 飞书 Algorithm LaTeX FastAPI hf CC LeetCode Web llama.cpp FP8 BTC PDB Safetensors 阿里云 Disk Freesound 公式 SPIE mmap PIP 第一性原理 RGB Review FP64 TTS TensorRT 报税 Augmentation 净利润 Cloudreve 继承 OpenAI Hotel LLM OpenCV git SAM Image2Text Qwen GIT 多进程 域名 Quantize VGG-16 Translation IndexTTS2 Video Docker Windows 算法题 图形思考法 Llama logger Shortcut Plate HaggingFace Pickle CAM 财报 GPTQ Tracking TensorFlow torchinfo
站点统计

本站现有博文323篇，共被浏览799602次

本站已经建立2498天!

热门文章
文章归档
回到顶部