EADST

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np

def to_serializable(obj):
    """Convert NumPy values into plain Python objects that ``json`` can encode.

    Args:
        obj: Any value. NumPy arrays, NumPy scalars of any dtype, and
            dicts/lists/tuples containing them are converted.

    Returns:
        Arrays become (nested) lists, NumPy scalars become the matching
        Python scalar, dicts/lists/tuples are rebuilt with their values
        converted, and every other object is returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        # Covers every NumPy scalar type (ints, floats, bools, ...), not only
        # float32/float64; json cannot encode e.g. np.int64 or np.float32.
        return obj.item()
    if isinstance(obj, dict):
        return {key: to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [to_serializable(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(to_serializable(item) for item in obj)
    return obj

def process_image(image_path, model):
    """Detect layout regions in one image and return them as plain dicts.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        model: Layout detector exposing ``detect(image)``; each detected
            element must provide ``to_dict()``.

    Returns:
        A list with one dict per detected element: the element's
        ``to_dict()`` output with every value passed through
        ``to_serializable``.

    Raises:
        ValueError: If OpenCV could not read or decode ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than on the slice below.
        raise ValueError(f"Could not read image: {image_path!r}")

    # OpenCV loads channels as BGR; reverse the last axis to get RGB.
    rgb_image = image[..., ::-1]

    layout = model.detect(rgb_image)

    # Convert every value of each element's dict to a serializable type.
    return [
        {key: to_serializable(value) for key, value in block.to_dict().items()}
        for block in layout
    ]

def save_layout_to_json(layout_data, json_path):
    """Write ``layout_data`` to ``json_path`` as JSON, replacing any existing file.

    Args:
        layout_data: JSON-encodable object to write.
        json_path: Destination file path; opened in text write mode.
    """
    with open(json_path, mode='w') as sink:
        json.dump(layout_data, fp=sink)

# Class ids mapped to the region names used for detected elements.
_PUBLAYNET_LABEL_MAP = {0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}

# Build the detector once at module load; process_folder reads this global.
model = lp.PaddleDetectionLayoutModel(
    config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
    label_map=_PUBLAYNET_LABEL_MAP,
    # NOTE(review): presumably the minimum detection confidence - confirm
    # against the layoutparser documentation.
    threshold=0.5,
    # NOTE(review): presumably allows GPU use when available and turns on
    # MKL-DNN acceleration for CPU inference - confirm.
    enforce_cpu=False,
    enable_mkldnn=True,
)

def process_folder(folder_path, extensions=('.jpg', '.jpeg')):
    """Run layout detection on every matching image under ``folder_path``.

    Walks ``folder_path`` recursively. For each file whose name ends with one
    of ``extensions`` (case-insensitive), the layout is detected with the
    module-level ``model`` and written next to the image as ``<name>.json``.

    Args:
        folder_path: Root directory to scan, including subfolders.
        extensions: File suffix, or iterable of suffixes, to process.
            Defaults to both common JPEG spellings. Two images that differ
            only in suffix (``a.jpg`` / ``a.jpeg``) share one output path.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith accepts a tuple, so one call checks every suffix.
    suffixes = tuple(ext.lower() for ext in extensions)

    for root, _dirs, files in os.walk(folder_path):
        for file_name in files:
            if not file_name.lower().endswith(suffixes):
                continue

            file_path = os.path.join(root, file_name)
            layout_data = process_image(file_path, model)

            # Same location and base name as the image, with a .json suffix.
            json_path = os.path.splitext(file_path)[0] + '.json'
            save_layout_to_json(layout_data, json_path)


# Specify the folder path to process.
# NOTE(review): '/your_folder_path/' appears to be a placeholder - replace it
# with the directory to scan before running.
# This call is not wrapped in an `if __name__ == "__main__":` guard, so it
# runs whenever this file is executed or imported.
folder_path = '/your_folder_path/'
process_folder(folder_path)
相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
Augmentation Qwen2 CAM Agent Sklearn Domain hf XML ChatGPT LeetCode Pickle Breakpoint EXCEL Quantize PyTorch Disk SQLite Proxy scipy Image2Text Miniforge diffusers Anaconda TTS FP8 第一性原理 SAM torchinfo FP32 LLAMA Hotel Streamlit QWEN 报税 v2ray logger OpenCV Random Llama Pytorch COCO 音频 tar Clash CSV Base64 printf Password IndexTTS2 Qwen YOLO Statistics 飞书 BTC Food Gemma Distillation 强化学习 顶会 C++ Search Plate Jetson Heatmap Transformers BeautifulSoup 算法题 git-lfs Logo Interview OCR Translation AI Web DeepSeek Conda Hungarian Math RGB Permission 搞笑 PyCharm CTC Website Github Git 版权 InvalidArgumentError TensorRT Data Pandas NLP CV Zip NameSilo GPTQ uwsgi PIP XGBoost Datetime Crawler VSCode Michelin TensorFlow CLAP NLTK Land Card Cloudreve LaTeX Attention CEIR ResNet-50 Bert 财报 腾讯云 mmap RAR Excel Google Paper Bipartite 关于博主 DeepStream 域名 Diagram PDF llama.cpp Quantization Paddle Safetensors Bin git Color Vmess 净利润 GIT transformers FlashAttention Pillow Ubuntu Input Docker Nginx Python UNIX Magnet v0.dev Animate SVR GGML Bitcoin Hilton API Numpy Tracking Mixtral Markdown Freesound LLM 多线程 Windows 继承 tqdm 公式 UI ONNX 签证 Tiktoken 证件照 HaggingFace Template SPIE VPN SQL FP64 递归学习法 Dataset Use CUDA WebCrawler Linux JSON LoRA VGG-16 Tensor ModelScope Knowledge Plotly MD5 CC Review Algorithm TSV GoogLeNet Video Ptyhon 图形思考法 Shortcut BF16 Vim 多进程 HuggingFace Django FastAPI Jupyter PDB WAN FP16 OpenAI Claude 阿里云 Baidu uWSGI Firewall GPT4 Qwen2.5
站点统计

本站现有博文319篇,共被浏览749994

本站已经建立2403天!

热门文章
文章归档
回到顶部