EADST

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np

def to_serializable(obj):
    """Convert NumPy values into plain Python objects that ``json`` can encode.

    Args:
        obj: Any value. NumPy scalars (floating, integer, boolean) and
            arrays are converted; dicts, lists and tuples are walked
            recursively so nested NumPy values are converted as well.

    Returns:
        A Python ``float``/``int``/``bool`` for NumPy scalars, a (nested)
        list for ``np.ndarray``, a rebuilt container for dict/list/tuple,
        and ``obj`` itself for anything else.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.integer):
        # json.dump rejects NumPy integers such as np.int64, so unwrap them.
        return int(obj)
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, dict):
        return {key: to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [to_serializable(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(to_serializable(item) for item in obj)
    return obj

def process_image(image_path, model):
    """Detect the layout of one image and return it as JSON-ready dicts.

    Args:
        image_path: Path of the image file to read with OpenCV.
        model: Object exposing ``detect(image)`` whose result is an
            iterable of elements that each provide ``to_dict()``.

    Returns:
        A list with one dict per detected element; every dict value has
        been passed through ``to_serializable``.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # without this check the slice below fails with an opaque TypeError.
        raise ValueError("Could not read image: {}".format(image_path))
    image = image[..., ::-1]  # Reverse the channel axis: BGR -> RGB

    # Run the layout detector on the RGB image
    layout = model.detect(image)

    # Convert every detected element into a dict of JSON-friendly values
    return [
        {key: to_serializable(value) for key, value in element.to_dict().items()}
        for element in layout
    ]

def save_layout_to_json(layout_data, json_path):
    """Write ``layout_data`` to ``json_path`` as JSON.

    The file is opened in text write mode, so an existing file at
    ``json_path`` is overwritten.
    """
    with open(json_path, mode='w') as out:
        json.dump(layout_data, fp=out)

# Build the layout detector once at module level; process_folder passes this
# same instance to process_image for every file it handles.
# NOTE(review): threshold is presumably the minimum detection score, and
# enable_mkldnn presumably only matters for CPU inference - confirm against
# the LayoutParser documentation.
model = lp.PaddleDetectionLayoutModel(
    config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
    label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"},
    threshold=0.5,
    enable_mkldnn=True,
    enforce_cpu=False,
)

def process_folder(folder_path, extensions=('.jpg',)):
    """Run layout detection on every matching image under ``folder_path``.

    The folder is walked recursively. Each matching file is passed to
    ``process_image`` together with the module-level ``model``, and the
    result is saved next to the image with its extension replaced by
    ``.json``.

    Args:
        folder_path: Root directory to walk.
        extensions: A file extension, or an iterable of extensions, to
            process. Matching is case-insensitive. Defaults to
            ``('.jpg',)``, which keeps the original JPG-only behavior.
    """
    if isinstance(extensions, str):
        # A bare string would otherwise be iterated character by character.
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            if not file.lower().endswith(suffixes):
                continue
            file_path = os.path.join(root, file)
            layout_data = process_image(file_path, model)

            # Same directory and base name as the image, with a .json suffix
            json_path = os.path.splitext(file_path)[0] + '.json'
            save_layout_to_json(layout_data, json_path)


# Script entry: set folder_path to the directory to scan, then process it.
# process_folder walks it recursively and writes a .json file next to each
# image it handles. This runs at import time because there is no
# __main__ guard.
folder_path = '/your_folder_path/'
process_folder(folder_path)
相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
XML 论文速读 diffusers Data Color NLTK Breakpoint Hilton Random Permission 关于博主 XGBoost 净利润 Pytorch 强化学习 torchinfo Django OpenAI BeautifulSoup CSV EXCEL HuggingFace Translation CTC uwsgi 论文 Linux Vim Paper tqdm OCR git logger Gemma C++ printf RGB Rebuttal CLAP Statistics UI 图标 CC Bert 顶会 Michelin Password VGG-16 Base64 icon VPN Image2Text Qwen2.5 Food 证件照 Qwen Review Github 版权 Shortcut 递归学习法 Claude Search NLP ResNet-50 Docker Land 多进程 Distillation Hotel GIT JSON Attention transformers PyTorch FP64 uWSGI tar Vmess mmap 报税 ONNX Firewall Datetime PDB Dataset Llama WebCrawler 财报 Template 飞书 MD5 Paddle Safetensors TensorFlow Transformers SPIE Clash Tracking Pickle VSCode 公式 v2ray Agent AI News 云服务器 Cloudreve Numpy Markdown ModelScope 第一性原理 IndexTTS2 Google SAM Mixtral YOLO BF16 Use CUDA Algorithm Disk Jupyter CEIR Zip Excel Crawler ChatGPT Magnet FastAPI Quantize Streamlit Freesound Pillow NameSilo Bin LLAMA FP16 Anaconda Quantization Interview Jetson GoogLeNet Input Tensor Domain LLM COCO Tiktoken SQL FP8 OpenCV 算法题 TensorRT Plate LoRA git-lfs TTS Pandas SVR Web Sklearn LaTeX HaggingFace GGML SQLite CV FP32 RAR Video v0.dev DeepStream UNIX DeepSeek scipy Bitcoin hf Windows Plotly Augmentation Math CAM API Miniforge 多线程 Bipartite WAN Logo GPTQ Baidu 腾讯云 阿里云 LeetCode QWEN Qwen2 Conda Knowledge 继承 音频 Card Python Proxy Nginx PyCharm 域名 Animate Website FlashAttention Diagram Hungarian Heatmap Ptyhon Git TSV 签证 PIP 搞笑 GPT4 图形思考法 BTC PDF InvalidArgumentError llama.cpp Ubuntu
站点统计

本站现有博文328篇,共被浏览854827

本站已经建立2562天!

热门文章
文章归档
回到顶部