EADST

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np

def to_serializable(obj):
    """Convert NumPy values into plain Python objects that ``json`` accepts.

    Args:
        obj: Any value. NumPy scalars (floats, integers, booleans, ...) and
            NumPy arrays are converted; dicts and lists are rebuilt with
            their values converted recursively; anything else is returned
            unchanged.

    Returns:
        The converted value: a native Python scalar for a NumPy scalar, a
        (nested) list for an array, a new dict/list for a dict/list, or
        ``obj`` itself for every other type.
    """
    if isinstance(obj, np.generic):
        # Covers every NumPy scalar type, not only float32/float64, so that
        # e.g. np.int64 ids or np.bool_ flags no longer break json.dump.
        return obj.item()
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, dict):
        return {key: to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [to_serializable(item) for item in obj]
    return obj

def process_image(image_path, model):
    """Detect the layout of one image and return it as JSON-ready dicts.

    Args:
        image_path: Path of the image file; it is read with ``cv2.imread``.
        model: Layout detector exposing ``detect(image)``. Each element of
            the layout it returns must provide ``to_dict()``.

    Returns:
        A list with one dictionary per detected layout element, where every
        value has been passed through ``to_serializable``.

    Raises:
        ValueError: If ``cv2.imread`` could not load ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; fail here with the offending path instead of
        # an opaque TypeError on the slice below.
        raise ValueError("Could not read image: {!r}".format(image_path))
    image = image[..., ::-1]  # Reverse the channel axis: BGR -> RGB

    # Run layout detection on the RGB image
    layout = model.detect(image)

    # Flatten each detected element to a dict and strip NumPy types so the
    # result can be handed straight to json.dump
    layout_data = []
    for element in layout:
        element_dict = element.to_dict()
        layout_data.append(
            {key: to_serializable(value) for key, value in element_dict.items()}
        )
    return layout_data

def save_layout_to_json(layout_data, json_path):
    """Serialize ``layout_data`` as JSON into the file at ``json_path``.

    The file is opened in text write mode, so an existing file at that path
    is overwritten.
    """
    with open(json_path, mode='w') as sink:
        json.dump(obj=layout_data, fp=sink)

# Build the layout detector once, at module level, so that every image
# processed below shares the same instance.
# Class-id -> label-name mapping handed to the detector as ``label_map``.
_publaynet_labels = {
    0: "Text",
    1: "Title",
    2: "List",
    3: "Table",
    4: "Figure",
}
# NOTE(review): the exact meaning of threshold / enforce_cpu / enable_mkldnn
# is defined by LayoutParser's PaddleDetectionLayoutModel; confirm against
# its documentation before changing them.
model = lp.PaddleDetectionLayoutModel(
    config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
    label_map=_publaynet_labels,
    threshold=0.5,
    enable_mkldnn=True,
    enforce_cpu=False,
)

def process_folder(folder_path, layout_model=None, extensions=('.jpg',)):
    """Run layout detection on every matching image under ``folder_path``.

    The folder is walked recursively with ``os.walk``. For each matching
    image the detected layout is written next to it as a ``.json`` file with
    the same base name (an existing file of that name is overwritten).

    Args:
        folder_path: Root directory to scan.
        layout_model: Detector passed on to ``process_image``. When ``None``
            (the default) the module-level ``model`` is used.
        extensions: File-name suffix, or iterable of suffixes, selecting the
            images to process. Matching is case-insensitive. The default
            ``('.jpg',)`` keeps the original JPG-only behaviour; pass e.g.
            ``('.jpg', '.jpeg', '.png')`` to cover more formats.
    """
    detector = model if layout_model is None else layout_model

    # Accept a single suffix string as well as an iterable of suffixes, and
    # lowercase them because they are compared against a lowercased name.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(suffix.lower() for suffix in extensions)

    for root, _dirs, files in os.walk(folder_path):
        for file_name in files:
            if not file_name.lower().endswith(suffixes):
                continue

            file_path = os.path.join(root, file_name)
            layout_data = process_image(file_path, detector)

            # Same location and base name as the image, with a .json suffix
            json_path = os.path.splitext(file_path)[0] + '.json'
            save_layout_to_json(layout_data, json_path)


# Root directory to scan. This is a placeholder: replace it with a real path
# before running the script.
folder_path = '/your_folder_path/'
process_folder(folder_path=folder_path)
相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
多进程 Distillation FlashAttention FP32 uWSGI 签证 VGG-16 DeepSeek Domain 算法题 logger Translation TensorFlow InvalidArgumentError 顶会 HaggingFace OCR CUDA Magnet git-lfs BTC 飞书 Linux Knowledge GGML Password tar Streamlit NameSilo Jupyter Ubuntu Image2Text Use CLAP Attention 腾讯云 YOLO Numpy SPIE Data mmap Random 阿里云 TensorRT AI Cloudreve Git Interview VSCode NLP Miniforge icon PDF COCO Bin Clash Pandas Pytorch Dataset git XML HuggingFace NLTK tqdm 云服务器 域名 scipy Quantization Qwen2.5 IndexTTS2 Bert Shortcut UI SAM 递归学习法 Datetime EXCEL GPT4 Sklearn 继承 SQL TSV Hotel Math Algorithm PyTorch 搞笑 ResNet-50 Hungarian Paper Bipartite torchinfo WAN Python Conda FP16 llama.cpp v0.dev RGB Agent OpenAI Gemma DeepStream Tiktoken CEIR 图形思考法 v2ray LLAMA Vim Qwen Base64 Nginx Paddle FastAPI Markdown VPN Heatmap 财报 PyCharm 多线程 CTC 净利润 WebCrawler Augmentation Llama Baidu Logo Quantize Claude Michelin Plate OpenCV ONNX GPTQ Github Crawler GoogLeNet Bitcoin Web Disk PDB 报税 CSV 公式 强化学习 LoRA BF16 Mixtral Search Pickle Hilton RAR XGBoost Ptyhon Zip Excel JSON QWEN Website Animate Pillow MD5 关于博主 GIT Anaconda uwsgi Statistics 第一性原理 Windows LeetCode LaTeX Permission Breakpoint CAM Review Tensor Tracking CC Firewall Diagram Transformers Card Django Qwen2 Plotly Jetson Safetensors FP64 图标 PIP Food UNIX printf BeautifulSoup ChatGPT TTS 版权 ModelScope C++ Vmess transformers Google Rebuttal Proxy API LLM Template Freesound Video Color News 证件照 SQLite Land CV 音频 diffusers FP8 Docker SVR Input hf
站点统计

本站现有博文324篇,共被浏览815969次。

本站已经建立2520天!

热门文章
文章归档
回到顶部