EADST

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np

def to_serializable(obj):
    """Convert NumPy values into built-in Python objects that ``json`` can encode.

    Args:
        obj: Any value. NumPy arrays, NumPy scalars, and dicts/lists/tuples
            that contain them are converted; anything else is returned
            unchanged.

    Returns:
        A (nested) list for a NumPy array, a built-in scalar for a NumPy
        scalar, a new container with converted values for a dict, list or
        plain tuple, and ``obj`` itself otherwise.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        # np.generic is the base class of every NumPy scalar (float32, int64,
        # bool_, ...); .item() yields the matching built-in Python value.
        return obj.item()
    if isinstance(obj, dict):
        return {key: to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [to_serializable(value) for value in obj]
    if type(obj) is tuple:
        # Only plain tuples are rebuilt; namedtuple-like subclasses take
        # positional fields and cannot be constructed from a generator.
        return tuple(to_serializable(value) for value in obj)
    return obj

def process_image(image_path, model):
    """Detect layout blocks in one image and return them as JSON-ready dicts.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        model: Layout detector exposing ``detect(image)``; the result is
            iterated and each element must provide ``to_dict()``.

    Returns:
        A list with one dict per detected block. Every value of each block's
        ``to_dict()`` output is passed through ``to_serializable``.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # rather than raising; fail here with the offending path instead of
        # an opaque "'NoneType' object is not subscriptable" on the next line.
        raise ValueError(f"Could not read image: {image_path}")

    # OpenCV loads channels as BGR; reverse the last axis to get RGB.
    image = image[..., ::-1]

    layout = model.detect(image)

    # Convert every field of every block so the result can be dumped as JSON.
    return [
        {key: to_serializable(value) for key, value in block.to_dict().items()}
        for block in layout
    ]

def save_layout_to_json(layout_data, json_path):
    """Write ``layout_data`` to ``json_path`` as JSON.

    Args:
        layout_data: JSON-encodable object to store.
        json_path: Destination file path; opened in ``'w'`` mode, so an
            existing file is overwritten.
    """
    with open(json_path, mode='w') as out_handle:
        json.dump(layout_data, fp=out_handle)

# Build the layout detector once at module level; process_folder below reads
# this global when it calls process_image.
model = lp.PaddleDetectionLayoutModel(
    config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
    # Class index -> label name, i.e. {0: "Text", ..., 4: "Figure"}.
    label_map=dict(enumerate(("Text", "Title", "List", "Table", "Figure"))),
    threshold=0.5,
    enable_mkldnn=True,
    enforce_cpu=False,
)

def process_folder(folder_path, extensions=('.jpg',)):
    """Run layout detection on every matching image under ``folder_path``.

    The directory tree is walked recursively. Each matching file is passed to
    ``process_image`` together with the module-level ``model``, and the result
    is saved next to the image with its extension replaced by ``.json``.

    Args:
        folder_path: Root directory to walk.
        extensions: File-name suffix, or iterable of suffixes, that marks a
            file as an image to process. Matching is case-insensitive.
            Defaults to ``('.jpg',)``. Images that differ only in extension
            (``a.jpg`` / ``a.png``) map to the same ``a.json`` output.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            # str.endswith accepts a tuple, so one call checks all suffixes.
            if not file.lower().endswith(suffixes):
                continue

            file_path = os.path.join(root, file)
            layout_data = process_image(file_path, model)

            # Write the JSON beside the image: same stem, '.json' extension.
            json_path = os.path.splitext(file_path)[0] + '.json'
            save_layout_to_json(layout_data, json_path)


# Specify the folder path to process.
# NOTE: '/your_folder_path/' is a placeholder; replace it with a real
# directory before running.
# The call below sits at module level, so it runs as soon as this file is
# executed or imported.
folder_path = '/your_folder_path/'
process_folder(folder_path)
相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
Base64 Proxy Hungarian OCR 多线程 Google Anaconda XGBoost SQLite Transformers ResNet-50 OpenCV Knowledge Ptyhon Qwen2.5 Streamlit Paper 关于博主 LLAMA PIP 强化学习 Algorithm InvalidArgumentError 报税 Miniforge Hotel Qwen llama.cpp Vim BTC 云服务器 Food hf CTC GPT4 Quantize C++ Pytorch 音频 BF16 飞书 Vmess scipy Ubuntu logger Image2Text WAN Git Color Quantization NameSilo LeetCode 继承 LaTeX Tiktoken 图形思考法 RGB Docker Distillation git 签证 Use 论文 SAM 搞笑 CAM HuggingFace 递归学习法 Bitcoin TTS Mixtral FlashAttention Markdown Rebuttal Password TensorFlow UNIX Permission LoRA Animate 图标 Clash Cloudreve 多进程 DeepSeek FP32 Django API Statistics SQL COCO Pillow ModelScope GIT Pickle 论文速读 OpenAI Numpy Land Pandas git-lfs 阿里云 Search Freesound v0.dev ONNX Paddle CUDA GPTQ Sklearn FastAPI UI diffusers QWEN BeautifulSoup Interview VSCode Bert CLAP Data VGG-16 PDB Claude CC Linux 证件照 mmap Random Website XML 财报 icon JSON DeepStream CV 版权 SVR Jupyter Dataset uWSGI Bipartite Card transformers Jetson Disk uwsgi Excel EXCEL Magnet Michelin Bin 顶会 Github TensorRT Template printf Shortcut 第一性原理 v2ray 公式 Qwen2 MD5 torchinfo Firewall FP64 GGML Domain NLP 算法题 Python Augmentation Translation HaggingFace 腾讯云 Agent Attention FP8 Zip Plate NLTK PyCharm WebCrawler Logo Math Baidu IndexTTS2 tar 净利润 Heatmap GoogLeNet SPIE Crawler 域名 Llama AI Input PDF Tensor Review tqdm Tracking Web Nginx Hilton Breakpoint Video VPN ChatGPT FP16 CEIR News TSV CSV Plotly Diagram Safetensors YOLO Gemma LLM Conda RAR PyTorch Datetime Windows
站点统计

本站现有博文327篇,共被浏览826003

本站已经建立2532天!

热门文章
文章归档
回到顶部