EADST

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np

def to_serializable(obj):
    """Convert NumPy values into plain Python objects that ``json`` can encode.

    Args:
        obj: Any value. NumPy arrays, NumPy scalars, and dicts, lists or
            tuples that contain them are converted; every other value is
            returned unchanged.

    Returns:
        ``obj.tolist()`` for arrays, ``obj.item()`` for NumPy scalars, a
        rebuilt container for dicts/lists/tuples, otherwise ``obj`` itself.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        # np.generic is the base of all NumPy scalars, so integer and boolean
        # scalars are converted too, not only float32/float64.
        return obj.item()
    if isinstance(obj, dict):
        return {key: to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [to_serializable(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(to_serializable(item) for item in obj)
    return obj

def process_image(image_path, model):
    """Detect the layout of one image and return it as JSON-friendly dicts.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.
        model: Object exposing ``detect(image)`` that returns an iterable of
            layout elements, each providing ``to_dict()``.

    Returns:
        A list with one dict per detected element; every value has been
        passed through ``to_serializable``.

    Raises:
        ValueError: If ``cv2.imread`` could not load ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable file by returning None rather than
        # raising, so fail here with a message that names the file.
        raise ValueError(f"Could not read image: {image_path}")
    image = image[..., ::-1]  # Convert from BGR to RGB

    # Use the model to detect layout
    layout = model.detect(image)

    # Convert each layout element to a dict of serializable values
    return [
        {key: to_serializable(value) for key, value in element.to_dict().items()}
        for element in layout
    ]

def save_layout_to_json(layout_data, json_path):
    """Write ``layout_data`` to ``json_path`` as JSON.

    Args:
        layout_data: JSON-encodable object to store.
        json_path: Destination file path; the file is opened in ``'w'`` mode,
            so any existing content is replaced.
    """
    with open(json_path, mode='w') as out_handle:
        json.dump(layout_data, fp=out_handle)

# Class-id -> label-name mapping handed to the detector.
PUBLAYNET_LABEL_MAP = {0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}

# Build the layout detector once at module level; process_folder reads this
# global when it processes each image.
model = lp.PaddleDetectionLayoutModel(
    config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
    label_map=PUBLAYNET_LABEL_MAP,
    threshold=0.5,
    enforce_cpu=False,
    enable_mkldnn=True,
)

def process_folder(folder_path):
    """Run layout detection on every ``.jpg`` file under ``folder_path``.

    The tree is walked recursively with ``os.walk``. Each matching image is
    handed to ``process_image`` together with the module-level ``model``, and
    the result is saved next to the image with its extension replaced by
    ``.json``.

    Args:
        folder_path: Root directory to scan.
    """
    for current_dir, _subdirs, filenames in os.walk(folder_path):
        for filename in filenames:
            # Extension match is case-insensitive; everything else is skipped
            if not filename.lower().endswith('.jpg'):
                continue

            image_path = os.path.join(current_dir, filename)
            detected = process_image(image_path, model)

            # Same location and base name as the image, with a .json suffix
            stem, _ext = os.path.splitext(image_path)
            save_layout_to_json(detected, stem + '.json')


# Script entry: set the directory whose images should be analysed, then run
folder_path = '/your_folder_path/'
process_folder(folder_path=folder_path)
相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
报税 diffusers Streamlit VSCode v0.dev Knowledge 公式 TensorRT QWEN 多进程 财报 VPN Ubuntu RAR git-lfs Pillow Safetensors uwsgi Excel Clash Diagram 算法题 Video Random Google NameSilo mmap 第一性原理 GoogLeNet IndexTTS2 Statistics 强化学习 Plotly Agent OpenCV 图形思考法 Input tar Datetime hf LLAMA Git Interview scipy CLAP Qwen 飞书 HuggingFace TSV Linux Transformers PyCharm ResNet-50 NLP COCO Heatmap Docker Food Python Proxy UI Django GPTQ Qwen2.5 API Use Crawler 证件照 GGML BF16 Hungarian Augmentation Qwen2 Miniforge YOLO Pandas TensorFlow OCR InvalidArgumentError ONNX Jupyter PDB Review Freesound FlashAttention Permission Distillation FP64 Land LaTeX uWSGI UNIX EXCEL Sklearn NLTK Vim TTS Color 多线程 ModelScope 递归学习法 Website Nginx Anaconda CSV Data Logo 净利润 Breakpoint Bipartite Animate News Disk Password Hotel Pytorch PIP Bert Bitcoin CV FP32 CTC SPIE Michelin 音频 BeautifulSoup printf DeepStream Magnet OpenAI SVR PDF FastAPI Pickle Llama CC XML VGG-16 Web torchinfo GIT Template Jetson Paddle 阿里云 HaggingFace Tiktoken Bin Quantization Domain PyTorch Hilton LLM Conda SQLite Quantize Ptyhon WAN SAM BTC CAM Card GPT4 Markdown XGBoost logger Attention MD5 AI Numpy 版权 Baidu git JSON Dataset Base64 Tensor 关于博主 Zip Mixtral Search Algorithm Math 搞笑 WebCrawler tqdm LoRA 继承 腾讯云 LeetCode 域名 Windows Claude Paper Image2Text CUDA FP16 Plate Vmess RGB ChatGPT Tracking transformers DeepSeek Github CEIR Translation Cloudreve C++ FP8 SQL 签证 Shortcut v2ray Gemma llama.cpp 顶会 Firewall
站点统计

本站现有博文320篇,共被浏览759697

本站已经建立2428天!

热门文章
文章归档
回到顶部