EADST

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np

def to_serializable(obj):
    """Convert NumPy values into plain Python objects that ``json`` can encode.

    Args:
        obj: Any value. NumPy arrays and NumPy scalars (floats, integers,
            booleans, ...) are converted; dicts, lists and tuples are walked
            recursively so NumPy values nested inside them are converted too.

    Returns:
        The converted value. Arrays become (nested) lists, NumPy scalars
        become the matching built-in Python scalar, containers keep their
        type, and every other object is returned unchanged.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        # Covers every NumPy scalar type, not only float32/float64;
        # e.g. np.int64 or np.float16 would otherwise break json.dump.
        return obj.item()
    if isinstance(obj, dict):
        return {key: to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, (list, tuple)):
        converted = [to_serializable(item) for item in obj]
        return converted if isinstance(obj, list) else tuple(converted)
    return obj

def process_image(image_path, model):
    """Run layout detection on one image and return JSON-ready results.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.
        model: Object exposing ``detect(image)``; each detected element must
            provide ``to_dict()``.

    Returns:
        A list with one dict per detected layout element, with the dict
        values passed through ``to_serializable``.

    Raises:
        ValueError: If the image cannot be read (``cv2.imread`` returned
            ``None``, which it does for missing or undecodable files).
    """
    image = cv2.imread(image_path)
    if image is None:
        # Without this check the slicing below fails with an opaque
        # "'NoneType' object is not subscriptable" error.
        raise ValueError(f"Could not read image: {image_path!r}")

    # OpenCV loads pixels as BGR; reverse the channel axis to get RGB.
    image = image[..., ::-1]

    # Use the model to detect layout
    layout = model.detect(image)

    # Convert every layout element to a dict of serializable values.
    return [
        {key: to_serializable(value) for key, value in element.to_dict().items()}
        for element in layout
    ]

def save_layout_to_json(layout_data, json_path):
    """Write ``layout_data`` to ``json_path`` as JSON, replacing any existing file.

    Args:
        layout_data: JSON-encodable object (here, the list of layout dicts).
        json_path: Destination file path; opened in text write mode.
    """
    with open(json_path, mode='w') as sink:
        json.dump(layout_data, fp=sink)

# Build the layout detection model once at module level; process_folder
# reads this global when it calls process_image.
model = lp.PaddleDetectionLayoutModel(
    config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
    # Class index -> human-readable label attached to each detection.
    label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"},
    threshold=0.5,
    enable_mkldnn=True,
    enforce_cpu=False
)

def process_folder(folder_path, extensions=('.jpg',)):
    """Detect layouts for every matching image under ``folder_path``.

    Walks ``folder_path`` recursively. For each file whose name ends with one
    of ``extensions`` (case-insensitive), runs ``process_image`` with the
    module-level ``model`` and writes the result next to the image, using the
    same base name with a ``.json`` suffix.

    Args:
        folder_path: Root directory to walk.
        extensions: A file suffix or an iterable of suffixes to process.
            Defaults to ``('.jpg',)``, which matches the previous hard-coded
            behavior; pass e.g. ``('.jpg', '.jpeg', '.png')`` to cover more
            image types.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith needs a tuple; lower-case once so matching is case-insensitive.
    suffixes = tuple(ext.lower() for ext in extensions)

    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            if not file.lower().endswith(suffixes):
                continue

            file_path = os.path.join(root, file)
            layout_data = process_image(file_path, model)  # Process the image

            # Same location and base name as the image, with a .json suffix.
            json_path = os.path.splitext(file_path)[0] + '.json'
            save_layout_to_json(layout_data, json_path)  # Save layout data as JSON


# Root directory whose JPG images are processed; replace the placeholder
# with a real path before running.
folder_path = '/your_folder_path/'
process_folder(folder_path=folder_path)
相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
CAM mmap OpenAI Qwen Algorithm Base64 Search 公式 Diagram Paper VPN Bipartite Logo tqdm hf NLTK FP16 XML llama.cpp Paddle Dataset NameSilo printf AI EXCEL Food scipy UNIX Conda Tiktoken Bitcoin ModelScope PDF Vim Website RGB Permission ChatGPT LeetCode Llama PyCharm HuggingFace CUDA Augmentation LaTeX BF16 Review uWSGI JSON Disk Git 继承 NLP Distillation Input 飞书 BTC Ubuntu VGG-16 图形思考法 Template Math Gemma IndexTTS2 FastAPI Mixtral git-lfs Pytorch FlashAttention Shortcut PDB SVR 财报 Bin TensorFlow Jupyter ONNX Windows Datetime LLM Random Django LoRA Vmess BeautifulSoup TTS Tensor 顶会 Pandas Excel LLAMA Sklearn transformers Quantize Plotly 报税 InvalidArgumentError RAR CV 强化学习 GPT4 FP8 SQLite 多线程 SPIE Animate UI 版权 Firewall C++ Proxy WAN Markdown torchinfo 域名 PIP 第一性原理 logger Use 搞笑 GGML WebCrawler Land GoogLeNet News 多进程 Hilton Crawler Agent SAM git Card v0.dev Domain Claude API CLAP SQL Anaconda Nginx 关于博主 Statistics Quantization 云服务器 Knowledge Image2Text 阿里云 VSCode QWEN Michelin Tracking Jetson v2ray DeepSeek TSV Attention diffusers Zip 签证 Github GPTQ 净利润 Web ResNet-50 证件照 PyTorch Docker 腾讯云 Magnet Breakpoint Clash Bert Qwen2.5 Pillow FP32 CSV tar Heatmap Data Ptyhon Miniforge TensorRT COCO Google OCR FP64 音频 Linux DeepStream OpenCV Freesound CC Baidu Streamlit Python HaggingFace Numpy Pickle Video 算法题 XGBoost CTC Safetensors MD5 CEIR Interview uwsgi Transformers Qwen2 Translation Hungarian Password GIT Color 递归学习法 Cloudreve Hotel Plate YOLO
站点统计

本站现有博文321篇,共被浏览779972次。

本站已经建立2472天!

热门文章
文章归档
回到顶部