EADST

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

Image2Text: Automating Document Layout Analysis with Python and LayoutParser

import cv2
import layoutparser as lp
import os
import json
from PIL import Image
import numpy as np

def to_serializable(obj):
    """Convert NumPy values, possibly nested in containers, to plain Python.

    ``json.dump`` cannot encode NumPy scalars or arrays, so this helper maps
    them to their built-in equivalents before serialization.

    Args:
        obj: Any value. NumPy arrays and NumPy scalars are converted; dicts,
            lists and tuples are traversed recursively; everything else is
            returned unchanged.

    Returns:
        ``obj`` with every NumPy array replaced by a (nested) list and every
        NumPy scalar replaced by the matching Python scalar.
    """
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    if isinstance(obj, np.generic):
        # np.generic is the base of all NumPy scalars, so integers and
        # booleans are handled as well as float32/float64.
        return obj.item()
    if isinstance(obj, dict):
        return {key: to_serializable(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [to_serializable(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(to_serializable(item) for item in obj)
    return obj

def process_image(image_path, model):
    """Detect the layout of one image and return it as JSON-friendly dicts.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.
        model: Object exposing ``detect(image)`` that returns an iterable of
            layout elements, each of which provides ``to_dict()``.

    Returns:
        A list with one dict per detected element; every value has been
        passed through ``to_serializable``.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising,
        # which would otherwise surface as an opaque TypeError below.
        raise ValueError(f"Could not read image: {image_path}")
    image = image[..., ::-1]  # Reverse the channel axis: BGR -> RGB

    # Use the model to detect layout
    layout = model.detect(image)

    # Convert each layout element to a dict of serializable values
    return [
        {key: to_serializable(value) for key, value in element.to_dict().items()}
        for element in layout
    ]

def save_layout_to_json(layout_data, json_path):
    """Write ``layout_data`` to ``json_path`` as JSON.

    The file is opened in text write mode, so an existing file at that path
    is overwritten.
    """
    with open(json_path, 'w') as out_handle:
        json.dump(obj=layout_data, fp=out_handle)

# Build the layout detector once at module level; process_folder hands this
# same instance to process_image for every file it finds.
model = lp.PaddleDetectionLayoutModel(
    config_path="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config",
    # Class index -> label name, numbered from 0 in this order
    label_map=dict(enumerate(["Text", "Title", "List", "Table", "Figure"])),
    threshold=0.5,  # presumably the minimum detection score to keep; confirm in LayoutParser docs
    enable_mkldnn=True,
    enforce_cpu=False,
)

def process_folder(folder_path):
    """Run layout detection on every JPG file under ``folder_path``.

    The directory tree is walked with ``os.walk``. Each file whose lowercased
    name ends in '.jpg' is passed to ``process_image`` together with the
    module-level ``model``, and the result is saved beside the image with the
    extension replaced by '.json'.
    """
    for current_dir, _subdirs, filenames in os.walk(folder_path):
        for filename in filenames:
            # Skip anything that is not a JPG file
            if not filename.lower().endswith('.jpg'):
                continue

            image_path = os.path.join(current_dir, filename)
            stem, _extension = os.path.splitext(image_path)

            # Detect the layout, then store it next to the source image
            detected = process_image(image_path, model)
            save_layout_to_json(detected, stem + '.json')


# Specify the folder path to process.
# NOTE: '/your_folder_path/' is a placeholder; replace it with a real
# directory before running the script.
folder_path = '/your_folder_path/'
# Executed at module level: walks folder_path recursively and writes one
# .json file next to every .jpg file found.
process_folder(folder_path)
相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
XGBoost Heatmap TensorFlow Claude Tiktoken Knowledge logger mmap Interview DeepSeek Nginx Hotel Conda Firewall Review FP32 Search TensorRT CV Qwen2.5 LoRA LeetCode 云服务器 Cloudreve FP16 NLP Bitcoin v0.dev GIT Miniforge Food torchinfo Diagram Vim Pillow API Animate Proxy Algorithm WAN Logo BF16 腾讯云 报税 FP8 Docker EXCEL 净利润 News Linux Breakpoint Bin CTC 多线程 递归学习法 GGML PyCharm Ptyhon 飞书 搞笑 HuggingFace COCO Quantize UNIX Ubuntu Markdown Dataset DeepStream Augmentation VSCode InvalidArgumentError Attention hf 域名 NameSilo CUDA Template Bipartite Sklearn BTC LLM Website Agent ONNX 音频 SQLite SAM Pytorch Jetson Freesound GPTQ Pickle Transformers YOLO 阿里云 Jupyter Permission Plate Excel LaTeX CAM 版权 AI Input tar Llama Pandas OCR HaggingFace Github IndexTTS2 ChatGPT Paddle OpenAI OpenCV VGG-16 SPIE CC FlashAttention Windows Numpy 关于博主 diffusers git Gemma MD5 Git printf 证件照 FastAPI LLAMA Bert Qwen Translation 强化学习 Qwen2 Password TSV Tensor Hungarian Django 多进程 Safetensors Paper git-lfs ModelScope Video Disk UI JSON PIP v2ray Domain Color PDF Datetime Streamlit WebCrawler Magnet Quantization Math llama.cpp Clash Web Anaconda NLTK 顶会 Crawler tqdm uWSGI Base64 transformers 继承 SVR 第一性原理 QWEN Use 公式 Hilton RAR GPT4 Tracking GoogLeNet SQL Google RGB Statistics 算法题 Image2Text Card 签证 Mixtral Python scipy Random FP64 CEIR uwsgi Vmess Plotly PDB CLAP Data Zip Land TTS CSV BeautifulSoup 财报 Michelin ResNet-50 XML C++ Distillation Shortcut PyTorch VPN Baidu 图形思考法
站点统计

本站现有博文321篇,共被浏览768029次。

本站已经建立2452天!

热门文章
文章归档
回到顶部