
PyTorch Q4_1 Quantize and Dequantize Aligning with llama.cpp

The snippet below re-implements llama.cpp's Q4_1 quantization in PyTorch to simulate its quantization error: each 32-element block stores a scale d = (max - min) / 15 and a minimum m, every value is mapped to a 4-bit integer q in [0, 15], and the weight is reconstructed as q * d + m.
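For example, if a block's values span -0.30 to 0.30, then d = 0.60 / 15 = 0.04 and m = -0.30, so a weight of 0.10 is stored as round((0.10 - (-0.30)) / 0.04) = 10 and reconstructed as 10 * 0.04 + (-0.30) = 0.10 (the numbers here are made up purely for illustration).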

import torch

# Check if CUDA is available
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

def q4_1_quantize_and_dequantize_tensor(tensor):
    tensor = tensor.to(dtype=torch.float32, device=device)

    # Reshape so each 32-value block (QK4_1 = 32 in llama.cpp) is processed independently;
    # this assumes the number of elements is a multiple of 32
    orig_shape = tensor.shape
    tensor = tensor.view(-1, 32)

    # Find the min and max values per block
    min_vals = torch.min(tensor, dim=1)[0]
    max_vals = torch.max(tensor, dim=1)[0]

    # Calculate scale d for each block
    d = (max_vals - min_vals) / (2**4 - 1)
    d[d == 0] = 1.0  # Prevent division by zero

    # Calculate inverse of d
    ids = 1.0 / d

    # Quantize tensor elements
    quantized_tensors = (tensor - min_vals[:, None]) * ids[:, None]

    # Add 0.5, clamp to the 4-bit range [0, 15], and truncate to uint8 (matches llama.cpp's rounding)
    quantized_tensors = torch.clamp(quantized_tensors + 0.5, 0, 15).to(torch.uint8)

    # Dequantize the tensor
    dequantized_tensors = (quantized_tensors.float() * d[:, None]) + min_vals[:, None]

    # Reshape back to the original shape
    dequantized_tensors = dequantized_tensors.view(orig_shape).to(dtype=torch.float16)

    return dequantized_tensors
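
A quick sanity check (the shape below is arbitrary; any tensor whose element count is a multiple of 32 works): round-trip a random tensor through the function above and look at the reconstruction error.

# Round-trip a random tensor and measure the Q4_1 reconstruction error
x = torch.randn(4096, 4096)
x_deq = q4_1_quantize_and_dequantize_tensor(x)
abs_err = (x.to(x_deq.device) - x_deq.float()).abs()
print(f"max abs error:  {abs_err.max().item():.6f}")
print(f"mean abs error: {abs_err.mean().item():.6f}")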

# Load the checkpoint's state dict onto CPU
model_part = torch.load("your_model_path/pytorch_model.bin", map_location="cpu")
keywords = [
    "embed_tokens.weight",
    "self_attn.q_proj.weight",
    "self_attn.k_proj.weight",
    "self_attn.v_proj.weight",
    "self_attn.o_proj.weight",
    "mlp.up_proj.weight",
    "mlp.gate_proj.weight",
    "mlp.down_proj.weight",
    "lm_head.weight"
]
for name, data in model_part.items():
    if any(word in name for word in keywords):
        # Quantize and dequantize the entire tensor
        model_part[name] = q4_1_quantize_and_dequantize_tensor(data)

# Save the updated model parts
torch.save(model_part, "pytorch_model_quantized.bin")
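
For reference, llama.cpp itself never stores the dequantized values: each 32-element Q4_1 block is written as an fp16 scale d, an fp16 min m, and 16 bytes that each pack two 4-bit values (element j in the low nibble, element j + 16 in the high nibble). The helper below is a minimal illustrative sketch of that layout for a single block, using the same rounding as the function above; it is not part of the script.

def pack_q4_1_block(block):
    # block: 32 float values; returns (d, m, qs) mimicking llama.cpp's block_q4_1 layout
    block = block.to(torch.float32).flatten()
    assert block.numel() == 32
    mn, mx = block.min(), block.max()
    d = (mx - mn) / 15
    inv_d = 1.0 / d if d != 0 else 0.0
    q = torch.clamp((block - mn) * inv_d + 0.5, 0, 15).to(torch.uint8)
    # Two 4-bit values per byte: elements 0..15 in the low nibbles, 16..31 in the high nibbles
    qs = q[:16] | (q[16:] << 4)
    return d.to(torch.float16), mn.to(torch.float16), qs  # 2 + 2 + 16 = 20 bytes per block

Note that llama.cpp rounds d and m to fp16 before storing them, while the simulation above keeps them in float32, so the reconstructed weights can differ very slightly from a true llama.cpp round trip.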

