PyTorch Q4_1 Quantize and Dequantize Aligned with llama.cpp

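In llama.cpp's Q4_1 scheme, every block of 32 values is stored as 4-bit codes q in [0, 15] plus a per-block scale d = (max - min) / 15 and minimum m, so each value is reconstructed as x ≈ d * q + m. The script below reproduces that round trip directly on PyTorch tensors, replacing selected weights in a checkpoint with their quantized-then-dequantized values.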

import torch

# Check if CUDA is available
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

def q4_1_quantize_and_dequantize_tensor(tensor):
    tensor = tensor.to(dtype=torch.float32, device=device)

    # Reshape so each row is one 32-element Q4_1 block
    # (the number of elements must be a multiple of 32)
    orig_shape = tensor.shape
    tensor = tensor.view(-1, 32)

    # Find the min and max values per block
    min_vals = torch.min(tensor, dim=1)[0]
    max_vals = torch.max(tensor, dim=1)[0]

    # Calculate scale d for each block
    d = (max_vals - min_vals) / (2**4 - 1)
    d[d == 0] = 1.0  # Prevent division by zero

    # Calculate inverse of d
    ids = 1.0 / d

    # Quantize tensor elements
    quantized_tensors = (tensor - min_vals[:, None]) * ids[:, None]

    # Round to nearest (add 0.5, then truncate) and clamp to the 4-bit range [0, 15]
    quantized_tensors = torch.clamp(quantized_tensors + 0.5, 0, 15).to(torch.uint8)

    # Dequantize the tensor
    dequantized_tensors = (quantized_tensors.float() * d[:, None]) + min_vals[:, None]

    # Reshape back to the original shape and store as float16
    dequantized_tensors = dequantized_tensors.view(orig_shape).to(dtype=torch.float16)

    return dequantized_tensors

# Load the checkpoint's state dict onto the CPU
model_part = torch.load("your_model_path/pytorch_model.bin", map_location="cpu")
keywords = [
    "embed_tokens.weight",
    "self_attn.q_proj.weight",
    "self_attn.k_proj.weight",
    "self_attn.v_proj.weight",
    "self_attn.o_proj.weight",
    "mlp.up_proj.weight",
    "mlp.gate_proj.weight",
    "mlp.down_proj.weight",
    "lm_head.weight"
]
for name, data in model_part.items():
    if any(word in name for word in keywords):
        # Quantize and dequantize the entire tensor
        model_part[name] = q4_1_quantize_and_dequantize_tensor(data)

# Save the updated model parts
torch.save(model_part, "pytorch_model_quantized.bin")
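
To gauge how much precision the Q4_1 round trip costs, a quick check along these lines reloads both files and prints the largest per-tensor deviation. It is only a sketch that reuses the placeholder paths from above.

import torch

original = torch.load("your_model_path/pytorch_model.bin", map_location="cpu")
roundtrip = torch.load("pytorch_model_quantized.bin", map_location="cpu")

for name, ref in original.items():
    err = (ref.float() - roundtrip[name].float()).abs().max().item()
    print(f"{name}: max abs error {err:.6f}")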
