EADST

PyTorch Q4_1 Quantization and Dequantization Aligned with llama.cpp

PyTorch Q4_1 Quantization and Dequantization Aligned with llama.cpp

import torch

# Run on the GPU when one is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")


def q4_1_quantize_and_dequantize_tensor(tensor, block_size=32):
    """Round-trip a tensor through Q4_1-style 4-bit block quantization.

    The tensor is flattened into blocks of ``block_size`` consecutive values
    (row-major order). Each block is quantized to 16 levels using its own
    minimum and scale ``d = (max - min) / 15`` and is then immediately
    dequantized, so the result carries the quantization error while keeping
    the original shape.

    Args:
        tensor: Input tensor. It is converted to float32 and moved to the
            module-level ``device`` before processing. It may be
            non-contiguous (e.g. a transposed weight).
        block_size: Number of consecutive values sharing one (min, scale)
            pair. Defaults to 32.

    Returns:
        A float16 tensor on ``device`` with the same shape as ``tensor``.

    Raises:
        RuntimeError: If the number of elements is not a multiple of
            ``block_size`` (raised by ``reshape``).
    """
    tensor = tensor.to(dtype=torch.float32, device=device)
    orig_shape = tensor.shape

    # An empty tensor has no blocks; reshape(-1, block_size) would be
    # ambiguous for it, so return early.
    if tensor.numel() == 0:
        return tensor.to(dtype=torch.float16)

    # reshape (rather than view) also accepts non-contiguous inputs by
    # copying them when necessary.
    blocks = tensor.reshape(-1, block_size)

    # Per-block minimum and maximum, kept as column vectors for broadcasting.
    min_vals = blocks.min(dim=1, keepdim=True).values
    max_vals = blocks.max(dim=1, keepdim=True).values

    # Scale d maps the block's value range onto the 16 levels 0..15.
    d = (max_vals - min_vals) / (2**4 - 1)
    # A constant block has d == 0; use 1.0 so the inverse is finite. Every
    # quantized value is then 0 and dequantizes back to the block minimum.
    d = torch.where(d == 0, torch.ones_like(d), d)
    inv_d = 1.0 / d

    # Round to nearest by adding 0.5 and truncating in the uint8 cast;
    # clamp keeps the result within the 4-bit range.
    scaled = (blocks - min_vals) * inv_d
    quantized = torch.clamp(scaled + 0.5, 0, 15).to(torch.uint8)

    # NOTE(review): d and min_vals stay in float32 here; llama.cpp's Q4_1
    # block format is understood to store them as fp16 - confirm whether
    # bit-exact alignment with llama.cpp output is required.
    dequantized = quantized.float() * d + min_vals

    return dequantized.reshape(orig_shape).to(dtype=torch.float16)

# Load the checkpoint onto the CPU regardless of the device it was saved from.
# NOTE: torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints that come from a trusted source.
model_part = torch.load("your_model_path/pytorch_model.bin", map_location="cpu")

# Substrings identifying the weight tensors to round-trip through Q4_1.
# Any state-dict entry whose name contains one of these is processed.
keywords = [
    "embed_tokens.weight",
    "self_attn.q_proj.weight",
    "self_attn.k_proj.weight",
    "self_attn.v_proj.weight",
    "self_attn.o_proj.weight",
    "mlp.up_proj.weight",
    "mlp.gate_proj.weight",
    "mlp.down_proj.weight",
    "lm_head.weight",
]

# Only existing keys are reassigned, so the dict's size does not change
# while it is being iterated.
for name, data in model_part.items():
    # any() guarantees each tensor is processed at most once, even if its
    # name happens to match more than one keyword.
    if any(word in name for word in keywords):
        # Move the result back to the CPU so the saved checkpoint does not
        # contain CUDA tensors when the quantization ran on a GPU.
        model_part[name] = q4_1_quantize_and_dequantize_tensor(data).cpu()

# Save the updated state dict.
torch.save(model_part, "pytorch_model_quantized.bin")

Reference:

相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
FP8 NameSilo Github SQL Gemma QWEN 飞书 RAR NLTK CV Tensor v2ray Disk Baidu Safetensors CLAP Vim Qwen2.5 搞笑 git NLP Dataset WAN Breakpoint 第一性原理 Image2Text XGBoost Anaconda WebCrawler 签证 Food VGG-16 Numpy PDF VPN Hungarian 算法题 Miniforge Card Math DeepStream TensorRT TTS Sklearn Docker logger Qwen Google 域名 Quantize SAM git-lfs Input Password Llama hf Shortcut ONNX Pandas 多线程 音频 RGB Clash PyCharm Template Conda Streamlit CEIR Web Magnet CAM Interview llama.cpp Website HaggingFace uWSGI Excel FP32 LaTeX 公式 printf Markdown OpenAI Michelin Jetson Plate 报税 Search Cloudreve YOLO Firewall Django Crawler API Pickle tar Random FlashAttention Ptyhon Plotly CC Tracking Mixtral Bert MD5 Vmess Translation CSV ChatGPT SVR torchinfo Bin Transformers UI Data uwsgi 版权 Quantization DeepSeek Algorithm Domain Heatmap Permission scipy TSV Knowledge Tiktoken ResNet-50 Review COCO 顶会 Ubuntu Nginx Jupyter XML 财报 HuggingFace 证件照 Hotel mmap GPT4 FP64 PIP 多进程 C++ v0.dev SPIE AI CTC GIT 净利润 腾讯云 Freesound Use transformers Proxy Datetime 强化学习 BTC Bipartite LoRA 关于博主 Distillation Animate Git Paper 递归学习法 继承 OpenCV GPTQ BF16 LeetCode PDB LLM Color FastAPI UNIX SQLite Attention Video Land Pytorch GGML Claude Linux CUDA Bitcoin Hilton GoogLeNet EXCEL Statistics tqdm Pillow VSCode Agent Python InvalidArgumentError IndexTTS2 Windows Diagram BeautifulSoup Zip Qwen2 图形思考法 FP16 TensorFlow PyTorch ModelScope Base64 LLAMA 阿里云 Paddle JSON Logo Augmentation diffusers OCR
站点统计

本站现有博文319篇,共被浏览751697

本站已经建立2408天!

热门文章
文章归档
回到顶部