EADST

PyTorch Q4_1 Quantize and Dequantize, aligning with llama.cpp

import torch

# Select the computation device: prefer CUDA when a GPU is present.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

def q4_1_quantize_and_dequantize_tensor(tensor):
    """Simulate llama.cpp Q4_1 quantization: quantize to 4 bits per value in
    blocks of 32, then immediately dequantize back.

    Q4_1 stores, per 32-value block, a scale ``d`` and a minimum ``m`` so
    that each element is reconstructed as ``q * d + m`` with ``q`` in
    ``[0, 15]`` (matching ``quantize_row_q4_1`` in llama.cpp, QK4_1 == 32).

    Args:
        tensor: Input tensor whose total number of elements is a multiple
            of 32 (any shape, any float dtype).

    Returns:
        A ``torch.float16`` tensor with the original shape holding the
        quantize-then-dequantize round-trip of the input.

    Raises:
        ValueError: If the number of elements is not divisible by 32.
    """
    # Resolve the device locally so the function does not depend on globals.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tensor = tensor.to(dtype=torch.float32, device=device)

    orig_shape = tensor.shape
    if tensor.numel() % 32 != 0:
        raise ValueError(
            f"Q4_1 needs a multiple of 32 elements, got {tensor.numel()}"
        )
    # Process each 32-value block independently. `reshape` (not `view`)
    # also handles non-contiguous inputs such as transposed weights.
    tensor = tensor.reshape(-1, 32)

    # Per-block minimum (the Q4_1 offset m) and maximum.
    min_vals = torch.min(tensor, dim=1)[0]
    max_vals = torch.max(tensor, dim=1)[0]

    # Scale d maps the [min, max] range onto the 16 quantization levels.
    d = (max_vals - min_vals) / (2**4 - 1)
    d[d == 0] = 1.0  # constant blocks: any scale works, avoid div-by-zero

    # Inverse scale for the quantization step.
    ids = 1.0 / d

    # Quantize: shift by the block minimum, scale into [0, 15].
    quantized_tensors = (tensor - min_vals[:, None]) * ids[:, None]

    # +0.5 then truncate-to-uint8 implements round-half-up, as llama.cpp
    # does with `(int8_t)(x + 0.5f)`; clamp keeps values inside 4 bits.
    quantized_tensors = torch.clamp(quantized_tensors + 0.5, 0, 15).to(torch.uint8)

    # Dequantize: q * d + m per block.
    dequantized_tensors = (quantized_tensors.float() * d[:, None]) + min_vals[:, None]

    # Restore the caller's shape; GGML-style fp16 output.
    dequantized_tensors = dequantized_tensors.view(orig_shape).to(dtype=torch.float16)

    return dequantized_tensors

# Load the checkpoint on CPU; each matching tensor gets a Q4_1 round-trip.
model_part = torch.load("your_model_path/pytorch_model.bin", map_location="cpu")

# Only the weight matrices of these layer types are quantized; other
# tensors (norms, biases, ...) are left untouched, mirroring llama.cpp.
keywords = [
    "embed_tokens.weight",
    "self_attn.q_proj.weight",
    "self_attn.k_proj.weight",
    "self_attn.v_proj.weight",
    "self_attn.o_proj.weight",
    "mlp.up_proj.weight",
    "mlp.gate_proj.weight",
    "mlp.down_proj.weight",
    "lm_head.weight",
]
for name, data in model_part.items():
    # A parameter name matches at most one keyword; any() stops at the
    # first hit instead of redundantly re-quantizing per keyword.
    if any(word in name for word in keywords):
        model_part[name] = q4_1_quantize_and_dequantize_tensor(data)

# Save the quantize-dequantized state dict.
torch.save(model_part, "pytorch_model_quantized.bin")

Reference:

相关标签
About Me
XD
Goals determine what you are going to be.
Category
标签云
Plate Tracking Knowledge Paddle PDB BF16 Diagram Translation BeautifulSoup InvalidArgumentError Attention 版权 Linux Zip MD5 Excel WAN git-lfs IndexTTS2 Conda NLP Nginx ONNX Pytorch printf Distillation Image2Text Land 搞笑 FP8 Jupyter Pickle UI Plotly VGG-16 Baidu 算法题 GPT4 v2ray icon 多线程 Interview Domain CTC Django Heatmap AI Bert TensorRT Random Qwen Bipartite Github FlashAttention Freesound HuggingFace Pandas Pillow Tensor GPTQ FP64 域名 JSON Video GGML 签证 Claude GIT 净利润 LoRA Magnet PyTorch LLAMA torchinfo Jetson Datetime NameSilo SPIE Anaconda 腾讯云 Statistics GoogLeNet OpenCV transformers hf Rebuttal uwsgi Data TSV Quantize C++ Clash PyCharm 顶会 Markdown scipy VPN WebCrawler Qwen2.5 CUDA Hotel Card TTS Use 强化学习 云服务器 FP32 财报 XML LLM Input Streamlit 报税 mmap v0.dev 音频 llama.cpp QWEN UNIX 第一性原理 Llama ModelScope ChatGPT XGBoost CAM HaggingFace LeetCode 递归学习法 Cloudreve TensorFlow Mixtral Tiktoken Vmess Color ResNet-50 PDF Transformers diffusers Firewall VSCode Bitcoin Qwen2 Crawler SQLite 公式 Miniforge Vim 关于博主 Base64 Food Safetensors tqdm LaTeX uWSGI API Gemma EXCEL Agent COCO RAR 证件照 CLAP CV Website 图形思考法 CC OpenAI CSV DeepStream Windows 阿里云 Disk Web NLTK Shortcut Python YOLO BTC Hungarian Logo RGB OCR Sklearn Ptyhon Permission tar Bin Michelin Template SVR Google 飞书 Math Proxy SAM 图标 git DeepSeek Password PIP Hilton Git Docker Augmentation SQL Review 继承 Animate FastAPI Numpy Search logger FP16 CEIR Paper 多进程 Ubuntu Algorithm Breakpoint News Dataset Quantization
站点统计

本站现有博文323篇,共被浏览801170

本站已经建立2500天!

热门文章
文章归档
回到顶部