我一直试图对 DenseNet 模型进行量化,但没有成功。我一直在尝试实现 PyTorch 的训练后静态量化(post-training static quantization)。PyTorch 提供了其他模型的量化版本,但没有 DenseNet 的量化版本。是否可以对 DenseNet 架构进行量化?
我一直在寻找如何将量化应用于预先训练的模型的教程,但我没有取得任何成功。
发布于 2022-12-01 12:24:52
下面是如何在torchvision的DenseNet169上这样做:
from torch.ao.quantization import QuantStub, DeQuantStub
from torch import nn
from torchvision.models import densenet169, DenseNet169_Weights
from tqdm import tqdm
from torch.ao.quantization import HistogramObserver, PerChannelMinMaxObserver
import torch
# Wrap base model with quant/dequant stub
class QuantizedDenseNet169(nn.Module):
    """Pretrained torchvision DenseNet-169 wrapped with quant/dequant stubs.

    The input is passed through a ``QuantStub`` before the backbone and the
    backbone output is passed through a ``DeQuantStub``, which marks the
    boundaries of the region to be quantized in eager-mode quantization.
    """

    def __init__(self):
        super().__init__()
        # Backbone initialised with the ImageNet-1k (V1) weights.
        self.dn = densenet169(weights=DenseNet169_Weights.IMAGENET1K_V1)
        self.quant = QuantStub()
        self.dequant = DeQuantStub()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through quant stub -> DenseNet-169 -> dequant stub."""
        x = self.quant(x)
        x = self.dn(x)
        return self.dequant(x)
dn = QuantizedDenseNet169()
# Post-training static quantization must be calibrated in inference mode;
# in training mode BatchNorm would normalise with batch statistics and
# overwrite its running statistics with the calibration data.
dn.eval()
# Calibrate on the GPU when one is available, otherwise fall back to the CPU
# (the converted quantized model runs on the CPU either way).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
dn.to(device)
# Attach the qconfig to the top-level module; prepare() propagates it
dn.qconfig = torch.quantization.QConfig(
    activation=HistogramObserver.with_args(),
    weight=PerChannelMinMaxObserver.with_args(dtype=torch.qint8)
)
# fbgemm for x86 architecture
torch.backends.quantized.engine = 'fbgemm'
dn = torch.quantization.prepare(dn, inplace=False)
# calibrate with own dataset (I'm using random inputs to show process)
with torch.no_grad():
    for _ in tqdm(range(5), desc="PTQ progess"):
        input_ = torch.randn([1, 3, 128, 128], device=device)
        # Call the module rather than .forward() so registered hooks run.
        dn(input_)
# move to cpu before quantization
dn.cpu()
dn = torch.quantization.convert(dn, inplace=False)
# check if it's working
out = dn(torch.randn([1, 3, 128, 128]))
# Source: https://stackoverflow.com/questions/74612146
复制相似问题