EDIT: attaching some code to help generate similar results (appended at end)
I have a really small model with architecture [2, 3, 7] (2 inputs, 3 hidden units, 7 output classes), where the hidden layer uses ReLU and the output layer uses a softmax activation for multiclass classification. It was trained offline and later statically quantized to qint8. What I would like to do now is extract the weights so I can use them on other hardware via matrix multiplication/addition. The problem I'm encountering is that the extracted weights don't seem to behave as expected. Take, for instance, this GraphModule output of state_dict():
OrderedDict([('input_layer_input_scale_0', tensor(0.0039)),
('input_layer_input_zero_point_0', tensor(0)),
('input_layer.scale', tensor(0.0297)),
('input_layer.zero_point', tensor(0)),
('input_layer._packed_params.dtype', torch.qint8),
('input_layer._packed_params._packed_params',
(tensor([[-0.1180, 0.1180],
[-0.2949, -0.5308],
[-3.3029, -7.5496]], size=(3, 2), dtype=torch.qint8,
quantization_scheme=torch.per_tensor_affine, scale=0.05898105353116989,
zero_point=0),
Parameter containing:
tensor([-0.4747, -0.3563, 7.7603], requires_grad=True))),
('out.scale', tensor(1.5963)),
('out.zero_point', tensor(243)),
('out._packed_params.dtype', torch.qint8),
('out._packed_params._packed_params',
(tensor([[ 0.4365, 0.4365, -55.4356],
[ 0.4365, 0.0000, 1.3095],
[ 0.4365, 0.0000, -13.9680],
[ 0.4365, -0.4365, 4.3650],
[ 0.4365, 0.4365, -3.0555],
[ 0.4365, 0.0000, -1.3095],
[ 0.4365, 0.0000, 3.0555]], size=(7, 3), dtype=torch.qint8,
quantization_scheme=torch.per_tensor_affine, scale=0.43650051951408386,
zero_point=0),
Parameter containing:
tensor([ 19.2761, -1.0785, 14.2602, -22.3171, 10.1059, 7.2197, -11.7253],
requires_grad=True)))])
If I directly access the weights the way I think I should, like so:
import numpy as np

# Signed 8-bit integers span [-128, 127]. Clipping to [-127, 128] would turn
# the -128 entry of W1 (-7.5496 / scale) into -127 and allow an
# unrepresentable +128.
QINT8_MIN, QINT8_MAX = -128, 127

# Dequantized weights of `input_layer`, copied from the state_dict printout.
input_weights = np.array([
    [-0.1180, 0.1180],
    [-0.2949, -0.5308],
    [-3.3029, -7.5496],
])
inputs_scale = 0.05898105353116989
inputs_zero_point = 0

# Integer representation: q = clip(round(x / scale + zero_point)).
W1 = np.clip(
    np.round(input_weights / inputs_scale + inputs_zero_point),
    QINT8_MIN,
    QINT8_MAX,
)
# NOTE(review): the state_dict printout lists the bias as a float Parameter,
# not as a qint8 tensor; quantizing it with the weight scale is the approach
# under question here and may not match what the quantized kernel does.
b1 = np.clip(
    np.round(np.array([-0.4747, -0.3563, 7.7603]) / inputs_scale + inputs_zero_point),
    QINT8_MIN,
    QINT8_MAX,
)

# Dequantized weights of `out`, copied from the state_dict printout.
output_weights = np.array([
    [0.4365, 0.4365, -55.4356],
    [0.4365, 0.0000, 1.3095],
    [0.4365, 0.0000, -13.9680],
    [0.4365, -0.4365, 4.3650],
    [0.4365, 0.4365, -3.0555],
    [0.4365, 0.0000, -1.3095],
    [0.4365, 0.0000, 3.0555],
])
outputs_scale = 0.43650051951408386
outputs_zero_point = 0

# Second-layer weights and bias get their own names (W2 / b2) so they no
# longer overwrite W1 and so that b2 exists for the forward pass.
W2 = np.clip(
    np.round(output_weights / outputs_scale + outputs_zero_point),
    QINT8_MIN,
    QINT8_MAX,
)
b2 = np.clip(
    np.round(
        np.array([19.2761, -1.0785, 14.2602, -22.3171, 10.1059, 7.2197, -11.7253])
        / outputs_scale
        + outputs_zero_point
    ),
    QINT8_MIN,
    QINT8_MAX,
)
And then I give it some data:
# One example per row, two features per example.
inputs = np.array(
    [
        [1.0, 1.0],   # class 0 example
        [1.0, 0.0],   # class 1 example
        [0.0, 1.0],
        [0.0, 0.0],
        [0.0, 0.9],
        [0.0, 0.75],
        [0.0, 0.25],  # class 6 example
    ],
    dtype=np.float64,
)
where each row is an example. I would then expect to be able to do the matrix multiplications and take the argmax over each row to get the predicted class. However, doing that gives me this:
>>> (ReLU((inputs @ W1.T) + b1) @ W2.T + b2).argmax(axis=0)
array([0, 3, 0, 3, 0, 0, 3])
which is not right. When I test the accuracy of the quantized model in PyTorch, it is high enough that it should get all of these examples correct. So what am I misunderstanding about how to access these weights and biases?
EDIT: adding code to help people experiment with quantization. Technically it doesn't matter how the model is generated - the OrderedDict of a quantized model will look similar. If you want to experiment, here is some code that trains a model on the XOR problem and quantizes it. Note that I'm still using a multiclass-classification setup to stay close to my original model. Anyway... here you go:
import torch
import torch.nn as nn
import random
import copy
import numpy as np
import tensorflow as tf
import torch.nn.functional as F
from torch.ao.quantization.quantize_fx import prepare_fx, convert_fx
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning.callbacks.progress import RichProgressBar
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
import pytorch_lightning as pl
class XORModel(nn.Module):
    """Two-layer perceptron: 2 inputs -> ``h`` hidden units (ReLU) -> 2 logits."""

    def __init__(self, h: int):
        """Create the two linear layers; ``h`` is the hidden-layer width."""
        super().__init__()
        # The attribute names double as the state_dict key prefixes
        # ("input_layer.*", "out.*"), so they are kept unchanged.
        self.input_layer = nn.Linear(2, h)
        self.out = nn.Linear(h, 2)

    def forward(self, x):
        """Return the raw (pre-softmax) class scores for ``x``."""
        hidden = F.relu(self.input_layer(x))
        return self.out(hidden)
class LitModel(pl.LightningModule):
    """Lightning wrapper that trains the wrapped model with cross-entropy loss."""

    def __init__(self, model: XORModel):
        """Store ``model``; it is the module optimised by this wrapper."""
        super().__init__()
        self.model = model

    def forward(self, x):
        """Delegate inference to the wrapped model."""
        return self.model(x)

    def _generic_step(self, batch, batch_idx, calc_metric: bool = False):
        """Compute the cross-entropy loss for one ``(x, y)`` batch.

        When ``calc_metric`` is true, the batch accuracy is also logged
        under the key ``'Accuracy'``.
        """
        features, targets = batch
        logits = self.model(features)
        if calc_metric:
            # The metric is for reporting only, so keep it out of autograd.
            with torch.no_grad():
                predicted = F.softmax(logits, dim=-1).argmax(-1)
                accuracy = (predicted.ravel() == targets.ravel()).float().mean()
            self.log('Accuracy', accuracy, prog_bar=True)
        return F.cross_entropy(logits, targets)

    def training_step(self, batch, batch_idx):
        """Return the training loss for ``batch`` and log it as ``'train_loss'``."""
        train_loss = self._generic_step(batch, batch_idx)
        self.log('train_loss', train_loss, prog_bar=True)
        return train_loss

    def validation_step(self, batch, batch_idx):
        """Return the validation loss, logging it and the batch accuracy."""
        val_loss = self._generic_step(batch, batch_idx, calc_metric=True)
        self.log('val_loss', val_loss, prog_bar=True)
        return val_loss

    def configure_optimizers(self):
        """Use Adam with its default settings on the wrapped model's parameters."""
        return torch.optim.Adam(self.model.parameters())
def get_accuracy(model: XORModel, seed: int):
    """Return the accuracy of ``model`` on a freshly generated XOR dataset.

    Args:
        model: The network to evaluate; called as ``model(X)`` on each batch.
        seed: Seed forwarded to ``make_dataset`` so the evaluation data is
            reproducible.

    Returns:
        The mean of the per-batch accuracies (a NumPy float).
    """
    dataset = make_dataset(1000, 1000, False, seed)
    # Remember the current mode so evaluation does not leave an eval-mode
    # model (e.g. a converted quantized model) switched to training mode.
    was_training = model.training
    model.eval()
    batch_accuracies = []
    try:
        with torch.no_grad():
            for X, y in dataset:
                predicted = F.softmax(model(X), dim=-1).argmax(-1)
                batch_accuracies.append((predicted.cpu().numpy() == y.numpy()).mean())
    finally:
        # Restore the original mode even if a forward pass raised.
        model.train(was_training)
    return np.array(batch_accuracies).mean()
def make_dataset(samples: int, batch_size: int, shuffle: bool, seed: int):
    """Build a ``DataLoader`` of random XOR examples.

    Args:
        samples: Number of ``(x0, x1)`` bit pairs to generate.
        batch_size: Batch size handed to the ``DataLoader``.
        shuffle: Whether the ``DataLoader`` shuffles the examples.
        seed: Seed for the private ``random.Random`` generator that draws
            the bits.

    Returns:
        A ``DataLoader`` yielding ``(features, labels)`` batches, with float
        features and long labels, where each label is ``x0 XOR x1``.
    """
    bit_source = random.Random(seed)
    # Draw x0 before x1 for every sample so the random stream is consumed
    # in a fixed order.
    pairs = [
        (bit_source.randint(0, 1), bit_source.randint(0, 1))
        for _ in range(samples)
    ]
    labels = [first ^ second for first, second in pairs]
    features = torch.tensor(pairs, dtype=torch.float)
    targets = torch.tensor(labels, dtype=torch.long)
    return DataLoader(
        TensorDataset(features, targets),
        batch_size=batch_size,
        shuffle=shuffle,
    )
def quantize_model(model: XORModel):
    """Return a statically quantized (FX graph mode) copy of ``model``.

    The caller's ``model`` is left untouched: a deep copy is switched to
    eval mode, prepared with the default ``'fbgemm'`` qconfig, calibrated
    on generated XOR inputs and then converted.

    Args:
        model: The trained float model to quantize.

    Returns:
        The converted quantized model produced by ``convert_fx``.
    """
    model_to_quantize = copy.deepcopy(model)
    model_to_quantize.eval()

    def calibrate(m, data_loader):
        """Run every item of ``data_loader`` through ``m`` without gradients."""
        m.eval()
        with torch.no_grad():
            for x in data_loader:
                m(x)

    loader = make_dataset(1000, 1000, False, 0x42)
    # First element of the first batch, i.e. the feature tensor only.
    sample_inputs = next(iter(loader))[0]
    qconfig_dict = {'': torch.quantization.get_default_qconfig('fbgemm')}
    # Prepare the eval-mode copy rather than the caller's model, so the
    # original float model is neither modified nor prepared in train mode.
    # NOTE(review): newer PyTorch releases appear to require an
    # `example_inputs` argument to prepare_fx -- confirm against the
    # installed version before adding it.
    prepared_model = prepare_fx(model_to_quantize, qconfig_dict)
    # `sample_inputs` is a tensor, so calibrate() iterates over it one
    # example at a time.
    calibrate(prepared_model, sample_inputs)
    quantized_model = convert_fx(prepared_model)
    return quantized_model
if __name__ == '__main__':
    # Train a 3-hidden-unit XOR model, quantize it, and compare accuracies.
    train_dataset = make_dataset(10_000, 256, True, 123456)
    val_dataset = make_dataset(500, 64, True, 0xabcd)
    model = XORModel(3)
    lit_model = LitModel(model)
    trainer = pl.Trainer(
        accelerator='cpu',
        max_epochs=100,
        callbacks=[
            RichProgressBar(refresh_rate=50),
            # Stop once the validation loss has not improved for 3 checks.
            EarlyStopping(monitor='val_loss', mode='min', patience=3),
        ],
    )
    trainer.fit(lit_model, train_dataset, val_dataset)
    qmodel = quantize_model(lit_model.model)
    # get_accuracy() generates its own evaluation data from the given seed.
    print('accuracy of model', get_accuracy(model, 0xbeef))  # prints 1
    print('accuracy of qmodel', get_accuracy(qmodel, 0xbeef))  # prints 1
Now, assuming you save off the qmodel for later, you can inspect its parameters the same way I did by calling qmodel.state_dict().