I have a model that I have trained, and I am testing it by running it in .eval() mode.
Here are the exact lines, in the order I executed them in the debugger:
(Pdb) p feature
tensor([[[ -4.0563, -3.8415, -3.9542, ..., -14.8424, -14.9201, -14.8960],
[ -5.8481, -2.0405, -2.4438, ..., -19.6938, -19.4901, -19.9180],
[ -5.2424, -1.2804, -1.5109, ..., -19.3892, -19.4397, -19.5012],
...,
[ -6.4756, -2.0376, -2.0894, ..., -20.0942, -19.9635, -19.8762],
[ -6.5087, -2.0452, -1.9018, ..., -19.7127, -19.8574, -20.0103],
[ -7.0725, -4.2817, -3.3231, ..., -16.7170, -16.9004, -17.0333]]])
(Pdb) p feature2
tensor([[[ -4.0563, -3.8415, -3.9542, ..., -14.8424, -14.9201, -14.8960],
[ -5.8481, -2.0405, -2.4438, ..., -19.6938, -19.4901, -19.9180],
[ -5.2424, -1.2804, -1.5109, ..., -19.3892, -19.4397, -19.5012],
...,
[ -6.4756, -2.0376, -2.0894, ..., -20.0942, -19.9635, -19.8762],
[ -6.5087, -2.0452, -1.9018, ..., -19.7127, -19.8574, -20.0103],
[ -7.0725, -4.2817, -3.3231, ..., -16.7170, -16.9004, -17.0333]]])
(Pdb) torch.all(feature == feature2)
tensor(True)
(Pdb) prediction_tag, prediction_time = model(feature)
(Pdb) prediction_tag2, prediction_time2 = model(feature2)
(Pdb) prediction_time
tensor([[[9.6584e-06, 3.9059e-05, 4.0984e-06, ..., 1.7644e-04,
1.0589e-02, 4.4167e-06],
[9.6584e-06, 3.9059e-05, 4.0984e-06, ..., 1.7644e-04,
1.0589e-02, 4.4167e-06],
[9.3993e-06, 3.7754e-05, 3.9786e-06, ..., 1.7362e-04,
1.0243e-02, 4.2382e-06],
...,
[7.8885e-06, 1.1077e-05, 3.8594e-06, ..., 1.9443e-04,
3.8032e-03, 6.6878e-06],
[8.0696e-06, 1.1217e-05, 3.9580e-06, ..., 2.0004e-04,
3.7598e-03, 6.8072e-06],
[8.0696e-06, 1.1217e-05, 3.9580e-06, ..., 2.0004e-04,
3.7598e-03, 6.8072e-06]]])
(Pdb) p prediction_time2
tensor([[[8.0289e-07, 2.0557e-05, 2.5803e-05, ..., 3.3225e-04,
4.4547e-03, 8.4192e-06],
[8.0289e-07, 2.0557e-05, 2.5803e-05, ..., 3.3225e-04,
4.4547e-03, 8.4192e-06],
[7.6509e-07, 1.9805e-05, 2.4918e-05, ..., 3.2385e-04,
4.3618e-03, 7.9963e-06],
...,
[7.3927e-07, 8.7688e-06, 1.8454e-05, ..., 1.9831e-04,
1.9305e-03, 6.2879e-06],
[7.7376e-07, 8.8673e-06, 1.8517e-05, ..., 2.0194e-04,
1.8297e-03, 6.3183e-06],
[7.7376e-07, 8.8673e-06, 1.8517e-05, ..., 2.0194e-04,
1.8297e-03, 6.3183e-06]]])
(Pdb) torch.all(prediction_time == prediction_time2)
tensor(False)
As you can see, even though feature and feature2 are seemingly the same inputs, the model's outputs do not match. This isn't random either: after executing the lines above, I ran the following:
(Pdb) prediction_tag, prediction_time = model(feature)
(Pdb) prediction_time
tensor([[[9.6584e-06, 3.9059e-05, 4.0984e-06, ..., 1.7644e-04,
1.0589e-02, 4.4167e-06],
[9.6584e-06, 3.9059e-05, 4.0984e-06, ..., 1.7644e-04,
1.0589e-02, 4.4167e-06],
[9.3993e-06, 3.7754e-05, 3.9786e-06, ..., 1.7362e-04,
1.0243e-02, 4.2382e-06],
...,
[7.8885e-06, 1.1077e-05, 3.8594e-06, ..., 1.9443e-04,
3.8032e-03, 6.6878e-06],
[8.0696e-06, 1.1217e-05, 3.9580e-06, ..., 2.0004e-04,
3.7598e-03, 6.8072e-06],
[8.0696e-06, 1.1217e-05, 3.9580e-06, ..., 2.0004e-04,
3.7598e-03, 6.8072e-06]]])
(Pdb) prediction_tag2, prediction_time2 = model(feature2)
(Pdb) prediction_time2
tensor([[[8.0289e-07, 2.0557e-05, 2.5803e-05, ..., 3.3225e-04,
4.4547e-03, 8.4192e-06],
[8.0289e-07, 2.0557e-05, 2.5803e-05, ..., 3.3225e-04,
4.4547e-03, 8.4192e-06],
[7.6509e-07, 1.9805e-05, 2.4918e-05, ..., 3.2385e-04,
4.3618e-03, 7.9963e-06],
...,
[7.3927e-07, 8.7688e-06, 1.8454e-05, ..., 1.9831e-04,
1.9305e-03, 6.2879e-06],
[7.7376e-07, 8.8673e-06, 1.8517e-05, ..., 2.0194e-04,
1.8297e-03, 6.3183e-06],
[7.7376e-07, 8.8673e-06, 1.8517e-05, ..., 2.0194e-04,
1.8297e-03, 6.3183e-06]]])
I get the same outputs as before, so each call is reproducible, yet the two results still differ from each other. Why am I experiencing this issue? I am totally confused.
Notes: I have checked that both feature and feature2 have dtype torch.float32. feature was extracted from a torch DataLoader that I set up, while feature2 was obtained directly by reading a file.
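In case it is relevant, this is roughly how the two inputs can be compared beyond the dtype check (a minimal sketch run in the same Pdb session; the extra metadata and difference checks are suggestions rather than the exact commands I originally ran):

# Sketch: compare the two input tensors more thoroughly (assumes feature/feature2 as above)
print(feature.dtype, feature2.dtype)        # both torch.float32
print(feature.shape, feature2.shape)        # shapes should match exactly
print(feature.device, feature2.device)      # both should live on the same device
print(torch.equal(feature, feature2))       # exact element-wise equality, including shape
print((feature - feature2).abs().max())     # largest absolute difference; expected 0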
EDIT: Here is how the model was constructed:
# (imports shown for completeness)
import torch
import torch.nn as nn
from pathlib import Path


class CRNN(nn.Module):
    def __init__(self, inputdim, outputdim, **kwargs):
        super().__init__()
        features = nn.ModuleList()
        self.features = nn.Sequential(
            Block2D(1, 32),
            nn.LPPool2d(4, (2, 4)),
            Block2D(32, 128),
            Block2D(128, 128),
            nn.LPPool2d(4, (2, 4)),
            Block2D(128, 128),
            Block2D(128, 128),
            nn.LPPool2d(4, (1, 4)),
            nn.Dropout(0.3),
        )
        with torch.no_grad():
            rnn_input_dim = self.features(torch.randn(1, 1, 500,
                                                      inputdim)).shape
            rnn_input_dim = rnn_input_dim[1] * rnn_input_dim[-1]

        self.gru = nn.GRU(rnn_input_dim,
                          128,
                          bidirectional=True,
                          batch_first=True)
        self.temp_pool = parse_poolingfunction(kwargs.get(
            'temppool', 'linear'),
                                               inputdim=256,
                                               outputdim=outputdim)
        self.outputlayer = nn.Linear(256, outputdim)
        self.features.apply(init_weights)
        self.outputlayer.apply(init_weights)

    def forward(self, x):
        batch, time, dim = x.shape
        x = x.unsqueeze(1)
        x = self.features(x)
        x = x.transpose(1, 2).contiguous().flatten(-2)
        x, _ = self.gru(x)
        decision_time = torch.sigmoid(self.outputlayer(x)).clamp(1e-7, 1.)
        decision_time = torch.nn.functional.interpolate(
            decision_time.transpose(1, 2),
            time,
            mode='linear',
            align_corners=False).transpose(1, 2)
        decision = self.temp_pool(x, decision_time).clamp(1e-7, 1.).squeeze(1)
        return decision, decision_time
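# Shape walk-through of forward() for reference (illustrative, assuming inputdim=64
# and a 500-frame input; not part of the original code):
#   x: (batch, 500, 64) -> unsqueeze(1) -> (batch, 1, 500, 64)
#   self.features (pools of (2, 4), (2, 4), (1, 4)) -> (batch, 128, 125, 1)
#   transpose + flatten(-2) -> (batch, 125, 128); bidirectional GRU -> (batch, 125, 256)
#   outputlayer + sigmoid -> (batch, 125, outputdim); interpolate back to (batch, 500, outputdim)
#   temp_pool then reduces the time dimension, giving decision of shape (batch, outputdim)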
def crnn(inputdim=64, outputdim=527, pretrained_file='gpv_f'):
    model = CRNN(inputdim, outputdim)
    if pretrained_file:
        state = torch.load(Path(__file__).parent / pretrained_file,
                           map_location='cpu')
        model.load_state_dict(state, strict=True)
    return model
with the following helpers:
class Block2D(nn.Module):
    def __init__(self, cin, cout, kernel_size=3, padding=1):
        super().__init__()
        self.block = nn.Sequential(
            nn.BatchNorm2d(cin),
            nn.Conv2d(cin,
                      cout,
                      kernel_size=kernel_size,
                      padding=padding,
                      bias=False),
            nn.LeakyReLU(inplace=True, negative_slope=0.1))

    def forward(self, x):
        return self.block(x)
def init_weights(m):
    if isinstance(m, (nn.Conv2d, nn.Conv1d)):
        nn.init.kaiming_normal_(m.weight)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.BatchNorm2d):
        nn.init.constant_(m.weight, 1)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
    if isinstance(m, nn.Linear):
        nn.init.kaiming_uniform_(m.weight)
        if m.bias is not None:
            nn.init.constant_(m.bias, 0)
class LinearSoftPool(nn.Module):
    """LinearSoftPool

    Linear softmax, takes logits and returns a probability, near to the
    actual maximum value.

    Taken from the paper:
        A Comparison of Five Multiple Instance Learning Pooling Functions
        for Sound Event Detection with Weak Labeling
        https://arxiv.org/abs/1810.09050
    """
    def __init__(self, pooldim=1):
        super().__init__()
        self.pooldim = pooldim

    def forward(self, logits, time_decision):
        return (time_decision**2).sum(self.pooldim) / time_decision.sum(
            self.pooldim)
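# Intuition for LinearSoftPool (illustrative sketch, not part of the original code):
# it computes sum(p**2) / sum(p) over the pooling dimension, which weights each frame
# by its own probability, so the result lands between the mean and the max. For example,
# with p = torch.tensor([[[0.1], [0.2], [0.9], [0.8]]])  # (batch=1, time=4, classes=1)
# LinearSoftPool(pooldim=1)(None, p) gives ~0.75, versus a mean of 0.5 and a max of 0.9.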
class MeanPool(nn.Module):
    def __init__(self, pooldim=1):
        super().__init__()
        self.pooldim = pooldim

    def forward(self, logits, decision):
        return torch.mean(decision, dim=self.pooldim)
def parse_poolingfunction(poolingfunction_name='mean', **kwargs):
    """parse_poolingfunction

    A helper function to parse any temporal pooling.
    Pooling is done on dimension 1.

    :param poolingfunction_name:
    :param **kwargs:
    """
    poolingfunction_name = poolingfunction_name.lower()
    if poolingfunction_name == 'mean':
        return MeanPool(pooldim=1)
    elif poolingfunction_name == 'linear':
        return LinearSoftPool(pooldim=1)
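For completeness, this is roughly how I build and call the model (a minimal sketch; the torch.no_grad() wrapper and the comments are illustrative and may differ slightly from my actual script):

import torch

model = crnn(inputdim=64, outputdim=527, pretrained_file='gpv_f')  # load the checkpoint
model.eval()  # evaluation mode: dropout off, BatchNorm uses its running statistics
with torch.no_grad():
    prediction_tag, prediction_time = model(feature)    # feature: (batch, time, inputdim)
    prediction_tag2, prediction_time2 = model(feature2)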