I was trying to train a CNN model with CIFAR-10 dataset and while the training I noticed the accuracy is stuck at 10% from the start. My best guess is that the accuracy is not properly updating, but I still can't figure out why this is happening. Below is the part of my code where I defined,trained and found accuracy of my CNN model.
# Define CNN
class CNNModel_b1(nn.Module):
def __init__(self):
super(CNNModel_b1, self).__init__()
# Layer 1: Conv2d
self.conv1 = nn.Conv2d(3,6,5)
# Layer 2: ReLU
self.relu2 = nn.ReLU()
# Layer 3: Conv2d
self.conv3 = nn.Conv2d(6,16,3)
# Layer 4: ReLU
self.relu4 = nn.ReLU()
# Layer 4b: Maxpooling
self.maxpool4b = nn.MaxPool2d(kernel_size=2)
# Layer 5: Conv2d
self.conv5 = nn.Conv2d(16,24,3)
# Layer 6: ReLU
self.relu6 = nn.ReLU()
# Layer 7: Linear (fully connected)
self.fc7 = nn.Linear(2904,120)
# Layer 8: ReLU
self.relu8 = nn.ReLU()
# Layer 9: Linear (fully connected)
self.fc9 = nn.Linear(120,84)
# Layer 10: ReLU
self.relu10 = nn.ReLU()
# Layer 11: Linear (fully connected)
self.fc11 = nn.Linear(84,10)
def forward(self,x): # Giving an argument that decides whether to plot feature map.
global out
# Layer 1: Conv2d
out = self.conv1(x)
# Layer 2: ReLU
out = self.relu2(out)
# Layer 3: Conv2d
out = self.conv3(out)
# Layer 4: ReLU
out = self.relu4(out)
# Layer 4b: Maxpooling
out = self.maxpool4b(out)
# Visualize the first channel of the feature map
# Uncomment below 3 lines to answer Part b - 1, comment for training
# plt.imshow(out[0][0].cpu().detach().numpy())
# plt.show()
# plt.close('all')
# Layer 5: Conv2d
out = self.conv5(out)
# Layer 6: ReLU
out = self.relu6(out)
# Convert the output tensor into a 1D vector
out = out.view(out.size(0), -1)
# Layer 7: Linear (fully connected)
out = self.fc7(out)
# Layer 8: ReLU
out = self.relu8(out)
# Layer 9: Linear (fully connected)
out = self.fc9(out)
# Layer 10: ReLU
out = self.relu10(out)
# Layer 11: Linear (fully connected)
out = self.fc11(out)
return out
# Create CNN
device = "cuda" if torch.cuda.is_available() else "cpu"
model_b1 = CNNModel_b1()
model_b1.to(device)
# TODO: define Cross Entropy Loss
cross_ent = nn.CrossEntropyLoss()
# TODO: create Adam Optimizer and define your hyperparameters
# Use L2 penalty of 1e-8
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3, weight_decay = 1e-8)
# Train CNN
count = 0
loss_list_b1 = []
iteration_list = []
accuracy_list_b1 = []
num_epochs = 20
for epoch in tqdm(range(num_epochs)):
model_b1.train()
for i, (images, labels) in enumerate(train_loader):
images, labels = images.to(device), labels.to(device)
# Clear gradients
optimizer.zero_grad()
# TODO: Forward propagation
out = model_b1(images)
# TODO: Calculate softmax and cross entropy loss
loss = cross_ent(out,labels)
# Backpropagate your Loss
loss.backward()
# Update CNN model_b1
optimizer.step()
count += 1
if count % 50 == 0:
model_b1.eval()
# Calculate Accuracy
correct = 0
total = 0
# Iterate through test dataset
for images, labels in test_loader:
images, labels = images.to(device), labels.to(device)
# Forward propagation
outputs = model_b1(images)
# Get predictions from the maximum value
predicted = torch.argmax(outputs,1)
# Total number of labels
total += len(labels)
correct += (predicted == labels).sum()
accuracy = 100 * correct / float(total)
# store loss and iteration
loss_list_b1.append(loss.item())
iteration_list.append(count)
accuracy_list_b1.append(accuracy.item())
if count % 500 == 0:
# Print Loss
print('Iteration: {} Loss: {} Accuracy: {} %'.format(count, loss.data, accuracy))