In my CNN for image classification I get a curious loss, and I don't know what's wrong. I'd be grateful if you could help me find the mistake. Here is an excerpt of my print output, and at the end there is my code:
Train Epoch: 1 [0/2048 (0%)] Loss: 0.654869
Train Epoch: 1 [64/2048 (3%)] Loss: 0.271722
Train Epoch: 1 [128/2048 (6%)] Loss: 0.001958
Train Epoch: 1 [192/2048 (9%)] Loss: 0.003399
Train Epoch: 1 [256/2048 (12%)] Loss: 0.000000
Train Epoch: 1 [320/2048 (16%)] Loss: 0.006664
Train Epoch: 1 [384/2048 (19%)] Loss: 0.000000
Train Epoch: 1 [448/2048 (22%)] Loss: 0.000000
Train Epoch: 1 [512/2048 (25%)] Loss: 0.000000
Train Epoch: 1 [576/2048 (28%)] Loss: 0.000000
Train Epoch: 2 [0/2048 (0%)] Loss: 173505.656250
Train Epoch: 2 [64/2048 (3%)] Loss: 0.000000
Train Epoch: 2 [128/2048 (6%)] Loss: 0.000000
Train Epoch: 2 [192/2048 (9%)] Loss: 33394.285156
Train Epoch: 2 [256/2048 (12%)] Loss: 0.000000
Train Epoch: 2 [320/2048 (16%)] Loss: 0.000000
Train Epoch: 2 [960/2048 (47%)] Loss: 0.000000
Train Epoch: 2 [1024/2048 (50%)] Loss: 636908.437500
Train Epoch: 2 [1088/2048 (53%)] Loss: 32862667387437056.000000
Train Epoch: 2 [1152/2048 (56%)] Loss: 15723443952412777718762887446528.000000
Train Epoch: 2 [1216/2048 (59%)] Loss: nan
Train Epoch: 2 [1280/2048 (62%)] Loss: nan
Train Epoch: 2 [1344/2048 (66%)] Loss: nan
Train Epoch: 2 [1408/2048 (69%)] Loss: nan
As you can see, the loss collapses to exactly zero within the first epoch, then explodes and finally turns into NaN in the second. Here is the training code:
import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torchvision import transforms

# model, optimizer and net.train_data are set up elsewhere in my script.
def trainM(epoch):
    model.train()
    for batch_id, (data, target) in enumerate(net.train_data):
        # slice this batch's labels out of the stored target list
        target = torch.LongTensor(target[64 * batch_id:64 * (batch_id + 1)])
        optimizer.zero_grad()
        out = model(data)
        criterion = F.nll_loss
        loss = criterion(out, target)
        loss.backward()
        optimizer.step()
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_id * len(data), len(net.train_data) * 64,
            100 * batch_id / len(net.train_data), loss.item()))
for item in range(1, 10):
    trainM(item)
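To narrow it down, one thing I could log is the overall gradient norm right after loss.backward(), to see exactly at which step it blows up. Here is a minimal sketch of that (not part of my actual script; grad_norm is just an illustrative helper):

def grad_norm(model):
    # sum the squared L2 norms of all parameter gradients, then take the root
    total = 0.0
    for p in model.parameters():
        if p.grad is not None:
            total += p.grad.norm(2).item() ** 2
    return total ** 0.5

# e.g. inside the training loop, after loss.backward():
# print('grad norm:', grad_norm(model))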
That's the code for the neural network, and after it the dataPrep method I use for data preparation:
train_data = []
target_list = []

class Netz(nn.Module):
    def __init__(self):
        super(Netz, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv_dropout = nn.Dropout2d()
        self.fc1 = nn.Linear(1050, 60)
        self.fc2 = nn.Linear(60, 2)
        self.fce = nn.Linear(20, 1)

    def forward(self, x):
        x = self.conv1(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)
        x = self.conv2(x)
        x = self.conv_dropout(x)
        x = F.max_pool2d(x, 2)
        x = F.relu(x)
        x = x.reshape(x.shape[0], x.shape[1], -1)      # (batch, 20, 1050)
        x = F.relu(self.fc1(x))                        # (batch, 20, 60)
        x = self.fc2(x)                                # (batch, 20, 2)
        x = self.fce(x.permute(0, 2, 1)).squeeze(-1)   # (batch, 2)
        return F.log_softmax(x, -1)
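The cropped input images are 115 x 180 pixels, which is where the 1050 in fc1 comes from: after both convolutions and poolings each of the 20 channels is 42 x 25 = 1050 values. A quick dummy forward pass to check the shapes (just a sanity-check sketch, not part of my training script):

check = Netz()
dummy = torch.randn(64, 1, 180, 115)   # (batch, channels, height, width) of one cropped image
print(check(dummy).shape)              # prints torch.Size([64, 2])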
def dataPrep(list_of_data, data_path, category, quantity):
    global train_data
    global target_list
    train_data_list = []
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    len_data = len(train_data)
    for item in list_of_data:
        # pick a random file and remove it so it is not used twice
        f = random.choice(list_of_data)
        list_of_data.remove(f)
        try:
            img = Image.open(data_path + f)
        except:
            continue  # skip files that cannot be opened
        img_crop = img.crop((310, 60, 425, 240))   # 115 x 180 pixel region
        img_tensor = transform(img_crop)
        train_data_list.append(img_tensor)
        target = 1 if category else 0
        target_list.append(target)
        if len(train_data_list) >= 64:
            # store one batch of 64 stacked image tensors together with the target list
            train_data.append((torch.stack(train_data_list), target_list))
            train_data_list = []
        if (len_data * 64 + quantity) <= len(train_data) * 64:
            break
    return list_of_data
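For completeness, this is roughly how dataPrep gets called; the directory names and the 1024-per-class split are placeholders (chosen to match the 2048 samples in the log), not my exact call:

import os

rest_pos = dataPrep(os.listdir('images/pos/'), 'images/pos/', True, 1024)
rest_neg = dataPrep(os.listdir('images/neg/'), 'images/neg/', False, 1024)
# train_data now holds 2048 / 64 = 32 batches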