First, the setup. I have the following simple model architecture:
```python
from torch.nn import Module, Linear, LeakyReLU

class Lin2Prop(Module):
    def __init__(self):
        super().__init__()
        # Property A
        self.al1 = Linear(159, 32)
        self.al2 = Linear(32, 2)
        # Property B
        self.bl1 = Linear(159, 32)
        self.bl2 = Linear(32, 1)

    def forward(self, x):
        # Property A prediction layers
        a_l1 = self.al1(x)
        a_l1 = LeakyReLU(negative_slope=0.2)(a_l1)
        a_l2 = self.al2(a_l1)
        # Property B prediction layers
        b_l1 = self.bl1(x)
        b_l1 = LeakyReLU(negative_slope=0.2)(b_l1)
        b_l2 = self.bl2(b_l1)
        return a_l2, b_l2
```
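Just to be explicit about the intended shapes: the two heads should map a batch of 159 features to a 2-dim (property A) and a 1-dim (property B) prediction. A quick sanity check on CPU with random input:

```python
import torch

model = Lin2Prop()
x = torch.randn(8, 159)            # batch of 8 samples, 159 features
a_pred, b_pred = model(x)
print(a_pred.shape, b_pred.shape)  # torch.Size([8, 2]) torch.Size([8, 1])
```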
And this is how I train the model:
```python
from torch.nn import MSELoss
from torch.optim import Adam
from tqdm import tqdm

model = Lin2Prop()
model = model.to('cuda')
loss_fn = MSELoss()
optimizer = Adam(model.parameters(), lr=0.01)

all_losses = []
for epoch in range(1, 50):
    total_loss = 0
    model.train()
    for x, y in tqdm(train_1_dl):
        x = x.to('cuda')
        y = y.to('cuda')
        optimizer.zero_grad()
        a_pred, b_pred = model(x)
        a_loss = loss_fn(a_pred, y[:, 0:2])  # first two target columns -> property A
        b_loss = loss_fn(b_pred, y[:, 2:3])  # last target column -> property B
        loss = a_loss + b_loss
        total_loss += loss.item()
        loss.backward(retain_graph=True)
        optimizer.step()
        all_losses.append(loss.detach().cpu().numpy())
```
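(Note that the error below is raised even with `retain_graph=True` already in place.) As far as I understand, each iteration builds a fresh graph from the current batch, so nothing should be backpropagated through twice. The only thing I can think of is the data itself: if the batches coming out of `train_1_dl` were attached to an autograd graph built during preprocessing, that graph would be shared across iterations. A quick check along those lines:

```python
# Pull one batch and see whether the tensors carry autograd history.
# Plain tensors from a DataLoader should show requires_grad=False / grad_fn=None.
x, y = next(iter(train_1_dl))
print(x.requires_grad, x.grad_fn)
print(y.requires_grad, y.grad_fn)
```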
I thought this was a simple network, but I am getting this error:
```
---------------------------------------------------------------------------
---> 21     loss.backward(retain_graph=True)
     22     optimizer.step()
     23     all_losses.append(loss.detach().cpu().numpy())

~/anaconda3/envs/alpha-ren/lib/python3.7/site-packages/torch/_tensor.py in backward(self, gradient, retain_graph, create_graph, inputs)
    487     )
    488     torch.autograd.backward(
--> 489         self, gradient, retain_graph, create_graph, inputs=inputs
    490     )
    491

~/anaconda3/envs/alpha-ren/lib/python3.7/site-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    197     Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    198         tensors, grad_tensors_, retain_graph, create_graph, inputs,
--> 199         allow_unreachable=True, accumulate_grad=True)  # Calls into the C++ engine to run the backward pass
    200
    201 def grad(

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.
```
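As far as I understand, this error is normally reproduced by calling `backward()` twice on the same graph, something like:

```python
import torch

a = torch.ones(3, requires_grad=True)
b = (a * a).sum()
b.backward()  # first backward pass frees the graph's saved tensors
b.backward()  # raises the same RuntimeError as above
```

But in my training loop every iteration runs a fresh forward pass before calling `backward()`, so I don't see where a second backward through the same graph could come from.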
I am aware of related questions like these:
- Pytorch - RuntimeError: Trying to backward through the graph a second time, but the buffers have already been freed
- Trying to backward through the graph a second time with GANs model
But I don't fully understand how those answers apply to my case, since my network seems simple. Please help me understand what I am doing wrong. I should mention that I still get the error if I simplify the model to the following:
```python
class Lin2Prop(Module):
    def __init__(self):
        super().__init__()
        # Prediction layers
        self.a1 = Linear(159, 32)
        self.a2 = Linear(32, 2)

    def forward(self, x):
        # Prediction layers
        al1 = self.a1(x)
        al1 = LeakyReLU(negative_slope=0.2)(al1)
        al2 = self.a2(al1)
        return al2
```
... and later in the training loop ...
```python
preds = model(x)
a_loss = loss_fn(preds[:, 0:1], y[:, 0:1])
b_loss = loss_fn(preds[:, 1:2], y[:, 1:2])
total_loss = a_loss + b_loss
```
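For reference, here is a fully self-contained sketch of the same setup with random stand-in data (the `TensorDataset`, shapes, and batch size are placeholders for my real pipeline, which I can't share):

```python
import torch
from torch.nn import MSELoss
from torch.optim import Adam
from torch.utils.data import TensorDataset, DataLoader

# Stand-in data: random values with the same shapes as my real features/targets.
X = torch.randn(1024, 159)
Y = torch.randn(1024, 2)
dl = DataLoader(TensorDataset(X, Y), batch_size=64, shuffle=True)

model = Lin2Prop().to('cuda')  # the simplified model above
loss_fn = MSELoss()
optimizer = Adam(model.parameters(), lr=0.01)

for epoch in range(1, 50):
    model.train()
    for x, y in dl:
        x, y = x.to('cuda'), y.to('cuda')
        optimizer.zero_grad()
        preds = model(x)
        a_loss = loss_fn(preds[:, 0:1], y[:, 0:1])
        b_loss = loss_fn(preds[:, 1:2], y[:, 1:2])
        (a_loss + b_loss).backward()
        optimizer.step()
```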