I am trying to implement an extremely simple regression task with GNNs, specifically GAT networks, and am getting very poor performance. Compared to an MLP on the same task, the GNN model performs approximately 10x worse. I am aware that the problem I have designed doesn't need to be represented as a graph for proper inference; I am just using this as a sanity check before I introduce the GNN to a more complex environment and use deep RL to train it.
I have tried a number of GNN layer types (GCN, GAT and GIN), and all of them result in roughly the same poor performance.
Here is the code:
import random
import torch
import torch.nn as nn
import numpy as np
from torch.nn import Linear
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, global_add_pool, GATv2Conv

# Shared activation used by both models.
s = nn.LeakyReLU(0.4)

# Build 100 tiny 3-node path graphs (0 - 1 - 2) with one scalar feature per node
# and a single graph-level regression target.
dataarray = []
for i in range(100):
    rand1 = random.randint(1, 5) / 5 + 1
    rand2 = random.randint(1, 5) / 5 + 1
    rand3 = random.randint(1, 5) / 5 + 1
    edge_index = torch.tensor([[0, 1, 1, 2],
                               [1, 0, 2, 1]], dtype=torch.long)
    x = torch.tensor([[rand1], [rand2], [rand3]], dtype=torch.float)
    y = torch.tensor([[rand1 * rand2 * rand3 + rand1 / rand2 * rand3 * rand3]], dtype=torch.float)
    dataarray.append(Data(x=x, edge_index=edge_index, y=y))
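For reference, each element of dataarray is a PyG Data object holding a [3, 1] node-feature matrix, a 4-edge bidirectional path graph, and one graph-level target. A quick check like the following (illustrative only, not part of the training code) confirms the shapes:

sample = dataarray[0]
print(sample)                            # Data(x=[3, 1], edge_index=[2, 4], y=[1, 1])
print(sample.x.shape, sample.y.shape)    # torch.Size([3, 1]) torch.Size([1, 1])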
class GCN(torch.nn.Module):
    def __init__(self):
        super().__init__()
        torch.manual_seed(1234)
        # Node feature dim is 1; hidden dim is 10 per attention head.
        self.conv1 = GATv2Conv(1, 10, heads=8)
        self.conv2 = GATv2Conv(10 * 8, 10, heads=4)
        self.conv3 = GATv2Conv(10 * 4, 10, heads=1)
        self.classifier5 = Linear(10, 1)

    def forward(self, x, edge_index):
        h = self.conv1(x, edge_index)
        h = s(h)
        h = self.conv2(h, edge_index)
        h = s(h)
        h = self.conv3(h, edge_index)
        h = s(h)
        # Sum node embeddings into a single graph embedding.
        h = global_add_pool(h, batch=None)
        # Apply a final (linear) regression head.
        h = self.classifier5(h)
        return h
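An untrained forward pass on a single sample (again, just a sanity-check snippet rather than part of the model code) should return a [1, 1] tensor matching the shape of y:

sample = dataarray[0]
out = GCN()(sample.x, sample.edge_index)
print(out.shape)    # torch.Size([1, 1])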
class MLP(torch.nn.Module):
    def __init__(self):
        super().__init__()
        torch.manual_seed(1234)
        self.classifier1 = Linear(3, 10)
        self.classifier2 = Linear(10, 10)
        self.classifier3 = Linear(10, 10)
        self.classifier4 = Linear(10, 10)
        self.classifier5 = Linear(10, 1)

    def forward(self, x, edge_index):
        # x is [3, 1]; transpose to [1, 3] so all three node features feed one layer.
        h = self.classifier1(x.T)
        h = s(h)
        h = self.classifier2(h)
        h = s(h)
        h = self.classifier3(h)
        h = s(h)
        h = self.classifier4(h)
        h = s(h)
        h = self.classifier5(h)
        return h
model = GCN()
modelMlp = MLP()
criterion = torch.nn.SmoothL1Loss()
optimizer = torch.optim.NAdam(model.parameters(), lr=0.001)
fnnOptim = torch.optim.NAdam(modelMlp.parameters(), lr=0.001)
def train(data):
    # GNN step.
    optimizer.zero_grad()                            # Clear gradients.
    out = model(data.x, data.edge_index).flatten()   # Perform a single forward pass.
    loss = criterion(out, data.y.flatten())          # Compute the loss against the graph-level target.
    loss.backward()                                  # Derive gradients.
    optimizer.step()                                 # Update parameters based on gradients.

    # MLP step.
    fnnOptim.zero_grad()
    out2 = modelMlp(data.x, data.edge_index).flatten()
    loss2 = criterion(out2, data.y.flatten())
    loss2.backward()
    fnnOptim.step()

    return loss.item(), loss2.item()
avgLoss = []
for i in range(100000):
    lss = []
    lss2 = []
    for item in dataarray:
        loss1, loss2 = train(item)
        lss.append(loss1)
        lss2.append(loss2)
    avgLoss.append(np.average(lss))
    print(f"Avg loss {np.average(lss)} MLP: {np.average(lss2)}")
    # print(f"1 : {train(dataarray[0])} 2: {train(dataarray[1])}")
For this most recent run, the GCN model achieved approximately 0.1 SmoothL1Loss while the MLP model achieved around 0.005.
Have I done anything wrong in this design, or are GNNs completely inapplicable to this task? If so, why?