I have a network of 225 sensors, with a reading every 5 minutes for the month of October 2022, so there are 288*31 observations per sensor. The data is in 2 CSV files, sensor_data.csv and labels.csv:

- sensor_data.csv: it has 288*31 rows of sensor readings. Each row holds the readings of all sensors at one timestamp, so there are 225 data columns (one for each sensor).
- labels.csv: it has a binary label corresponding to each sensor reading, 0 for normal data and 1 otherwise. Each row holds the labels of all sensors at that timestamp.
- adjacency: a 225 x 225 adjacency (connectivity) matrix, defined in the code below.
The data is a time series: the first column holds the timestamp, and the remaining columns hold the readings of all sensors at that timestamp. A small portion of the data is given below:
Time | node1 | node2 | node3 | node4 | node5 |
---|---|---|---|---|---|
10/1/2022 0:00 | 107.6 | 107.6 | 107.6 | 107.6 | 107.6 |
10/1/2022 0:05 | 105.1 | 105.1 | 105.1 | 105.1 | 105.1 |
The label data is in the same format, as shown below:
Time | node1 | node2 | node3 | node4 | node5 |
---|---|---|---|---|---|
10/1/2022 0:00 | 0 | 0 | 0 | 0 | 0 |
10/1/2022 0:05 | 0 | 0 | 0 | 1 | 0 |
10/1/2022 0:10 | 0 | 0 | 1 | 1 | 0 |
10/1/2022 0:15 | 0 | 0 | 1 | 0 | 0 |
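To double-check that I am reading the files in the layout described above, I load them like this (a minimal sketch, assuming both CSV files are in the working directory):

```python
import pandas as pd

sensor_df = pd.read_csv('sensor_data.csv')
labels_df = pd.read_csv('labels.csv')

# first column is the timestamp, the remaining 225 columns are the sensors
print(sensor_df.shape)   # 288*31 = 8928 rows, 225 sensor columns plus the Time column
print(labels_df.shape)   # same shape as the sensor data

# drop the Time column to get plain (timestamps x sensors) arrays
sensor_values = sensor_df.iloc[:, 1:].values
label_values = labels_df.iloc[:, 1:].values
```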
Labels of 1 are rare in the data, and I want to predict these labels from the time series.
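Because the 1 labels are so rare, one thing I am considering is weighting the positive class in the loss, e.g. through the pos_weight argument of BCEWithLogitsLoss. This is only a sketch and not part of the code below; the ratio is computed from labels.csv:

```python
import pandas as pd
import torch
import torch.nn as nn

label_values = pd.read_csv('labels.csv').iloc[:, 1:].values  # drop the Time column

# up-weight the rare 1 labels by the negative/positive ratio
n_pos = label_values.sum()
n_neg = label_values.size - n_pos
criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([n_neg / n_pos], dtype=torch.float32))
```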
I have the following code, but it is not working due to inconsistent input/output dimensions between the layers.
When forward(self, input, adj) is called, I expect the shape of support to be consistent with the adj matrix. input is torch.Size([32, 225]) (batch_size x n_sensor) and self.weight.shape is torch.Size([225, 64]), so support comes out as torch.Size([32, 64]). The call output = torch.bmm(adj, support), which is supposed to produce [batch_size, num_nodes, hidden_features], is therefore invalid and raises: RuntimeError: batch1 must be a 3D tensor.
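Here is a minimal sketch that reproduces just the shape problem with stand-in tensors (the sizes come from the error message; the tensors themselves are made up):

```python
import torch

batch_size, n_sensor, hidden = 32, 225, 64

adj = torch.eye(n_sensor)                 # stand-in 2-D adjacency, [225, 225]
x = torch.rand(batch_size, n_sensor)      # one batch of inputs, [32, 225]
weight = torch.rand(n_sensor, hidden)     # layer weight, [225, 64]

support = torch.matmul(x, weight)         # [32, 64] - the node dimension is gone
try:
    torch.bmm(adj, support)               # bmm expects two 3-D (batched) tensors
except RuntimeError as e:
    print(e)                              # batch1 must be a 3D tensor
```

My full code is below: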
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim
# Load data from CSV files
sensor_data = pd.read_csv('sensor_data.csv')
labels_data = pd.read_csv('labels.csv')
n_sensor = 225
adjacency_matrix = np.zeros((n_sensor, n_sensor))  # 225 x 225 connectivity matrix (assuming zero initialization before adding edges)
for i in range(n_sensor - 3):
    adjacency_matrix[i][i + 1] = 1
    adjacency_matrix[i][i + 2] = 1
    adjacency_matrix[i][i + 3] = 1
    adjacency_matrix[i + 1][i] = 1
# Standardize the data
sensor_data_values = sensor_data.iloc[:, 1:].values
data_mean = np.mean(sensor_data_values)
data_std = np.std(sensor_data_values)
sensor_data_values = (sensor_data_values - data_mean) / data_std
adjacency_tensor = torch.tensor(adjacency_matrix, dtype=torch.float32)
class GraphConvolution(nn.Module):
    def __init__(self, in_features, out_features):
        super(GraphConvolution, self).__init__()
        self.weight = nn.Parameter(torch.FloatTensor(in_features, out_features))
        self.bias = nn.Parameter(torch.FloatTensor(out_features))
        self.reset_parameters()

    def reset_parameters(self):
        nn.init.kaiming_uniform_(self.weight)
        nn.init.zeros_(self.bias)

    def forward(self, input, adj):
        support = torch.matmul(input, self.weight)
        output = torch.bmm(adj, support)  # [batch_size, num_nodes, hidden_features]
        output += self.bias
        return output
class GNN(nn.Module):
    def __init__(self, num_nodes, hidden_features):
        super(GNN, self).__init__()
        self.gc1 = GraphConvolution(num_nodes, hidden_features)
        self.gc2 = GraphConvolution(hidden_features, num_nodes)
        self.relu = nn.ReLU()

    def forward(self, x, adj):
        x = self.gc1(x, adj)
        x = self.relu(x)
        x = self.gc2(x, adj)
        return x
# Hyperparameters
epochs = 1
learning_rate = 0.01
hidden_features = 64
# Model, Loss and Optimizer
model = GNN(num_nodes = 225, hidden_features=hidden_features)
criterion = nn.BCEWithLogitsLoss() # As it's a binary classification
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Convert data to tensor
inputs = torch.tensor(sensor_data_values, dtype=torch.float32)
labels = torch.tensor(labels_data.iloc[:, 1:].values, dtype=torch.float32)
# Create data loaders
batch_size = 32
dataset = TensorDataset(inputs, labels)
loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
adjacency_tensor = adjacency_tensor.to(device)
lossHist = []
for epoch in range(epochs):
    total_loss = 0
    for batch_inputs, batch_labels in loader:
        batch_inputs, batch_labels = batch_inputs.to(device), batch_labels.to(device)
        optimizer.zero_grad()
        outputs = model(batch_inputs, adjacency_tensor)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * batch_inputs.size(0)
    avg_loss = total_loss / len(dataset)
    lossHist.append(avg_loss)
    if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {avg_loss}")
The data is available at this link