Pytorch Summary()

Question

Data:

from torchvision import datasets, transforms

# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,)),
                              ])

# Download and load the training data
trainset = datasets.MNIST('/.',download=True, train=True, transform=transform)
valset = datasets.MNIST('/.',download=True, train=False, transform=transform)

# Sampling the dataset
trainset = torch.utils.data.Subset(trainset, np.arange(0, 1024))
valset = torch.utils.data.Subset(valset, np.arange(0, 1024))

# set it into minibatches
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)
valloader = torch.utils.data.DataLoader(valset, batch_size=64, shuffle=True)

Custom Layer

# Customization of a layer
class MyLayer(torch.nn.Module):
    def __init__(self, size_in, size_out):
        super().__init__()
        self.size_in, self.size_out = size_in, size_out
        self.A1 = torch.nn.Parameter(torch.Tensor(self.size_in, self.size_out)) # same thing as in tensorflow to set it as learnable
        self.B1 = torch.nn.Parameter(torch.Tensor(self.A1.shape)) # same as the above line
        # check 1 see if initialization helps to speed up the experiment
        torch.nn.init.kaiming_uniform_(self.A1, a=math.sqrt(5)) # weight init
        torch.nn.init.kaiming_uniform_(self.B1, a=math.sqrt(5))

        # initialization of A1 and B1
        # torch.nn.init.normal_(self.A1, mean=0.0, std=1e-2) 
        # torch.nn.init.constant_(self.B1, 1.0)

    def forward(self, x, omega):
        self.W = (self.A1 + omega*self.B1)
        w_times_x = torch.mm(x, self.W)
        return w_times_x

Used the layer in my model:

# The exact implementation of our model. I think pytorch is really neat in that perspective
class BasicModel(torch.nn.Module):
    def __init__(self, omega1, omega2, omega3, omega4, indim, outdim, hiddim):
        super().__init__()
        self.omega1, self.omega2, self.omega3, self.omega4 = omega1, omega2, omega3, omega4 # adding in the omega, our noise
        self.indim, self.outdim, self.hiddim = indim, outdim, hiddim
        self.linear1 = MyLayer(self.indim, self.hiddim[0]) # input x output h0
        self.linear2 = MyLayer(self.hiddim[0], self.hiddim[1]) # input h0 output h1
        self.linear3 = MyLayer(self.hiddim[1], self.hiddim[2])# input h1 output h2
        self.linear4 = MyLayer(self.hiddim[2], self.outdim)# input h2 output outdim

    def forward(self, x):
        # set our activation function non-linearization of each layer
        x = torch.sigmoid(self.linear1(x, self.omega1))
        x = torch.sigmoid(self.linear2(x, self.omega2))
        x = torch.sigmoid(self.linear3(x, self.omega3))
        x = self.linear4(x, self.omega4)
        x = torch.nn.LogSoftmax(dim=1)(x)
        
        
        return x

set the input shape and number of hidden nodes:

# set our shape
input = 28*28
output = 10
hidden = [128, 64, 32] # we are building an MLP assign the number of hidden units in each layer

# the omega, the noise added to the weight with a mean of 0 and a covariance of identity matrix
o1, o2, o3, o4 = torch.normal(0, 1,size=(input,hidden[0])),torch.normal(0, 1,size=(hidden[0],hidden[1])),torch.normal(0, 1,size=(hidden[1],hidden[2])),torch.normal(0, 1,size=(hidden[2],output))

Finally, use the summary:

# hopefully this works
device_name = torch.device("cuda" if torch.cuda.is_available() else "cpu") 
# Our model 
model = BasicModel(o1,o2,o3,o4, input, output, hidden).to(device_name)
summary(model,(1,28,28))

I got this error:

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and CPU!

The code is in sequence, you can copy it to google colab.

Thanks a lot and have a nice day

I tried to fix it the whole afternoon, but no luck. Please help, thanks. — Minty Fresh, May 04 '22 at 00:36
Can you check if the most upvoted answer from https://stackoverflow.com/questions/66091226/runtimeerror-expected-all-tensors-to-be-on-the-same-device-but-found-at-least helps? — medium-dimensional, May 04 '22 at 00:50
It might help you to go through [https://stackoverflow.com/search?q=pytorch+running%3A+RuntimeError%3A+Expected+all+tensors+to+be+on+the+same+device%2C+but+found+at+least+two+devices%2C+cuda%3A0+and+cpu ] (similar questions) as well. — medium-dimensional, May 04 '22 at 00:56
I checked the code and I got no error. The output is: `(BasicModel( (linear1): MyLayer() (linear2): MyLayer() (linear3): MyLayer() (linear4): MyLayer() ), (1, 28, 28))`. I used `display()` instead of `summary()` in the last line though. — medium-dimensional, May 04 '22 at 11:10
I used summary is because it is for the research in order to check the number of total parameters — Minty Fresh, May 04 '22 at 11:16

Pytorch Summary()

0 Answers0