
I'm just trying to learn some MLP basics. I found this code on GitHub, but it returns an error. Can somebody please help me make it work? It seems there is some error in the test part, or maybe you can give me some working samples of multilayer perceptrons in Python.

This is the error: AssertionError: Training data and labels of different dimensions

import numpy as np

class NeuralNetwork:
    def __init__(self, input_layer_size, num_hidden_nodes, num_hidden_layers, output_size, lr, activation_function="sigmoid", output_activation=""):
        self.alpha = lr
        self.activation_function = activation_function
        #i-h-h-h-o Wn = Hn+1
        self.weight_layers = []
        self.weight_layers.append(np.random.rand(num_hidden_nodes, input_layer_size))

        for i in range(num_hidden_layers-1):
            self.weight_layers.append(np.random.rand(num_hidden_nodes,num_hidden_nodes))

        self.weight_layers.append(np.random.rand(output_size,num_hidden_nodes))

        #apply Xavier initialization if we're using the sigmoid function
        if self.activation_function == "sigmoid":
            for layer in range(len(self.weight_layers)):
                for n2 in range(len(self.weight_layers[layer])):
                    fan_in = len(self.weight_layers[layer][n2])
                    for n1 in range(len(self.weight_layers[layer][n2])):
                        self.weight_layers[layer][n2][n1] *= np.sqrt(1.0/fan_in)

        self.output_activation = output_activation



        #bias nodes
        self.biases = []
        for hl in range(num_hidden_layers+1):
            self.biases.append(np.zeros(num_hidden_nodes))

        self.biases.append(np.zeros(output_size))

    def activation(self, x, out_act=False):
        if out_act and self.output_activation == "softmax":
            osum = np.exp(x).sum()
            return np.exp(x) / osum

        if self.activation_function == "sigmoid":
            exp = np.exp(x)
            return exp / (exp + 1)
        elif self.activation_function == "relu":
            res = x
            for i in range(len(res)):
                res[i] = max(0,res[i])
            return res
        elif self.activation_function =="leakyrelu":
            res = x
            for i in range(len(res)):
                res[i] = max(0.01*res[i], res[i])
            return res
        elif self.activation_function == "tanh":
            return np.tanh(x)

    def activation_derivative(self, x, out_act=False):
        if out_act and self.output_activation == "softmax":
            res = self.activation(x, True)
            return res * (1.0 - res)

        if self.activation_function == "sigmoid":
            sig = self.activation(x)
            return sig * (1 - sig)
        elif self.activation_function == "relu":
            res = x
            for i in range(len(res)):
                if res[i] > 0:
                    res[i] = 1
                else:
                    res[i] = 0
            return res
        elif self.activation_function == "leakyrelu":
            res = x
            for i in range(len(res)):
                if res[i] > 0:
                    res[i] = 1
                else:
                    res[i] = 0.01
            return res
        elif self.activation_function == "tanh":
            return 1 - np.square(np.tanh(x))
        

    def feed_forward(self, input):
        #input through hidden layers
        self.input_layer = input
        self.hidden_layers = []
        self.hidden_layers.append(np.dot(self.weight_layers[0], self.input_layer))
        for hidden_layer in range(1,len(self.weight_layers)-1):
            self.hidden_layers.append(np.dot(self.weight_layers[hidden_layer], self.activation(self.hidden_layers[hidden_layer-1])) + self.biases[hidden_layer])

        #hidden layers through output layer
        self.output_layer = np.dot(self.weight_layers[len(self.weight_layers)-1], self.activation(self.hidden_layers[len(self.hidden_layers)-1])) + self.biases[len(self.biases)-1]

        return self.activation(self.output_layer, True)

    def back_propogate(self, y=[], oe=[]):
        #error from output -> backwards
        # BP-1
        out_error = []
        if len(y) == 0 and len(oe) != 0:
            out_error = oe
        else:
            out_error = np.multiply((self.activation(self.output_layer, True) - y), self.activation_derivative(self.output_layer, True))

        #BP-2
        #error from output to last hidden
        hidden_error = np.zeros([len(self.hidden_layers), len(self.hidden_layers[0])])
        hidden_error[len(hidden_error)-1] = np.multiply(np.dot(self.weight_layers[len(self.weight_layers)-1].transpose(), out_error), self.activation_derivative(self.hidden_layers[len(self.hidden_layers)-1]))

        #error from last hidden to first hidden layer
        for i in range(len(hidden_error)-1):
            hidden_layer = len(hidden_error) - i - 2
            hidden_error[hidden_layer] = np.multiply(np.dot(self.weight_layers[hidden_layer+1].transpose(), hidden_error[hidden_layer+1]), self.activation_derivative(self.hidden_layers[hidden_layer]))

        #adjust weights/biases wrt error
        #in to h1
        self.weight_layers[0] -= self.alpha * np.dot(np.reshape(hidden_error[0], (len(hidden_error[0]), -1)), np.reshape(self.activation(self.input_layer), (len(self.input_layer), -1)).transpose())
        self.biases[0] -= self.alpha * hidden_error[0]

        #h1 to hn
        for layer in range(1, len(self.weight_layers)-1):
            self.weight_layers[layer] -= self.alpha * (np.dot(hidden_error[layer], self.activation(self.hidden_layers[layer-1]).transpose()))
            self.biases[layer] -= self.alpha * hidden_error[layer]

        #hn to out
        self.weight_layers[len(self.weight_layers)-1] -= self.alpha * np.dot(np.reshape(out_error, (len(out_error), -1)), np.reshape(self.activation(self.hidden_layers[len(self.hidden_layers)-1]), (len(self.hidden_layers[len(self.hidden_layers)-1]), -1)).transpose())
        self.biases[len(self.biases)-1] -= self.alpha * out_error
        #adjusts done

    def train(self, train_data, labels, iterations, batch_size=0):
        assert(len(train_data) != len(labels)), "Training data and labels of different dimensions"
        if batch_size == 0:
            loss = np.zeros(iterations)
            for iteration in range(iterations):
                for i in range(len(train_data)):
                    self.feed_forward(train_data[i])
                    self.back_propogate(labels[i])
        else:
            batch_no = np.ceil(len(train_data) / batch_size)
            train = train_data
            l = labels
            for iteration in range(iterations+1):
                train, l = self.uniform_shuffle(np.array(train), np.array(l))
                train_batches = np.array_split(train, batch_no)
                label_batches = np.array_split(l, batch_no)
                for batch in range(len(train_batches)):
                    avg_error = np.zeros(len(labels[0]))
                    for batch_item in range(len(train_batches[batch])):
                        self.feed_forward(train_batches[batch][batch_item])
                        #feed into average error calculation
                        avg_error = avg_error + np.multiply((self.activation(self.output_layer, True) - label_batches[batch][batch_item]), self.activation_derivative(self.output_layer, True))
                    avg_error = avg_error / batch_no
                    self.back_propogate(oe=avg_error)

    def predict(self, input):
        return self.feed_forward(input)

    #from: https://stackoverflow.com/questions/4601373/better-way-to-shuffle-two-numpy-arrays-in-unison
    def uniform_shuffle(self, a, b):
        p = np.random.permutation(len(a))
        return a[p], b[p]

    #used for testing while building the net
    def loss_function(self, x, y):
        if self.output_activation == "softmax":
            return np.sum(-1 * y * np.log(x))
        else:
            return np.sum(0.5 * (x - y) ** 2) / len(x)



#input_layer_size, num_hidden_nodes, num_hidden_layers, output_size, lr, activation_function, output_activation
net = NeuralNetwork(2, 2, 1, 1, 0.01, "relu")#, "softmax")

#the net can be taught, for example, the XOR function
test_data = [[1,1],
             [1,0],
             [0,1],
             [0,0]]

test_labels = [[0],
                [1],
                [1],
                [0]]

net.train(test_data, test_labels, 100000)

print("Predictions: ")
for i in range(len(test_data)):
    print(test_data[i])
    print(net.predict(test_data[i]))
Riggun

1 Answer


The code is all perfect except for a small bug: the number of training samples and the corresponding number of labels should be the same, but the assertion checks that they are different.

Change the line

assert(len(train_data) != len(labels)), "Training data and labels of different dimensions"

to

assert(len(train_data) == len(labels)), "Training data and labels of different dimensions"
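
With the comparison flipped from != to ==, the assertion only fires when the lengths genuinely differ, and the training call in the question runs. Below is a minimal, standalone sketch of how the corrected check behaves, using only the XOR data and labels from the question:

data = [[1, 1], [1, 0], [0, 1], [0, 0]]
labels = [[0], [1], [1], [0]]

# corrected check: passes because every sample has a matching label
assert len(data) == len(labels), "Training data and labels of different dimensions"

bad_labels = [[0], [1], [1]]  # one label deliberately missing
try:
    assert len(data) == len(bad_labels), "Training data and labels of different dimensions"
except AssertionError as e:
    print(e)  # the message now reports a real length mismatch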
mujjiga
  • Thanks a lot! Is there a way to test my simple net? For example, what if I need to test the [1,1,1] combination? As I see it, it shows training results, if I'm not mistaken? – Riggun Nov 29 '20 at 20:48