
I'm learning about neural networks, so I implemented a linear regression model twice: once in pure Python and once using numpy. I expected the numpy version to be faster, but it's actually much slower. Why might that be?

I'm running this on an M1 MacBook Pro with Python 3.9.13 and numpy 1.24.1.
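
For what it's worth, here is roughly how I compared the two (a quick sketch using `time.perf_counter`; the `time_training` helper is just for illustration, and both runs include the same ten `report` prints, so that overhead is identical):

import time

def time_training(label, train_fn):
    # Wall-clock a full training run (1,000,000 iterations over 30 data points).
    start = time.perf_counter()
    train_fn()
    print(label, "took", round(time.perf_counter() - start, 2), "seconds")

# X, Y are built as lists for the pure version and as numpy arrays for the
# numpy version, exactly as in the scripts below.
time_training("pure python", lambda: PureNeuralNetwork().train(1, 1, X, Y, 1000000, 0.01))
time_training("numpy", lambda: NpNeuralNetwork().train(X, Y, 1000000, 0.01))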

Here is the pure Python version:


class PureNeuralNetwork:

    def sigmoid(self, z):
        # 2.718 approximates Euler's number; clamping z avoids overflow in e ** z.
        # Subtracting 0.5 centers the output at 0, so the squashed gradient keeps its sign.
        e = 2.718
        z = min(100, z)
        return (e ** z) / (1 + (e ** z)) - 0.5

    def predict(self, w, X, b):
        return w * X + b

    def loss(self, w, b, X, Y): # the average squared loss
        losses = [(self.predict(w, X[i], b) - Y[i]) ** 2 for i in range(len(X))]  
        return sum(losses) / len(X)

    def gradient_w(self, w, b, X, Y):
        inner = [2 * (w * X[i] + b - Y[i]) * X[i] for i in range(len(X))]
        grad = sum(inner) / len(X)
        return self.sigmoid(grad)

    def gradient_b(self, w, b, X, Y):
        inner = [2 * (w * X[i] + b - Y[i]) for i in range(len(X))]
        grad = sum(inner) / len(X)
        return self.sigmoid(grad)

    def accuracy(self, w, b, X, Y):
        accuracies = [1 - (abs(self.predict(w, X[i], b) - Y[i]) / Y[i]) for i in range(len(X))]
        return sum(accuracies) / len(accuracies) # mean accuracy over the dataset

    def report(self, i, w, b, X, Y, wgradient, bgradient):
        print("\nITERATION ", i)
        print("W        = ", w)
        print("B        = ", b)
        print("ERROR    = ", self.loss(w, b, X, Y))
        print("GRADIENT_W = ", wgradient)
        print("GRADIENT_B = ", bgradient)
        print("ACCURACY = ", self.accuracy(w, b, X, Y))

    def train(self, w, b, X, Y, iterations, lr):
        for i in range(iterations):
            wgradient = self.gradient_w(w, b, X, Y) * lr
            bgradient = self.gradient_b(w, b, X, Y) * lr

            w -= wgradient
            b -= bgradient

            # Report ten times over the course of training.
            if i % (iterations // 10) == 0:
                self.report(i, w, b, X, Y, wgradient, bgradient)

        return w, b

    
# COLLECT DATA

# Read the comma-separated file character by character, collecting digits
# until a "," or newline ends each integer.
f = open("pizza_comma.txt", "r")
all_nums = []
temp = []
for i in f.read():
    if i.isnumeric():
        temp.append(i)

    if i == "," or i == "\n":
        temp_string = ''.join(temp)
        all_nums.append(int(temp_string))
        temp = []
# Values alternate: even indices are X, odd indices are Y.
X = []
Y = []
for i in range(len(all_nums)):
    if i % 2 == 0:
        X.append(all_nums[i])
    else:
        Y.append(all_nums[i])

# CODE

NN = PureNeuralNetwork()

iterations = 1000000
w = 1
b = 1
lr = 0.01

w, b = NN.train(w, b, X, Y, iterations, lr)

print("\nFinal accuracy = ", NN.accuracy(w, b, X, Y))

And here is the data I used for the pure Python version:

13,33
2,16
14,32
23,51
13,27
1,16
18,34
10,17
26,29
3,15
3,15
21,32
7,22
22,37
2,13
27,44
6,16
10,21
18,37
15,30
9,26
26,34
8,23
15,39
10,27
21,37
5,17
6,18
13,25
13,23

And here is the numpy version:

import numpy as np

class NpNeuralNetwork:

    def sigmoid(self, z):
        # Clamp to avoid overflow in np.exp; subtracting 0.5 centers the output at 0.
        z = np.minimum(100, z)
        return np.exp(z) / (1 + np.exp(z)) - 0.5

    def predict(self, w, X, b):
        return w * X + b

    def loss(self, w, b, X, Y): # the average squared loss
        squared_error = np.power((self.predict(w, X, b) - Y), 2)
        return np.mean(squared_error)

    def gradient_w(self, w, b, X, Y):
        inner = 2 * (w * X + b - Y) * X
        mean = np.mean(inner)
        return self.sigmoid(mean)

    def gradient_b(self, w, b, X, Y):
        inner = 2 * (w * X + b - Y)
        mean = np.mean(inner)
        return self.sigmoid(mean)

    def accuracy(self, w, b, X, Y):
        accuracies = 1 - (np.absolute(self.predict(w, X, b) - Y) / Y)
        return np.mean(accuracies)

    def report(self, i, w, b, X, Y, wgradient, bgradient):
        print("\nITERATION ", i)
        print("W        = ", w)
        print("B        = ", b)
        print("ERROR    = ", self.loss(w, b, X, Y))
        print("GRADIENT_W = ", wgradient)
        print("GRADIENT_B = ", bgradient)
        print("ACCURACY = ", self.accuracy(w, b, X, Y))

    def train(self, X, Y, iterations, lr, w=1, b=1):
        for i in range(iterations):
            wgradient = self.gradient_w(w, b, X, Y) * lr
            bgradient = self.gradient_b(w, b, X, Y) * lr

            w -= wgradient
            b -= bgradient

            # Report ten times over the course of training.
            if i % (iterations // 10) == 0:
                self.report(i, w, b, X, Y, wgradient, bgradient)

        return w, b



# loadtxt reads one (x, y) row per line; transpose so row 0 is X and row 1 is Y.
data = np.loadtxt("pizza_space.txt").T
X = data[0]
Y = data[1]

NN = NpNeuralNetwork()

w, b = NN.train(X, Y, 1000000, 0.01)

print("\nFinal accuracy = ", NN.accuracy(w, b, X, Y))

And here is the data (formatted slightly differently):

13  33
2  16
14  32
23  51
13  27
1  16
18  34
10  17
26  29
3  15
3  15
21  32
7  22
22  37
2  13
27  44
6  16
10  21
18  37
15  30
9  26
26  34
8  23
15  39
10  27
21  37
5  17
6  18
13  25
13  23
  • 'pure python'? If you are treating numpy arrays as though they were lists, they will be slower. Have you spent much time learning `numpy` basics? – hpaulj Jan 16 '23 at 23:04
  • Numpy is designed to perform better than stdlib Python when data is large and operations can be vectorized. Your data are very small. I see you've vectorized functions like `gradient_w()`, but I bet that the data are too small to see any benefit of vectorization. – Michael Ruth Jan 17 '23 at 00:07
  • My pleasure. Take a look at my [answer](https://stackoverflow.com/a/60941705/4583620) regarding the performance of `sum()` vs `numpy.sum()` for some comparison. Note that I didn't vectorize the sum, but it does highlight the cost of setup for vectorization. – Michael Ruth Jan 17 '23 at 18:56
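
To make the commenters' point concrete, here is a minimal sketch of the per-operation overhead on arrays of this size (sizes and repeat counts are arbitrary; exact timings vary by machine):

import timeit
import numpy as np

xs = list(range(30))      # same size as the pizza dataset
arr = np.array(xs)

# Each numpy call has a fixed setup cost, so on 30 elements the plain
# builtin can come out ahead.
print("builtin sum:", timeit.timeit(lambda: sum(xs), number=100000))
print("np.sum     :", timeit.timeit(lambda: np.sum(arr), number=100000))

# Treating the array like a list is slower still: every element accessed in
# the loop is boxed into a new numpy scalar object.
print("list loop  :", timeit.timeit(lambda: [x * 2 for x in xs], number=100000))
print("array loop :", timeit.timeit(lambda: [x * 2 for x in arr], number=100000))
print("vectorized :", timeit.timeit(lambda: arr * 2, number=100000))

The gap flips once the arrays are large enough for the vectorized work to amortize that fixed setup cost.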
