I'm trying to understand back-propagation, so I wrote some Python code, but it's not working properly. When I train it on the XOR inputs/outputs the error does not converge, but if I change the value of the last XOR target it does converge.
Also, if I set a target output value greater than 1, the error converges to target - 1, which does not seem right.
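I think the target > 1 behaviour might be related to the sigmoid on the output layer (see forward below), which can only produce values in (0, 1), so the network can never reach a target above 1; a quick check:

import numpy as np

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# The sigmoid saturates inside (0, 1), so a target of e.g. 2 can never be
# matched and the difference can never drop below target - 1.
print(sigmoid(np.array([-10.0, 0.0, 10.0])))  # ~[0.000045, 0.5, 0.999955]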
import numpy as np
import random
class neural_network():
    activation = [] # Activation values of each layer
weightsIn = []
weightsOut = []
def __init__(self, sizeOfLayers):
'''
sizeOfLayers: Tuple with numbers of neurons of each layer
(in, hidden, out)
'''
if len(sizeOfLayers) > 3:
raise ValueError('Wrong number of layers')
self.sizeOfLayers = sizeOfLayers
for i in range(len(sizeOfLayers)):
if i == 0:
#input layer + bias
self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])
else:
self.activation.append(sizeOfLayers[i]*[0.0])
# Wi = len(Hid) x len(IN)+1(bias)
self.weightsIn = np.random.random((sizeOfLayers[1], sizeOfLayers[0] + 1))
# Wo = len(OUT) x len(Hid)
self.weightsOut = np.random.random((sizeOfLayers[2], sizeOfLayers[1]))
def forward(self, X):
'''
        X: input vector
'''
        # input + bias stored in the activation vector
self.activation[0] = np.vstack((np.array([X]).T, np.array([1])))
#sum of (weights x in)
self.sumHidden = self.weightsIn.dot(self.activation[0])
        # Activation of hidden layer
self.activation[1] = (self.sigmoid(self.sumHidden))
#sum of(out weights x activation of last layer)
self.sumOut = self.weightsOut.dot(self.activation[1])
#activation of output
self.activation[2] = (self.sigmoid(self.sumOut))
return self.activation[2].T
def backPropagate(self, Y, trainRate = 0.1):
'''
Y: output target
        trainRate: learning rate
'''
if len(Y) != self.sizeOfLayers[2]:
            raise ValueError('Wrong number of outputs')
#Calc of output delta
error_o = Y.T - self.activation[2].T
out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T
#Calc of hidden delta
error_h = out_delta.T.dot(self.weightsOut)
hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T
        # update hidden-to-output weights
change_o = self.activation[1] * out_delta.T
for i in range(self.sizeOfLayers[2]):
for j in range(self.sizeOfLayers[1]):
self.weightsOut[i][j] = self.weightsOut[i][j] + trainRate*change_o[j][i]
# update Input weights
change_h = self.activation[0] * hiden_delta.T
for i in range(self.sizeOfLayers[1]):
for j in range(self.sizeOfLayers[0]):
self.weightsIn[i][j] = self.weightsIn[i][j] + trainRate*change_h[j][i]
#Error
        return np.sum((Y.T - self.activation[2].T)**2) * 0.5
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
def sigmoidPrime(self, z):
return self.sigmoid(z)*(1-self.sigmoid(z))
def train(self, target, trainRate = 0.001, it = 50000):
for i in range(it):
error = 0.0
for t in target:
inputs = np.array(t[0])
targets = np.array([t[1]])
self.forward(inputs)
error = error + self.backPropagate(targets, trainRate)
nn = neural_network((2,6,1))
xor = [
[[0,0], [0]],
[[0,1], [1]],
[[1,0], [1]],
    [[1,1], [0]]  # If I change this to 1 it converges
]
nn.train(xor)
Edit: I applied the modifications Diego Stéfano suggested (thank you, Diego), but the error still does not converge:
import numpy as np
import math
import random
from scipy.special import expit
from sklearn.preprocessing import normalize
class neural_network(object):
activation = []
weightsIn = []
weightsOut = []
def __init__(self, sizeOfLayers):
'''
sizeOfLayers: Tuple with numbers of neurons of each layer
(in, hidden, out)
'''
self.sizeOfLayers = sizeOfLayers
for i in range(len(sizeOfLayers)):
self.activation.append(sizeOfLayers[i]*[0.0] + [0.0])
self.weightsIn = np.random.normal(scale=0.1, size = (sizeOfLayers[1], sizeOfLayers[0] + 1))
self.weightsOut = np.random.normal(scale=0.1, size = (sizeOfLayers[2], sizeOfLayers[1] + 1))
def forward(self, X):
'''
        X: input vector
'''
        # input + bias stored in the activation vector
self.activation[0] = np.vstack((np.array([X]).T, np.array([1])))
#sum of (weights x in)
self.sumHidden = self.weightsIn.dot(self.activation[0])
        # hidden activation + bias appended
self.activation[1] = np.vstack((expit(self.sumHidden), np.array([1])))
#sum of(out weights x activation of last layer)
self.sumOut = self.weightsOut.dot(self.activation[1])
#activation of output
self.activation[2] = (expit(self.sumOut))
return self.activation[2].T
def backPropagate(self, X, Y, trainRate = 0.1):
self.forward(X)
#Calc of output delta
error_o = Y - self.activation[2].T
out_delta = self.sigmoidPrime(self.activation[2]) * error_o.T
#Calc of hidden delta
error_h = out_delta.T.dot(self.weightsOut)
hiden_delta = self.sigmoidPrime(self.activation[1]) * error_h.T
        # update hidden-to-output weights
change_o = self.activation[1] * np.transpose(out_delta)
self.weightsOut = self.weightsOut + trainRate*change_o.T
        # update input-to-hidden weights
change_h = self.activation[0].dot( hiden_delta[:-1].T)
self.weightsIn = self.weightsIn + trainRate*change_h.T
#error
return np.sum((Y - self.activation[2].T)**2)*0.5
def train(self, input_list, epochs):
for epoch in range(epochs):
ErrAcc = 0.0
for inputs, targets in input_list:
Err = self.backPropagate(np.array(inputs), np.array(targets), 0.2)
ErrAcc = ErrAcc + Err
if epoch % 1000 == 0:
                print('Epoch =', epoch, 'ErrAcc =', ErrAcc)
def sigmoidPrime(self,x):
return expit(x)*(1-expit(x))
nn = neural_network((2,10,1))
xor = [
[[0,0], [0]],
[[0,1], [1]],
[[1,0], [1]],
    [[1,1], [0]]  # If I change this to 1 it converges
]
nn.train(xor, 300000)
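For reference, this is how I check the raw outputs once training finishes, just calling the forward method above on every XOR pattern:

# Print what the network produces for each pattern after training
for inputs, targets in xor:
    print(inputs, '->', nn.forward(np.array(inputs)), 'target:', targets)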