So I finally coded a neural network (which I have been wanting to do for ages) for the first time, with the help of the series from the YouTube channel "Coding Train", with the "only" difference being that I coded it in C instead of JS.
I tried to simulate an XOR: The structure is two input nodes, two hidden nodes, and one output node. After training, I found out it doesn't work properly.
Already seen this article and several others but they didn't help me: XOR Neural Network sometimes outputs 0.5
This is my training data:
Training data:
IN | OUT
00 | 0
01 | 1
10 | 1
11 | 0
I trained it for multiple sessions, each session consisting of more than 10000 training iterations, with learning rates ranging from 0.5 to 0.01, and expected the corresponding XOR results. The table below lists the most common outputs after different training sessions, no matter which learning rate I pick.
One training session = many trainings
Actual output after training (OUTn is the nth training session):
IN | OUT1 | OUT2 | OUT3 | OUT4 | OUT5
00 | 0.01 | 0.01 | 0.01 | 0.66 | 0.01
01 | 0.99 | 0.99 | 0.50 | 0.66 | 0.66
10 | 0.99 | 0.50 | 0.99 | 0.66 | 0.66
11 | 0.01 | 0.50 | 0.50 | 0.66 | 0.66
Most of the time it outputs something pretty weird. After several hours of searching for the error I still couldn't figure out where it is. Maybe someone reading this can find it?
Following is the code.
I defined a GETRANDOM and my network via a struct so that I can easily pass, modify, and return it:
// Pseudo-random double uniformly spread over [-1, 1]; used only for initial
// weights and biases. NOTE(review): rand() quality is fine for this demo.
#define GETRANDOM ( (double)rand() / RAND_MAX * 2.0 - 1.0 ) // random number between -1 and 1
// network structure: a fully-connected 3-layer net (input -> hidden -> output).
// All pointer members are heap-allocated by setup_network() and sized from the
// three node counts; the struct owns them for its whole lifetime.
struct sNetwork {
    // node count (layer sizes; must be > 0 before setup_network() is called)
    int input_nodes;
    int hidden_nodes;
    int output_nodes;
    // values (activations; input_values/expected_values are filled by the caller,
    // hidden_values/output_values are produced by feed_forward())
    double* input_values;
    double* hidden_values;
    double* output_values;
    double* expected_values;
    // error (per-node deltas scratch space used by train())
    double* hidden_error;
    double* output_error;
    // bias (one per hidden node / per output node)
    double* bias_h;
    double* bias_o;
    // weights: weights_ih[i][h] connects input i to hidden h,
    //          weights_ho[h][o] connects hidden h to output o
    double** weights_ih;
    double** weights_ho;
};
typedef struct sNetwork tNetwork;
And for that I also wrote a setup-function:
tNetwork* setup_network(tNetwork* tNet)
{
// general error check
if(tNet == NULL)
{
return NULL;
}
if((*tNet).input_nodes == 0 || (*tNet).hidden_nodes == 0 || (*tNet).output_nodes == 0)
{
return NULL;
}
// based on the defined size, set up the weights
// set up the input to hidden weights
(*tNet).weights_ih = (double**)malloc((*tNet).input_nodes * sizeof(double*));
for(int i = 0; i < (*tNet).input_nodes; i++)
{
(*tNet).weights_ih[i] = (double*)malloc((*tNet).hidden_nodes * sizeof(double));
for(int j = 0; j < (*tNet).hidden_nodes; j++)
{
(*tNet).weights_ih[i][j] = GETRANDOM;
}
}
// set up the hidden to output weights
(*tNet).weights_ho = (double**)malloc((*tNet).hidden_nodes * sizeof(double*));
for(int i = 0; i < (*tNet).hidden_nodes; i++)
{
(*tNet).weights_ho[i] = (double*)malloc((*tNet).output_nodes * sizeof(double));
for(int j = 0; j < (*tNet).output_nodes; j++)
{
(*tNet).weights_ho[i][j] = GETRANDOM;
}
}
// set up the bias
// set up hidden bias and value
(*tNet).bias_h = (double*)malloc((*tNet).hidden_nodes * sizeof(double));
for(int i = 0; i < (*tNet).hidden_nodes; i++)
{
(*tNet).bias_h[i] = GETRANDOM;
}
// set up the output bias and value
(*tNet).bias_o = (double*)malloc((*tNet).output_nodes * sizeof(double));
for(int i = 0; i < (*tNet).output_nodes; i++)
{
(*tNet).bias_o[i] = GETRANDOM;
}
// set up the values
(*tNet).hidden_values = (double*)malloc((*tNet).hidden_nodes * sizeof(double));
(*tNet).output_values = (double*)malloc((*tNet).output_nodes * sizeof(double));
(*tNet).input_values = (double*)malloc((*tNet).input_nodes * sizeof(double));
(*tNet).expected_values = (double*)malloc((*tNet).output_nodes * sizeof(double));
// set up the error stuff
(*tNet).hidden_error = (double*)malloc((*tNet).hidden_nodes * sizeof(double));
(*tNet).output_error = (double*)malloc((*tNet).output_nodes * sizeof(double));
return tNet;
}
The sigmoid functions:
double sigmoid(double x)
{
return 1 / (1 + exp(-x));
}
// Derivative of the logistic function expressed in terms of its OUTPUT:
// if s = sigmoid(z), then sigmoid'(z) = s * (1 - s). Callers must therefore
// pass an already-activated value (e.g. output_values[i]), not the raw sum z.
double dsigmoid(double s)
{
    return s * (1 - (s));
}
Then I coded the feed-forward function:
// Forward pass: propagate the caller-supplied input_values through the hidden
// layer into output_values. Assumes setup_network() succeeded. Returns tNet.
tNetwork* feed_forward(tNetwork* tNet)
{
    // hidden layer: bias + weighted sum of inputs, squashed by the sigmoid
    for (int h = 0; h < tNet->hidden_nodes; h++) {
        double sum = tNet->bias_h[h];
        for (int i = 0; i < tNet->input_nodes; i++) {
            sum += tNet->input_values[i] * tNet->weights_ih[i][h];
        }
        tNet->hidden_values[h] = sigmoid(sum);
    }
    // output layer: bias + weighted sum of hidden activations, squashed again
    for (int o = 0; o < tNet->output_nodes; o++) {
        double sum = tNet->bias_o[o];
        for (int h = 0; h < tNet->hidden_nodes; h++) {
            sum += tNet->hidden_values[h] * tNet->weights_ho[h][o];
        }
        tNet->output_values[o] = sigmoid(sum);
    }
    return tNet;
}
After that the train function:
// One backpropagation step for the sample currently stored in input_values /
// expected_values: forward pass, output and hidden deltas, then bias/weight
// updates scaled by learning_rate. Order matters: hidden_error is derived
// from weights_ho BEFORE those weights are modified. Returns tNet.
tNetwork* train(tNetwork* tNet, double learning_rate)
{
    // first of all feed the network
    tNet = feed_forward(tNet);

    // output error: (target - actual) per output node
    for (int o = 0; o < tNet->output_nodes; o++) {
        tNet->output_error[o] = tNet->expected_values[o] - tNet->output_values[o];
    }

    // hidden error: output error propagated back through the current weights
    for (int h = 0; h < tNet->hidden_nodes; h++) {
        double err = 0;
        for (int o = 0; o < tNet->output_nodes; o++) {
            err += tNet->weights_ho[h][o] * tNet->output_error[o];
        }
        tNet->hidden_error[h] = err;
    }

    // update output bias and hidden->output weights
    for (int o = 0; o < tNet->output_nodes; o++) {
        double delta = learning_rate * tNet->output_error[o] * dsigmoid(tNet->output_values[o]);
        tNet->bias_o[o] += delta;
        for (int h = 0; h < tNet->hidden_nodes; h++) {
            tNet->weights_ho[h][o] += delta * tNet->hidden_values[h];
        }
    }

    // update hidden bias and input->hidden weights
    for (int h = 0; h < tNet->hidden_nodes; h++) {
        double delta = learning_rate * tNet->hidden_error[h] * dsigmoid(tNet->hidden_values[h]);
        tNet->bias_h[h] += delta;
        for (int i = 0; i < tNet->input_nodes; i++) {
            tNet->weights_ih[i][h] += delta * tNet->input_values[i];
        }
    }
    return tNet;
}
Finally, in my main function I did this:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
int main(void)
{
// initialize
srand(time(NULL));
// create neural network
tNetwork* network = (tNetwork*)malloc(sizeof(tNetwork));
// set up the properties of the network and initialize it
network->input_nodes = 2;
network->hidden_nodes = 2;
network->output_nodes = 1;
network = setup_network(network);
// train
for(int i = 0; i < 50000; i++)
{
switch(rand() % 4)
{
case 0:
// train #1
network->input_values[0] = 0;
network->input_values[1] = 0;
network->expected_values[0] = 0;
network = train(network, 0.1);
break;
case 1:
// train #2
network->input_values[0] = 1;
network->input_values[1] = 0;
network->expected_values[0] = 1;
network = train(network, 0.1);
break;
case 2:
// train #3
network->input_values[0] = 0;
network->input_values[1] = 1;
network->expected_values[0] = 1;
network = train(network, 0.1);
break;
case 3:
// train #4
network->input_values[0] = 1;
network->input_values[1] = 1;
network->expected_values[0] = 0;
network = train(network, 0.1);
break;
default:
break;
}
}
// check the functionality
network->input_values[0] = 0;
network->input_values[1] = 0;
network = feed_forward(network);
printf("%f\n", network->output_values[0]);
network->input_values[0] = 0;
network->input_values[1] = 1;
network = feed_forward(network);
printf("%f\n", network->output_values[0]);
network->input_values[0] = 1;
network->input_values[1] = 0;
network = feed_forward(network);
printf("%f\n", network->output_values[0]);
network->input_values[0] = 1;
network->input_values[1] = 1;
network = feed_forward(network);
printf("%f\n", network->output_values[0]);
return 0;
}
If anyone actually read this far, I'm impressed and if there is any error spotted and explained, I'm very grateful, thanks in advance!!