I am trying to write a simple neural network and, at the same time, improve my OOP skills.
main.cpp
#include <stdio.h>
#include <math.h>
#include <vector>
#include <unistd.h>
using namespace std;
#include "Neuron.h"
#include "fileio.h"
#include "helpers.h"
// Dimensions of one input image, in pixels.
#define IMAGE_HEIGHT 28
#define IMAGE_WIDTH 28
// Neuron counts for the input (L0), hidden (L1) and output (L2) layers.
// L0SIZE is parenthesized so that it still expands correctly inside a larger
// expression such as `x / L0SIZE` or `L0SIZE % n`.
#define L0SIZE (IMAGE_HEIGHT * IMAGE_WIDTH)
#define L1SIZE 30
#define L2SIZE 10
int main(){
printf("ok %d, %d, %d, %d\n", sizeof(Neuron), sizeof(InputNeuron), sizeof(HiddenNeuron), sizeof(OutputNeuron));
int labelVal = getNextLabel();
vector<InputNeuron> inputLayer;
createInputLayer(L0SIZE, &inputLayer);
vector<HiddenNeuron> hiddenLayerOne;
createHiddenLayer(L1SIZE, inputLayer, &hiddenLayerOne);
vector<OutputNeuron> outputLayer;
createOutputLayer(L2SIZE, hiddenLayerOne, &outputLayer);
printf("added all neurons\n");
//do the recursive backwards sweep through the NN to find the outputs
for(int i = 0; i < outputLayer.size(); i++){
printf("output %d, value %f\n", i, outputLayer[i].computeOutput());
}
return(0);
}
Neuron.h
#ifndef NEURON_H
#define NEURON_H
#include <vector>
/// Base class for every node of the network. Derived classes override
/// computeOutput() to produce their activation.
class Neuron {
public:
    Neuron();
    /// Virtual so that a derived neuron is destroyed correctly when it is
    /// deleted through a Neuron*.
    virtual ~Neuron() = default;
    /// Returns this neuron's output value; meant to be overridden.
    virtual float computeOutput();

    // Default-initialized so that neurons built through a default constructor
    // never expose indeterminate values.
    int _index = 0;  // position of the neuron inside its layer
    int _layer = 0;  // layer number (the helpers use 0 = input, 1 = hidden, 2 = output)
};
/// Input-layer neuron: stores one fixed value and returns it as its output.
class InputNeuron : public Neuron {
public:
    /// @param layer  layer number stored in _layer
    /// @param index  position within the layer, stored in _index
    /// @param value  raw integer input; the constructor in Neuron.cpp stores
    ///               value / 255.0 (named `value`, as in the definition, so it
    ///               no longer shadows the `_value` member)
    InputNeuron(int layer, int index, int value);
    /// Returns the stored value.
    float computeOutput() override;

private:
    float _value = 0.0f;  // scaled input returned by computeOutput()
};
/// Neuron that combines the outputs of earlier neurons: each input has a
/// weight, and a bias is added before the sigmoid is applied (see Neuron.cpp).
class HiddenNeuron : public Neuron {
public:
    /// Creates an unconnected neuron with zero bias.
    HiddenNeuron() {}
    /// Creates an unconnected neuron tagged with its layer number and index.
    HiddenNeuron(int layer, int index);
    /// Returns the sigmoid of the bias plus the weighted input outputs.
    float computeOutput() override;
    /// Registers previousNeuron as an input with the given weight. Only the
    /// pointer is stored (no copy is made), so the pointed-to neuron must stay
    /// alive, at the same address, for as long as this neuron is evaluated.
    void addSynapse(Neuron* previousNeuron, float weight);
    void setBias(float b);
    // NOTE(review): no definitions of getBias/getWeight/setWeight appear in the
    // visible part of Neuron.cpp -- confirm they exist before calling them.
    float getBias();
    float getWeight(int index);
    void setWeight(int index, float w);

protected:
    std::vector<float> weights;            // weights[i] belongs to previousNeurons[i]
    float bias = 0.0f;
    int previousNeuronCount = 0;           // incremented by every addSynapse call
    std::vector<Neuron*> previousNeurons;  // input neurons; stored as raw pointers
};
/// Output-layer neuron. It adds nothing to HiddenNeuron except its own
/// (layer, index) constructor, so it evaluates exactly like a hidden neuron.
class OutputNeuron : public HiddenNeuron {
public:
    OutputNeuron(int layer, int index);
};
#endif
Neuron.cpp
#include "Neuron.h"
#include <stdio.h>
#include <math.h>
#include <vector>
// Logistic function 1 / (1 + e^(-x)); the result is narrowed to float on return.
float sigmoid(float x) {
    const auto denominator = 1.0 + exp(-x);
    return 1.0 / denominator;
}
// ---- constructors ----
// The base constructor has no work to do of its own.
Neuron::Neuron() = default;
// Constructor for HiddenNeuron: records where the neuron sits in the network
// and starts it with no synapses.
// _layer and _index are members of the Neuron base, so they are assigned in
// the body rather than in the member-initializer list.
HiddenNeuron::HiddenNeuron(int layer, int index) : previousNeuronCount(0) {
    _layer = layer;
    _index = index;
}
// Constructor for InputNeuron: records the neuron's position and stores the
// integer input divided by 255.0.
InputNeuron::InputNeuron(int layer, int index, int value)
    : _value(static_cast<float>(value / 255.0)) {
    _layer = layer;
    _index = index;
}
// Constructor for OutputNeuron: the HiddenNeuron(layer, index) constructor
// already sets _index, _layer and previousNeuronCount, so simply forward to it.
OutputNeuron::OutputNeuron(int layer, int index) : HiddenNeuron(layer, index) {
}
// Fallback computeOutput of the base class. It is virtual but not pure: if a
// derived class fails to override it, this version prints a warning and
// reports an output of zero.
float Neuron::computeOutput() {
    printf("Mega fucking warning, this is the virtual function, needs to be overloaded\n");
    return 0.0f;
}
// InputNeuron override: an input neuron's output is just its stored value.
float InputNeuron::computeOutput() {
    return _value;
}
// HiddenNeuron override: returns sigmoid(bias + sum_i weights[i] * input_i),
// where input_i is the (recursively computed) output of previousNeurons[i].
float HiddenNeuron::computeOutput() {
    printf("evaluating %d, %d\n", _layer, _index);

    float sum = bias;
    // Start at 0: weights[0] / previousNeurons[0] is the first synapse added by
    // addSynapse (the bias is held separately), so beginning at 1 would
    // silently drop that input.
    for (size_t i = 0; i < weights.size(); ++i) {
        sum += weights[i] * previousNeurons[i]->computeOutput();
    }
    return sigmoid(sum);
}
// Adds one input connection. The weight and the neuron pointer are appended
// to parallel vectors, so they share the same index.
void HiddenNeuron::addSynapse(Neuron* previousNeuron, float weight) {
    ++previousNeuronCount;
    weights.push_back(weight);
    previousNeurons.push_back(previousNeuron);
}
void HiddenNeuron::setBias(float b){
bias = b;
}
helpers.h
#ifndef HELPERS_H
#define HELPERS_H
#include <vector>
#include "Neuron.h"
// Returns a pseudo-random float in [-0.5, 0.5].
float randFloat();
// NOTE(review): no definition of intToUnary appears in the visible helpers.cpp.
void intToUnary(int, float*);
// Mean squared error of two float arrays holding listSize elements each.
float MSE(float* a, float* b, int listSize);
// Appends `size` input neurons to *thisLayer.
void createInputLayer(int size, std::vector<InputNeuron>* thisLayer);
// Appends `size` neurons to *thisLayer, each connected to every neuron of
// prevLayer.
// prevLayer is taken BY REFERENCE: the new neurons store pointers to its
// elements, so those pointers must refer to the caller's vector and not to a
// function-local copy that is destroyed on return. The caller must keep
// prevLayer alive and must not resize it while the new layer is in use.
//for when you're attaching to an input layer
void createHiddenLayer(int size, std::vector<InputNeuron>& prevLayer, std::vector<HiddenNeuron>* thisLayer);
void createOutputLayer(int size, std::vector<HiddenNeuron>& prevLayer, std::vector<OutputNeuron>* thisLayer);
#endif
helpers.cpp
#include <math.h>
#include <stdlib.h>
#include "helpers.h"
#include "fileio.h"
// Returns a pseudo-random float in [-0.5, 0.5] (rand() scaled to [0, 1], then
// shifted down by 0.5).
float randFloat() {
    return static_cast<float>(rand()) / static_cast<float>(RAND_MAX) - 0.5;
}

// Mean squared error between listA and listB.
// Both arrays must hold at least listSize elements. Returns 0 when listSize is
// not positive, instead of dividing by zero.
float MSE(float* listA, float* listB, int listSize) {
    if (listSize <= 0) {
        return 0.0f;
    }
    float squareError = 0.0f;
    for (int i = 0; i < listSize; i++) {
        const float diff = listA[i] - listB[i];
        squareError += diff * diff;  // plain multiply instead of pow(x, 2)
    }
    return squareError / listSize;
}

// Appends `size` input neurons (layer 0) to *thisLayer. Each neuron is built
// from the next value returned by getNextPixel() (declared in fileio.h).
void createInputLayer(int size, std::vector<InputNeuron>* thisLayer) {
    if (size > 0) {
        thisLayer->reserve(thisLayer->size() + static_cast<size_t>(size));
    }
    for (int i = 0; i < size; i++) {
        thisLayer->push_back(InputNeuron(0, i, getNextPixel()));
    }
}

// Appends `size` hidden neurons (layer 1) to *thisLayer. Every neuron gets a
// random bias and one randomly weighted synapse per neuron of prevLayer.
// The synapses hold the addresses of prevLayer's elements, which is why
// prevLayer must be the caller's own vector (passed by reference) and must
// outlive, and not be resized during, any use of *thisLayer.
void createHiddenLayer(int size, std::vector<InputNeuron>& prevLayer, std::vector<HiddenNeuron>* thisLayer) {
    if (size > 0) {
        thisLayer->reserve(thisLayer->size() + static_cast<size_t>(size));
    }
    for (int i = 0; i < size; i++) {
        HiddenNeuron h(1, i);
        h.setBias(randFloat());
        for (InputNeuron& previous : prevLayer) {
            h.addSynapse(&previous, randFloat());
        }
        thisLayer->push_back(h);
    }
}

// Appends `size` output neurons (layer 2) to *thisLayer, wired to prevLayer in
// the same way as createHiddenLayer; the same lifetime rule applies to
// prevLayer.
void createOutputLayer(int size, std::vector<HiddenNeuron>& prevLayer, std::vector<OutputNeuron>* thisLayer) {
    if (size > 0) {
        thisLayer->reserve(thisLayer->size() + static_cast<size_t>(size));
    }
    for (int i = 0; i < size; i++) {
        OutputNeuron h(2, i);
        h.setBias(randFloat());
        for (HiddenNeuron& previous : prevLayer) {
            h.addSynapse(&previous, randFloat());
        }
        thisLayer->push_back(h);
    }
}
Apologies for the long code, but I don't want to remove anything, as I'm not sure what's actually causing the problem!
Basically, I have a base `Neuron` class, from which the `InputNeuron`, `HiddenNeuron`, and `OutputNeuron` classes inherit and expand. In main, I use helper functions to create and populate vectors for each layer of the network, and then call the `computeOutput` function to step back through the network and eventually get the output. It correctly finds the first element in the output vector, then follows the pointer to a `HiddenNeuron` in the hidden layer, and then tries to follow a pointer back to the input layer. The problem seems to be that the pointers to the input layer are not valid, and there is then a stack overflow (line 52, Neuron.cpp). My initial thought was that passing a pointer to the base class might not work when the object being pointed to is of a derived class, and therefore of a different size. Am I on the right track? Thanks in advance!
Edit: This setup worked when the construction and population of the vectors was done inside main; the stack overflow only appeared after moving this into functions in a separate file.
Closed: I needed to change my functions to take the vectors by reference, because I was taking pointers to elements of vectors passed by value, which were actually copies that no longer existed after the function returned. Thank you, everyone :)