-2

I am trying to write a simple neural network, and at the same time, improving my OOP skills.

main.cpp

#include <stdio.h>
#include <math.h>
#include <vector>
#include <unistd.h>

using namespace std;

#include "Neuron.h"
#include "fileio.h"
#include "helpers.h"

#define IMAGE_HEIGHT 28
#define IMAGE_WIDTH 28
#define L0SIZE IMAGE_HEIGHT*IMAGE_WIDTH
#define L1SIZE 30
#define L2SIZE 10

int main(){

    printf("ok %d, %d, %d, %d\n", sizeof(Neuron), sizeof(InputNeuron), sizeof(HiddenNeuron), sizeof(OutputNeuron));
    int labelVal = getNextLabel();
    
    vector<InputNeuron> inputLayer;
    createInputLayer(L0SIZE, &inputLayer);

    vector<HiddenNeuron> hiddenLayerOne;
    createHiddenLayer(L1SIZE, inputLayer, &hiddenLayerOne);
   

    vector<OutputNeuron> outputLayer;
    createOutputLayer(L2SIZE, hiddenLayerOne, &outputLayer);
    
    printf("added all neurons\n");
    //do the recursive backwards sweep through the NN to find the outputs
    for(int i = 0; i < outputLayer.size(); i++){
        printf("output %d, value %f\n", i, outputLayer[i].computeOutput());
    }

    return(0); 
}

Neuron.h

#ifndef NEURON_H
#define NEURON_H
#include <vector>

class Neuron{
    public: 
        Neuron();
        virtual float computeOutput();
        int _index;
        int _layer;
};

class InputNeuron: public Neuron{
    public:
        InputNeuron(int layer, int index, int _value);
        float computeOutput() override;
    private:
        float _value;
};

class HiddenNeuron: public Neuron{
    public:
        HiddenNeuron(){};
        HiddenNeuron(int layer, int index);
        float computeOutput() override;
        void addSynapse(Neuron* previousNeuron, float weight);

        void setBias(float b);
        float getBias(void);

        float getWeight(int index);
        void setWeight(int index, float w);

    protected:
        std::vector<float> weights;
        float bias = 0.0;
        int previousNeuronCount = 0;
        std::vector<Neuron*> previousNeurons;
};

class OutputNeuron: public HiddenNeuron{
    public:
        OutputNeuron(int layer, int index);
};  
#endif

Neuron.cpp

#include "Neuron.h"
#include <stdio.h>
#include <math.h>
#include <vector>

float sigmoid(float x){
    return(1.0/(1.0+exp(-x)));
}

//constructors
Neuron::Neuron(){
}

//overrides base(Neuron) constructor for the InputNeuron class
HiddenNeuron::HiddenNeuron(int layer, int index){
    _index = index;
    _layer = layer;
    previousNeuronCount = 0;
}

//overrides base(Neuron) constructor for the HiddenNeuron class
InputNeuron::InputNeuron(int layer, int index, int value){
    _index = index;
    _layer = layer;
    _value = value/255.0;
}

//overrides base(Neuron) constructor for the OutputNeuron class
OutputNeuron::OutputNeuron(int layer, int index){
    _index = index;
    _layer = layer;
    previousNeuronCount = 0;
}

//pure computeOutput function
float Neuron::computeOutput(){
    printf("Mega fucking warning, this is the virtual function, needs to be overloaded\n");
    return(0.0);
}

//overrides the computeOutput of the base Neuron class
float InputNeuron::computeOutput(){
    return(_value);
}

//overrides the computeOutput of the base Neuron class
float HiddenNeuron::computeOutput(){
    float sum = bias;
    printf("evaluating %d, %d\n", _layer, _index);
    for(int i = 1; i < weights.size(); i++){
        sum  += weights[i]*(previousNeurons[i]->computeOutput());
    }
    return(sigmoid(sum));
}

void HiddenNeuron::addSynapse(Neuron* previousNeuron, float weight){
    previousNeurons.push_back(previousNeuron);
    weights.push_back(weight);
    previousNeuronCount++;
}

void HiddenNeuron::setBias(float b){
    bias = b;
}

helpers.h

#ifndef HELPERS_H
#define HELPERS_H

#include "Neuron.h"

float randFloat();

void intToUnary(int, float*);

float MSE(float* a, float* b, int listSize);

void createInputLayer(int size, std::vector<InputNeuron>* thisLayer);

//for when you're attaching to an input layer
void createHiddenLayer(int size, std::vector<InputNeuron> prevLayer, std::vector<HiddenNeuron>* thisLayer);

void createOutputLayer(int size, std::vector<HiddenNeuron> prevLayer, std::vector<OutputNeuron>* thisLayer);

#endif

helpers.cpp

#include <math.h>
#include "helpers.h"
#include "fileio.h"


float randFloat(){
    return(static_cast <float> (rand()) / static_cast <float> (RAND_MAX) - 0.5);
}

//both lists must be of order given by listSize
float MSE(float* listA, float* listB, int listSize){
    float squareError = 0.0;
    for(int i = 0; i < listSize; i++){
        squareError += pow((listA[i] - listB[i]), 2);
    }
    return(squareError/listSize);
}

//wow vectors so cool B)
void createInputLayer(int size, std::vector<InputNeuron>* thisLayer){
    
    for(int i = 0; i < size; i++){
        thisLayer->push_back(InputNeuron(0, i, getNextPixel()));
    }
}

void createHiddenLayer(int size, std::vector<InputNeuron> prevLayer, std::vector<HiddenNeuron>* thisLayer){
    for(int i = 0; i < size; i++){
        HiddenNeuron h(1, i);
        h.setBias(randFloat());
        for (int j = 0; j < prevLayer.size(); j++){
            h.addSynapse(&prevLayer[j], randFloat());
        }
        thisLayer->push_back(h);
    }
}

void createOutputLayer(int size, std::vector<HiddenNeuron> prevLayer, std::vector<OutputNeuron>* thisLayer){
    for(int i = 0; i < size; i++){
        OutputNeuron h(2, i);
        h.setBias(randFloat());
        for (int j = 0; j < prevLayer.size(); j++){
            h.addSynapse(&prevLayer[j], randFloat());
        }
        thisLayer->push_back(h);
    }
}

apologies for the long code, but I don't want to remove anything as I'm not sure what's actually causing the problem!

basically, i have a base Neuron class, from which the inputNeuron, hiddenNeuron, and outputNeuron classes inherit and expand. In main, i use helper functions to create and populate vectors for each layer of the network, and then call the computeOutput function to step back through the network and eventually get the output. It correctly finds the first element in the output vector, then follows the pointer to a hiddenNeuron in the hidden layer, and then tries to follow a pointer back to the input layer. The problem seems to be that the pointers to the input layer are not valid and there is then a stack overflow (line 52, Neuron.cpp). My initial thoughts were that passing a pointer to the base class might not gel when the object being pointed to is of a derived class, and therefore of a different size. Am I on the right track? thanks in advance!

Edit: This setup worked when the construction and population of the vectors was done inside the main loop, only when moving this to functions in a seperate file did this stack overflow occur

Closed: I needed to change my function calls to use pass by reference because I was taking pointers from arrays passed by value, which were actually copies (since they were passed by value). Thank you everyone :)

  • 4
    "but I don't want to remove anything as I'm not sure what's actually causing the problem!" This is actually a good testing process: remove stuff until the problem is fixed (or start from zero and add stuff until the problem manifests). – L. Scott Johnson Aug 10 '20 at 18:10
  • Which line is line 52? – nada Aug 10 '20 at 18:11
  • @nada sorry for omitting that! line 52 is in function `float hiddenNeuron::computeOutput()`, and the specific line is `sum += weights[i]*(previousNeurons[i]->computeOutput());`. Contextually, this is the hidden neuron attempting to call computeOutput on an input neuron, the pointer to which appears invalid – Raphael Treccani-Chinelli Aug 10 '20 at 18:14
  • @L.ScottJohnson i appreciate that this can be a valuable debugging strategy, but I meant this in the context of omitting code from my question and consequently being asked to provide it – Raphael Treccani-Chinelli Aug 10 '20 at 18:16
  • 2
    Here's a different point of view: If you haven't attempted to isolate the problem by playing a few rounds of divide and conquer, you've been wasting your time on ineffective debugging. – user4581301 Aug 10 '20 at 18:17
  • 2
    @user3423925 First thing to try is to look at the call stack and variables to try to figure out the recursivity loop. If it is hard to understand, put a conditionnal breakpoint so that it stop after n hit inside `computeOutput` and trace code from that point. Or start from the beginning and compare results from the code with expected values (on paper). – Phil1970 Aug 10 '20 at 18:24
  • Before you spend too much time debugging, resolve the slicing problem pointed out by v010dya's answer. While there's merit in learning what went wrong here for future reference, debugging why a program that cannot work has a stack overflow won't get the program done – user4581301 Aug 10 '20 at 18:29
  • @Phil1970 I've stepped through the computeOutput function and determined that it correctly moves from layer 2 (the output layer), through to the first element in layer 1, and then fails to move into layer 0. – Raphael Treccani-Chinelli Aug 10 '20 at 18:33
  • 2
    I don't know if you are coming from another language where you previously learned object-oriented programming, but you have so many things wrong with your code. One thing seems to be that you don't realize that C++ uses value-semantics, not reference semantics. For example, this: `void createHiddenLayer(int size, std::vector prevLayer, std::vector* thisLayer){` and then doing this: `h.addSynapse(&prevLayer[j], randFloat());` -- will never work due to the vector being passed by value, thus that address becomes invalid as soon as that function returns. – PaulMcKenzie Aug 10 '20 at 18:38
  • 1
    You should try to create a [mcve]. That will help understand the problem. – Thomas Sablik Aug 10 '20 at 18:43
  • @PaulMcKenzie I was never taught object oriented programming (i took a course on C at university and then spent a while doing C and assembly for embedded applications which is why my OOP is very naive). I definitely have more to learn, but sometimes it's hard to get a straight answer. one of the things that i initially did was pass all vectors by value, but then decided to try converting them to pass by reference. I guess if I had a more knowledgeable programmer around I'd ask them if it was ok to pass by value for vector types as i've never used them before – Raphael Treccani-Chinelli Aug 10 '20 at 18:44
  • @user3423925 -- When you pass anything by value, whether it is as simple as an `int`, or "complex" like a `std::vector`, the function is using a temporary copy of what is passed. As soon as that function is completed, that temporary is gone. So you now have the address of something that no longer exists. It's basically that simple -- so whatever code you have now literally cannot work correctly, so expect strange errors to occur. – PaulMcKenzie Aug 10 '20 at 18:48
  • @PaulMcKenzie thanks for the explanation. between the answer from you and the link to the slicing problem definition, I think i need to rework all my code to use references instead of values. This gels with my experience that the code worked before I moved the code to populate vectors into seperate functions – Raphael Treccani-Chinelli Aug 10 '20 at 18:55
  • @user3423925 Once you have determined that you have invalid pointers, the only thing you have to do is to determine why or when the become invalide. If you debug with run-time checks enable, it should be realatively easy to figure out. At least with Visual Studio default configuration, one can look hexadecimal values of garbage and have a good idea of the problem (deleted data, uninitialized data...). – Phil1970 Aug 10 '20 at 19:08

2 Answers2

2

It's too difficult to parse everything in your code, but you are making some serious mistakes when it comes to working with polymorphic c++ code. Your vector is a vector of objects, which means that when you push back an Input or an Output neuron you actually do several things:

1) Create a correct neuron
2) Run a copy constructor into a base class
3) Add that Base class

Make your base class abstract by making non-existent computeOutput() function purely virtual (look it up if you don't know what i'm talking about here. This will make your code fail to compile, but it will force you to restructure things, which will remove many errors.

I am, however, not 100% sure if this will resolve the specific error that you have. But it will put you on the right path to fix many things.

P.S. You may want to consider to use smart pointers, and place them into your vector. Just a thought.

v010dya
  • 5,296
  • 7
  • 28
  • 48
  • 2
    Key term to help understand this answer: [Object Slicing](https://stackoverflow.com/questions/274626/what-is-object-slicing) – user4581301 Aug 10 '20 at 18:22
  • @user4581301 thanks for the link. So i see that my base class is smaller than my derived classes (i tested this earlier with sizeof()). So this means i need to always move my derived classes by reference, not directly? Also there are some things I don't understand about @v010dya answer. for example, my base class is already abstract i think, since it contains the virtual function `computeOutput` which is overriden in the derived classes – Raphael Treccani-Chinelli Aug 10 '20 at 18:53
  • @user3423925 You are looking for a purely abstract function. `virtual float computeOutput() = 0;` if you have never seen `= 0` next to the function name, that does look strange but it is not a setting operator, see https://stackoverflow.com/questions/2156634/why-is-a-pure-virtual-function-initialized-by-0 – v010dya Aug 11 '20 at 04:38
2

When you call createHiddenLayer and createOutputLayer you pass the second argument prevLayer by value.

It means that you create a copy of the vector. The really bad thing is that you take address from that temporary copy in the line

h.addSynapse(&prevLayer[j], randFloat());

and addSynapse put copies of those temporaries addresses in previousNeurons

and use those pointers laters later when the the temporary copy is destroyed while leads to undefined behavior.

In practice, you should almost never pass vector (or any other container) by value. Even if you want to make a copy, it should generally be done as near as the target anyway.

Thus you should have:

void createHiddenLayer(int size, 
    std::vector<InputNeuron> &prevLayer,    // Pass by reference here!
    std::vector<HiddenNeuron>* thisLayer);

and

void createOutputLayer(int size, 
    std::vector<HiddenNeuron> &prevLayer,   // Pass by reference here!
    std::vector<OutputNeuron>* thisLayer);

Be aware that when you take the address of an element inside a vector, you must ensure that you won't add or remove items to that vector. Exact rules are defined in the documentation (see iterator invalidation in https://en.cppreference.com/w/cpp/container/vector).

Each container has its own rules. Either learn them or look the documentation each time you want to keep pointers or iterators while modifying the container.

As mentionned in another answer, you could also use a vector of smart pointers which could be useful if you need to modifying the source vector somehow.

Phil1970
  • 2,605
  • 2
  • 14
  • 15