1

EDIT: As suggested by others I post the whole code which can be run the only think you would need is to have Eigen installed.

This is the main header with three classes, in summary ADNodeInstance is a data holder, ADNode is encapsulating a pointer to ADNodeInstance, and ADGraphBuilder is a main class which holds all the ADNodeInstances and manages them.:

//
// Created by alex on 07/05/15.
//

#ifndef ADVIBE_STACK_H
#define ADVIBE_STACK_H

#include "memory"
#include "iostream"
#include "fstream"
#include "Eigen/Dense"

typedef Eigen::MatrixXd Matrix;

enum class OPERATOR {
    NONE,
    ADD,
    UNNARY_NEGATION,
    MULTIPLY,
    DOT_MULTIPLY, // Linear algebra
    VERTICAL_CONCATENATION,
    TRANSPOSE,
    SUBINDEX,
    TANH
};


enum TYPE{
    CONSTANT,
    INPUT,
    INPUT_DERIVED,
    GRADIENT
};


/**
 * Main graph classes
 */

class ADNodeInstance {
public:
    TYPE type;
    int id;
    OPERATOR op;
    int dim1, dim2;
    std::string name;
    Matrix value;
    std::vector<std::weak_ptr<ADNodeInstance>> parents;
    std::vector<std::weak_ptr<ADNodeInstance>> children;

    ADNodeInstance(TYPE type, int id, OPERATOR op, int dim1, int dim2, std::string name) :
            type(type), id(id), op(op), dim1(dim1), dim2(dim2), name(name) { };

    ADNodeInstance(TYPE type, int id, OPERATOR op, const Eigen::Ref<const Matrix> &value) :
            type(type), id(id), op(op), dim1(value.rows()), dim2(value.cols()), value(value) { };

    ADNodeInstance(TYPE type, int id, OPERATOR op, int dim1, int dim2) :
            type(type), id(id), op(op), dim1(dim1), dim2(dim2) { }

    ~ADNodeInstance() { };
};

class ADGraphBuilder;

class ADNode{
public:
    ADGraphBuilder* builder;
    std::shared_ptr<ADNodeInstance> pointer;
    ADNode(){};
    ADNode(ADGraphBuilder* builder):
            builder(builder){};
    ADNode(const ADNode& node):
            builder(node.builder), pointer(node.pointer){};
    ~ADNode() {};

    inline ADNode getVariable(OPERATOR op, const int dim1, const int dim2) const;

    inline ADNode apply(OPERATOR op) const{
        ADNode result = getVariable(op, this->pointer->dim1, this->pointer->dim2);
        result.pointer->parents.push_back(pointer);
        pointer->children.push_back(result.pointer);
        return result;
    };

    inline ADNode tanh() const{
        return apply(OPERATOR::TANH);
    }

    inline ADNode transpose() const{
        ADNode result = getVariable(OPERATOR::TRANSPOSE, this->pointer->dim2, this->pointer->dim1);
        result.pointer->parents.push_back(pointer);
        pointer->children.push_back(result.pointer);
        return result;
    }

    inline ADNode  operator-() const{
        std::cout << pointer->id << " U " << std::endl;
        std::cout << "NEG: " << builder << " - " << pointer->id << std::endl;
        ADNode node = apply(OPERATOR::UNNARY_NEGATION);
        std::cout << "NEG2: " << builder << " - " << pointer->id << std::endl;
        return node;
    };

    inline ADNode block(const int loc1, const int loc2, const int dim1, const int dim2) const {
        ADNode result = getVariable(OPERATOR::SUBINDEX, dim1, dim2);
        result.pointer->parents.push_back(pointer);
        pointer->children.push_back(result.pointer);
        return result;
    }
};



class ADGraphBuilder{
protected:
    int counter;
public:
    std::vector<ADNode> nodes;
    ADGraphBuilder():
            counter(0){};
    ~ADGraphBuilder(){};

    std::vector<ADNode> gradient(const ADNode& target, const ADNode& param);
    ADNode createGradientMessage(const ADNode& child, const ADNode& parent, const ADNode& directGradient);

    inline ADNode createInternalVariable(const TYPE type, const OPERATOR op, const int dim1, const int dim2) {
        ADNode result = ADNode(this);
        result.pointer = std::make_shared<ADNodeInstance>(type, counter++, op, dim1, dim2);
        nodes.push_back(result);
        return result;
    }

    inline ADNode createInternalVariable(const TYPE type, const OPERATOR op, const int dim1, const int dim2, const std::string name) {
        ADNode result = ADNode(this);
        result.pointer = std::make_shared<ADNodeInstance>(type, counter++, op, dim1, dim2, name);
        nodes.push_back(result);
        return result;
    }

    inline ADNode createConstantVariable(const Eigen::Ref<const Matrix>& value){
        ADNode result = ADNode(this);
        result.pointer = std::make_shared<ADNodeInstance>(TYPE::CONSTANT, counter++, OPERATOR::NONE , value);
        nodes.push_back(result);
        return result;
    }

    inline ADNode createVariable(const int dim1, const int dim2){
        ADNode result = ADNode(this);
        result.pointer = std::make_shared<ADNodeInstance>(TYPE::INPUT, counter++, OPERATOR::NONE , dim1, dim2);
        nodes.push_back(result);
        return result;
    }

};

inline ADNode ADNode::getVariable(OPERATOR op, const int dim1, const int dim2) const{
    if(this->pointer->type == GRADIENT)
        return builder->createInternalVariable(GRADIENT, op, dim1, dim2);
    else
        return builder->createInternalVariable(INPUT_DERIVED, op, dim1, dim2);
}

/**
 * Concatenation
 */

inline ADNode vertCat(const ADNode& lhs, const ADNode& rhs) {
    if (lhs.builder != rhs.builder) {
        std::cerr << "Can not operate with notes from different builder instances!" << std::endl;
        throw 1;
    }
    if (lhs.pointer->dim2 != rhs.pointer->dim2) {
        std::cerr << "Can not perform vertical concatanation on nodes with different size in dimension 2." << std::endl;
        throw 2;
    }

    ADNode result = lhs.builder->createInternalVariable(INPUT_DERIVED, OPERATOR::VERTICAL_CONCATENATION,
                                                        lhs.pointer->dim1 + rhs.pointer->dim1, lhs.pointer->dim2);
    result.pointer->parents.push_back(lhs.pointer);
    result.pointer->parents.push_back(rhs.pointer);
    lhs.pointer->children.push_back(result.pointer);
    rhs.pointer->children.push_back(result.pointer);
    return result;
}

inline ADNode vertCat(const ADNode& lhs, const double rhs) {
    return vertCat(lhs, lhs.builder->createConstantVariable(Matrix::Constant(1,1, rhs)));
}

/**
 * Joining nodes
 */
ADNode joinNodes(const OPERATOR op, const ADNode& lhs, const ADNode& rhs){
    if(lhs.builder != rhs.builder){
        std::cerr << "Different builders " << std::endl;
        throw 5;
    }
    ADNode  result = lhs.builder->createInternalVariable(INPUT_DERIVED, op , 1, 1);
    result.pointer->parents.push_back(lhs.pointer);
    result.pointer->parents.push_back(rhs.pointer);
    lhs.pointer->children.push_back(result.pointer);
    rhs.pointer->children.push_back(result.pointer);
    return result;
}

inline ADNode dot(const ADNode& lhs, const ADNode& rhs){
    return joinNodes(OPERATOR::DOT_MULTIPLY, lhs, rhs);
}

inline ADNode tanh(const ADNode& lhs){
    return lhs.tanh();
}

inline ADNode operator*(const ADNode& lhs, const ADNode& rhs){
    std::cout << "MULT: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
    ADNode node = joinNodes(OPERATOR::MULTIPLY, lhs, rhs);
    std::cout << "MULT2: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
    return node;
}

inline ADNode operator+(const ADNode& lhs, const ADNode& rhs){
    std::cout << "ADD: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
    ADNode node = joinNodes(OPERATOR::ADD, lhs, rhs);
    std::cout << "ADD2: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
    return node;
}

inline ADNode operator-(const double lhs, const ADNode& rhs){
    return rhs.builder->createConstantVariable(Matrix::Constant(1,1,lhs)) + (-rhs);
}

inline int getIndex(const std::vector<std::weak_ptr<ADNodeInstance>> vector, const ADNode& target){
    for (int i = 0; i < vector.size(); ++i) {
        auto item = vector[i].lock();
        if (item->id == target.pointer->id)
            return i;
    }
    return -1;
}


std::vector<ADNode> ADGraphBuilder::gradient(const ADNode &target, const ADNode& params) {
    std::vector<ADNode> results{1, this};
    // Will contain information about the subtree that effects the target node
    bool subtree[nodes.size()];
    std::vector<std::vector<ADNode>> childrenGradients(nodes.size(), std::vector<ADNode>{});
    std::vector<ADNode> directGradients{nodes.size(), this};
    subtree[target.pointer->id] = true;
    for (int i = target.pointer->id; i >= 0; i--) {
        std::cout << "passed " << i << std::endl;
        if(subtree[i]){
            // Create a new gradient node summing up all incoming gradient messages
            if(i == target.pointer->id) {
                directGradients[i] = createConstantVariable(Matrix::Ones(target.pointer->dim1, target.pointer->dim2));
                directGradients[i].pointer->name = "Gradient[" + std::to_string(i) + "]";
            }
            else
                directGradients[i] = createInternalVariable(GRADIENT, OPERATOR::ADD,
                                                            nodes[i].pointer->dim1, nodes[i].pointer->dim2, std::to_string(i));
            for (auto child : childrenGradients[i]) {
                directGradients[i].pointer->parents.push_back(child.pointer);
                child.pointer->children.push_back(directGradients[i].pointer);
            }
            for (auto item : nodes[i].pointer->parents) {
                auto parent = item.lock();
                // Check if parent is not constant
                if(parent->type != CONSTANT) {
                    // Mark parent as node to traverse
                    subtree[parent->id] = true;
                    // Create the corresponding message as a new node and add it to the parents list
                    // of gradient incoming messages
                    ADNode capsule = ADNode(this);
                    capsule.pointer = parent;
                    //childrenGradients[parent->id].push_back(createConstantVariable(Matrix::Constant(1,1,1)));
                    std::cout << "Capsule: " << capsule.builder << " - " << capsule.pointer->id << std::endl;
                    childrenGradients[parent->id].push_back(createGradientMessage(nodes[i], capsule, directGradients[i]));
                    std::cout << "Capsule: " << capsule.builder << " - " << capsule.pointer->id << std::endl;
                    //childrenGradients[parent->id].back().pointer->name = "Gradinet[" + childrenGradients[parent->id].back().pointer->name + "]";
                }
            }
        }
    }
    return results;
};

ADNode ADGraphBuilder::createGradientMessage(const ADNode& child, const ADNode& parent, const ADNode& directGradient){
    switch (child.pointer->op){
        case(OPERATOR::NONE):
                std::cout << "Making gradient of an INPUT node? Are you serious?";
            return createInternalVariable(CONSTANT, OPERATOR::NONE, child.pointer->dim1,child.pointer->dim2,
                                          std::to_string(child.pointer->id) + "->" + std::to_string(parent.pointer->id));
        case(OPERATOR::ADD):
            return directGradient;
        case(OPERATOR::UNNARY_NEGATION):
            return -directGradient;
        case(OPERATOR::DOT_MULTIPLY):
            if(child.pointer->parents.size() == 2){
                int index = getIndex(child.pointer->parents, parent);
                ADNode otherParent(child.builder);
                otherParent.pointer = child.pointer->parents[1-index].lock();
                if(index == 0)
                    return dot(directGradient,otherParent.transpose());
                else
                    return dot(otherParent.transpose(),directGradient);
            }
            else{
                std::cerr<< "Unimplemented" << std::endl;
                return NULL;
            }
        case(OPERATOR::VERTICAL_CONCATENATION): {
            int index = getIndex(child.pointer->parents, parent);
            int start = 0;
            for (int i = 0; i < index; ++i) {
                auto parent = child.pointer->parents[i].lock();
                start += parent->dim1;
            }
            return directGradient.block(start, 0, parent.pointer->dim1, parent.pointer->dim2);
        }
        case(OPERATOR::TANH):
            return directGradient * child * (1 - child);
    }
}


#endif //ADVIBE_STACK_H

The main program:

#include <iostream>
#include "Eigen/Dense"
#include "stack.h"

int main() {

    ADGraphBuilder builder;
    ADNode param1 = builder.createVariable(2,1);
    ADNode param2 = builder.createVariable(2,3);
    Matrix sad(1,1);
    sad << 2;
    ADNode h = tanh(dot(param2, vertCat(param1,1)));
    for (int i = 0; i < 0; ++i) {
        h = tanh(dot(param2,vertCat(h,1)));
    }
    ADNode e = dot(h.transpose(), h);
    auto grad = builder.gradient(e, param2);
    std::cout << "(" << e.pointer->dim1 << ", " << e.pointer->dim2 << ")" << std::endl;
    return 0;
}

When I ran it the output I get is the following:

passed 7
Capsule: 0x7ffffff7a750 - 6
Capsule: 0x7ffffff7a750 - 6
Capsule: 0x7ffffff7a750 - 5
Capsule: 0x7ffffff7a750 - 5
passed 6
Capsule: 0x7ffffff7a750 - 5
Capsule: 0x7ffffff7a750 - 5
passed 5
Capsule: 0x7ffffff7a750 - 4
5 U 
NEG: 0x7ffffff7a750 - 5
NEG2: 0x7ffffff7a750 - 5
ADD: 0x7ffffff7a750 - 15
ADD2: 0x7ffffff7a750 - 15
MULT: 0 - 5
Different builders 
terminate called after throwing an instance of 'int'

Process finished with exit code 134

According to this this mean that in the expression in function ADGraphBuilder::createGradientMessage for TANH for some reason the expression child * (1 - child) after the right operand is evaluated the child variable is garbage collected or I don't know?

Any ideas and suggestions are welcome and still thanks to Azad and Whoz for the discussion so far.

Alex Botev
  • 1,369
  • 2
  • 19
  • 34
  • Looks like "ADNode result = ADNode(this)" makes suspicious modifications – madduci May 07 '15 at 04:21
  • 1
    While you're at it, Include the code for the ADNode constructor takes `ADNode(this)`. That isn't a "normal" construct. And of course, copy and move ctors and assignment ops are probably worth looking at. – WhozCraig May 07 '15 at 04:22
  • I've correct it. That is the constructor since it is a member function of the builder class. Hope what I updated makes it more sensible. – Alex Botev May 07 '15 at 04:30
  • That constructor taking a pointer to `ADGraphBuilder` worries me. What if the pointer is to an object that is destructed before the `ADNode` object is destructed, like a temporary object? That would leave you with a stale pointer and probable UB. – Some programmer dude May 07 '15 at 04:36
  • I agree `ADNode(this)` is suspicious. `this` is a pointer and you're casting it. Did you meant `ADNode(*this)` ? – Azad May 07 '15 at 04:38
  • I agree, but nevertheless the object is not destroyed. The builder itself is the only thing that actually holds a shared_ptr to the ADNodeInstances, so if it get destroyed everything else does (It is essentially like my management system) so in my case I can assume it is living – Alex Botev May 07 '15 at 04:39
  • @Azad what do you mean I'm casting it? Probably you were looking at before when I did not included that the function is a member of the class ADGraphBuilder or? – Alex Botev May 07 '15 at 04:42
  • Well, based on it's implementation and how you're using `builder` and `pointer`, the `ADNode` copy-ctor isn't required at all (if I'm reading this correctly). The default would suffice. Not that it addresses your problem, but just wanted to mention that is the behavior you've coded (and it sounds like you're probably aware of that). Is `ADNodeInstance` derived from `ADNode`, perchance? – WhozCraig May 07 '15 at 04:56
  • Could you post your main function as well? – Azad May 07 '15 at 04:56
  • Correct me if I'm wrong, but if this is correct: http://www.tutorialspoint.com/cplusplus/unary_operators_overloading.htm it seems to me that the unary operator in fact stores the returned value in the variable that it was invoked on. E.g. the unary negation will try to somehow modify the rhs reference rather than create a new one? – Alex Botev May 07 '15 at 04:57
  • I posted the main. The interesting thing though is that it does not fail on the first but on almost the last operation of the gradient function and somehow works fine for the previous. (E.g. this is when taking gradient of tanh which is (1 - node) * node) – Alex Botev May 07 '15 at 05:07
  • @Belov only if you code it to do so, and imho, that sample linked is *dreadful*. You don't expect `int y = 1, x = -y;` to end with `y == -1`. It at least appears you're trying to do [the right thing](http://stackoverflow.com/questions/2155275/how-to-overload-unary-minus-operator-in-c). My advise at this point would be to slam output or breakpoints in the only constructors you've defined and log the values being brought to them. You'll likely do the same for the invoke-points as well. – WhozCraig May 07 '15 at 05:08
  • In that example ctor is in return. most compilers do something called return value optimization. that object will be constructed outside the function. but you're constructing inside, returning it, destructing it (it goes out of scope), creating another one as temporary outside and using that. There's a book called 'More Effective C++' by Scott Meyers. See item 20. – Azad May 07 '15 at 05:12
  • @Azad can you suggest away to do this? Just to make it clear I need to return by value not reference, since if I return by reference things like a*b*c would not evaluate because of the temporary - that's why I'm returning by value. Otherwise I did started with references, but for the users I need these things to work. – Alex Botev May 07 '15 at 05:15
  • Ok, I'll try to cut some code which to reproduce the problem, which to be fully runnable, but you would still need Eigen installed. Gona post it a bit later as it is 6 am and I wana go to bed. Thanks for now if you spot something please post a comment/answer. If not would be very nice if you later today after I post it you can have a look, maybe you'll be more successfull in it. – Alex Botev May 07 '15 at 05:22
  • I think it's the other way around. If you return an object. It will be unnamed and temporary. If you return a reference you can use it in other operations although it should be constant reference for some of your operators. – Azad May 07 '15 at 05:27
  • If you want you can try it, it will complain you can't construct a reference to a temporary object. – Alex Botev May 07 '15 at 05:30
  • Try this: inline const ADNode& operator-(const double lhs, const ADNode& rhs){ std::cout << lhs <<"d - " << rhs.pointer->id << " " << std::endl; ADNode& adNode = rhs.builder->createConstantVariable(Matrix::Constant(1,1,lhs)) + (-rhs); std::cout << lhs <<"d - " << rhs.pointer->id << " " << adNode.pointer->id << std::endl; return adNode; } – Azad May 07 '15 at 05:35
  • You really should send a complete program – Azad May 07 '15 at 05:40

0 Answers0