EDIT: As suggested by others I post the whole code which can be run the only think you would need is to have Eigen installed.
This is the main header with three classes, in summary ADNodeInstance is a data holder, ADNode is encapsulating a pointer to ADNodeInstance, and ADGraphBuilder is a main class which holds all the ADNodeInstances and manages them.:
//
// Created by alex on 07/05/15.
//
#ifndef ADVIBE_STACK_H
#define ADVIBE_STACK_H
#include "memory"
#include "iostream"
#include "fstream"
#include "Eigen/Dense"
typedef Eigen::MatrixXd Matrix;
enum class OPERATOR {
NONE,
ADD,
UNNARY_NEGATION,
MULTIPLY,
DOT_MULTIPLY, // Linear algebra
VERTICAL_CONCATENATION,
TRANSPOSE,
SUBINDEX,
TANH
};
enum TYPE{
CONSTANT,
INPUT,
INPUT_DERIVED,
GRADIENT
};
/**
* Main graph classes
*/
class ADNodeInstance {
public:
TYPE type;
int id;
OPERATOR op;
int dim1, dim2;
std::string name;
Matrix value;
std::vector<std::weak_ptr<ADNodeInstance>> parents;
std::vector<std::weak_ptr<ADNodeInstance>> children;
ADNodeInstance(TYPE type, int id, OPERATOR op, int dim1, int dim2, std::string name) :
type(type), id(id), op(op), dim1(dim1), dim2(dim2), name(name) { };
ADNodeInstance(TYPE type, int id, OPERATOR op, const Eigen::Ref<const Matrix> &value) :
type(type), id(id), op(op), dim1(value.rows()), dim2(value.cols()), value(value) { };
ADNodeInstance(TYPE type, int id, OPERATOR op, int dim1, int dim2) :
type(type), id(id), op(op), dim1(dim1), dim2(dim2) { }
~ADNodeInstance() { };
};
class ADGraphBuilder;
class ADNode{
public:
ADGraphBuilder* builder;
std::shared_ptr<ADNodeInstance> pointer;
ADNode(){};
ADNode(ADGraphBuilder* builder):
builder(builder){};
ADNode(const ADNode& node):
builder(node.builder), pointer(node.pointer){};
~ADNode() {};
inline ADNode getVariable(OPERATOR op, const int dim1, const int dim2) const;
inline ADNode apply(OPERATOR op) const{
ADNode result = getVariable(op, this->pointer->dim1, this->pointer->dim2);
result.pointer->parents.push_back(pointer);
pointer->children.push_back(result.pointer);
return result;
};
inline ADNode tanh() const{
return apply(OPERATOR::TANH);
}
inline ADNode transpose() const{
ADNode result = getVariable(OPERATOR::TRANSPOSE, this->pointer->dim2, this->pointer->dim1);
result.pointer->parents.push_back(pointer);
pointer->children.push_back(result.pointer);
return result;
}
inline ADNode operator-() const{
std::cout << pointer->id << " U " << std::endl;
std::cout << "NEG: " << builder << " - " << pointer->id << std::endl;
ADNode node = apply(OPERATOR::UNNARY_NEGATION);
std::cout << "NEG2: " << builder << " - " << pointer->id << std::endl;
return node;
};
inline ADNode block(const int loc1, const int loc2, const int dim1, const int dim2) const {
ADNode result = getVariable(OPERATOR::SUBINDEX, dim1, dim2);
result.pointer->parents.push_back(pointer);
pointer->children.push_back(result.pointer);
return result;
}
};
class ADGraphBuilder{
protected:
int counter;
public:
std::vector<ADNode> nodes;
ADGraphBuilder():
counter(0){};
~ADGraphBuilder(){};
std::vector<ADNode> gradient(const ADNode& target, const ADNode& param);
ADNode createGradientMessage(const ADNode& child, const ADNode& parent, const ADNode& directGradient);
inline ADNode createInternalVariable(const TYPE type, const OPERATOR op, const int dim1, const int dim2) {
ADNode result = ADNode(this);
result.pointer = std::make_shared<ADNodeInstance>(type, counter++, op, dim1, dim2);
nodes.push_back(result);
return result;
}
inline ADNode createInternalVariable(const TYPE type, const OPERATOR op, const int dim1, const int dim2, const std::string name) {
ADNode result = ADNode(this);
result.pointer = std::make_shared<ADNodeInstance>(type, counter++, op, dim1, dim2, name);
nodes.push_back(result);
return result;
}
inline ADNode createConstantVariable(const Eigen::Ref<const Matrix>& value){
ADNode result = ADNode(this);
result.pointer = std::make_shared<ADNodeInstance>(TYPE::CONSTANT, counter++, OPERATOR::NONE , value);
nodes.push_back(result);
return result;
}
inline ADNode createVariable(const int dim1, const int dim2){
ADNode result = ADNode(this);
result.pointer = std::make_shared<ADNodeInstance>(TYPE::INPUT, counter++, OPERATOR::NONE , dim1, dim2);
nodes.push_back(result);
return result;
}
};
inline ADNode ADNode::getVariable(OPERATOR op, const int dim1, const int dim2) const{
if(this->pointer->type == GRADIENT)
return builder->createInternalVariable(GRADIENT, op, dim1, dim2);
else
return builder->createInternalVariable(INPUT_DERIVED, op, dim1, dim2);
}
/**
* Concatenation
*/
inline ADNode vertCat(const ADNode& lhs, const ADNode& rhs) {
if (lhs.builder != rhs.builder) {
std::cerr << "Can not operate with notes from different builder instances!" << std::endl;
throw 1;
}
if (lhs.pointer->dim2 != rhs.pointer->dim2) {
std::cerr << "Can not perform vertical concatanation on nodes with different size in dimension 2." << std::endl;
throw 2;
}
ADNode result = lhs.builder->createInternalVariable(INPUT_DERIVED, OPERATOR::VERTICAL_CONCATENATION,
lhs.pointer->dim1 + rhs.pointer->dim1, lhs.pointer->dim2);
result.pointer->parents.push_back(lhs.pointer);
result.pointer->parents.push_back(rhs.pointer);
lhs.pointer->children.push_back(result.pointer);
rhs.pointer->children.push_back(result.pointer);
return result;
}
inline ADNode vertCat(const ADNode& lhs, const double rhs) {
return vertCat(lhs, lhs.builder->createConstantVariable(Matrix::Constant(1,1, rhs)));
}
/**
* Joining nodes
*/
ADNode joinNodes(const OPERATOR op, const ADNode& lhs, const ADNode& rhs){
if(lhs.builder != rhs.builder){
std::cerr << "Different builders " << std::endl;
throw 5;
}
ADNode result = lhs.builder->createInternalVariable(INPUT_DERIVED, op , 1, 1);
result.pointer->parents.push_back(lhs.pointer);
result.pointer->parents.push_back(rhs.pointer);
lhs.pointer->children.push_back(result.pointer);
rhs.pointer->children.push_back(result.pointer);
return result;
}
inline ADNode dot(const ADNode& lhs, const ADNode& rhs){
return joinNodes(OPERATOR::DOT_MULTIPLY, lhs, rhs);
}
inline ADNode tanh(const ADNode& lhs){
return lhs.tanh();
}
inline ADNode operator*(const ADNode& lhs, const ADNode& rhs){
std::cout << "MULT: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
ADNode node = joinNodes(OPERATOR::MULTIPLY, lhs, rhs);
std::cout << "MULT2: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
return node;
}
inline ADNode operator+(const ADNode& lhs, const ADNode& rhs){
std::cout << "ADD: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
ADNode node = joinNodes(OPERATOR::ADD, lhs, rhs);
std::cout << "ADD2: " << rhs.builder << " - " << rhs.pointer->id << std::endl;
return node;
}
inline ADNode operator-(const double lhs, const ADNode& rhs){
return rhs.builder->createConstantVariable(Matrix::Constant(1,1,lhs)) + (-rhs);
}
inline int getIndex(const std::vector<std::weak_ptr<ADNodeInstance>> vector, const ADNode& target){
for (int i = 0; i < vector.size(); ++i) {
auto item = vector[i].lock();
if (item->id == target.pointer->id)
return i;
}
return -1;
}
std::vector<ADNode> ADGraphBuilder::gradient(const ADNode &target, const ADNode& params) {
std::vector<ADNode> results{1, this};
// Will contain information about the subtree that effects the target node
bool subtree[nodes.size()];
std::vector<std::vector<ADNode>> childrenGradients(nodes.size(), std::vector<ADNode>{});
std::vector<ADNode> directGradients{nodes.size(), this};
subtree[target.pointer->id] = true;
for (int i = target.pointer->id; i >= 0; i--) {
std::cout << "passed " << i << std::endl;
if(subtree[i]){
// Create a new gradient node summing up all incoming gradient messages
if(i == target.pointer->id) {
directGradients[i] = createConstantVariable(Matrix::Ones(target.pointer->dim1, target.pointer->dim2));
directGradients[i].pointer->name = "Gradient[" + std::to_string(i) + "]";
}
else
directGradients[i] = createInternalVariable(GRADIENT, OPERATOR::ADD,
nodes[i].pointer->dim1, nodes[i].pointer->dim2, std::to_string(i));
for (auto child : childrenGradients[i]) {
directGradients[i].pointer->parents.push_back(child.pointer);
child.pointer->children.push_back(directGradients[i].pointer);
}
for (auto item : nodes[i].pointer->parents) {
auto parent = item.lock();
// Check if parent is not constant
if(parent->type != CONSTANT) {
// Mark parent as node to traverse
subtree[parent->id] = true;
// Create the corresponding message as a new node and add it to the parents list
// of gradient incoming messages
ADNode capsule = ADNode(this);
capsule.pointer = parent;
//childrenGradients[parent->id].push_back(createConstantVariable(Matrix::Constant(1,1,1)));
std::cout << "Capsule: " << capsule.builder << " - " << capsule.pointer->id << std::endl;
childrenGradients[parent->id].push_back(createGradientMessage(nodes[i], capsule, directGradients[i]));
std::cout << "Capsule: " << capsule.builder << " - " << capsule.pointer->id << std::endl;
//childrenGradients[parent->id].back().pointer->name = "Gradinet[" + childrenGradients[parent->id].back().pointer->name + "]";
}
}
}
}
return results;
};
ADNode ADGraphBuilder::createGradientMessage(const ADNode& child, const ADNode& parent, const ADNode& directGradient){
switch (child.pointer->op){
case(OPERATOR::NONE):
std::cout << "Making gradient of an INPUT node? Are you serious?";
return createInternalVariable(CONSTANT, OPERATOR::NONE, child.pointer->dim1,child.pointer->dim2,
std::to_string(child.pointer->id) + "->" + std::to_string(parent.pointer->id));
case(OPERATOR::ADD):
return directGradient;
case(OPERATOR::UNNARY_NEGATION):
return -directGradient;
case(OPERATOR::DOT_MULTIPLY):
if(child.pointer->parents.size() == 2){
int index = getIndex(child.pointer->parents, parent);
ADNode otherParent(child.builder);
otherParent.pointer = child.pointer->parents[1-index].lock();
if(index == 0)
return dot(directGradient,otherParent.transpose());
else
return dot(otherParent.transpose(),directGradient);
}
else{
std::cerr<< "Unimplemented" << std::endl;
return NULL;
}
case(OPERATOR::VERTICAL_CONCATENATION): {
int index = getIndex(child.pointer->parents, parent);
int start = 0;
for (int i = 0; i < index; ++i) {
auto parent = child.pointer->parents[i].lock();
start += parent->dim1;
}
return directGradient.block(start, 0, parent.pointer->dim1, parent.pointer->dim2);
}
case(OPERATOR::TANH):
return directGradient * child * (1 - child);
}
}
#endif //ADVIBE_STACK_H
The main program:
#include <iostream>
#include "Eigen/Dense"
#include "stack.h"
int main() {
ADGraphBuilder builder;
ADNode param1 = builder.createVariable(2,1);
ADNode param2 = builder.createVariable(2,3);
Matrix sad(1,1);
sad << 2;
ADNode h = tanh(dot(param2, vertCat(param1,1)));
for (int i = 0; i < 0; ++i) {
h = tanh(dot(param2,vertCat(h,1)));
}
ADNode e = dot(h.transpose(), h);
auto grad = builder.gradient(e, param2);
std::cout << "(" << e.pointer->dim1 << ", " << e.pointer->dim2 << ")" << std::endl;
return 0;
}
When I ran it the output I get is the following:
passed 7
Capsule: 0x7ffffff7a750 - 6
Capsule: 0x7ffffff7a750 - 6
Capsule: 0x7ffffff7a750 - 5
Capsule: 0x7ffffff7a750 - 5
passed 6
Capsule: 0x7ffffff7a750 - 5
Capsule: 0x7ffffff7a750 - 5
passed 5
Capsule: 0x7ffffff7a750 - 4
5 U
NEG: 0x7ffffff7a750 - 5
NEG2: 0x7ffffff7a750 - 5
ADD: 0x7ffffff7a750 - 15
ADD2: 0x7ffffff7a750 - 15
MULT: 0 - 5
Different builders
terminate called after throwing an instance of 'int'
Process finished with exit code 134
According to this this mean that in the expression in function ADGraphBuilder::createGradientMessage for TANH for some reason the expression child * (1 - child) after the right operand is evaluated the child variable is garbage collected or I don't know?
Any ideas and suggestions are welcome and still thanks to Azad and Whoz for the discussion so far.