I have the same kind of questions that: Expression templates: improving performance in evaluating expressions?
My aim is to unroll loop of this expression
auto && intermediate = A+D*C
for(int i= 0; i<10 ;i++)
intermediate = intermediate + B
Vector result = intermediate * E
I would like to have in intermediate the whole tree of binary expression and that finally operator=(Expression ) of the class Vector run the inspection of the graph With my code it works only without the loop for ( I use the classical implementation of expression template , the one of Joel Falcou @cppcon 2015)
Edit : code compile issue due to the loop
if I uncomment the loop into the main I have the compiled error need to run with c++11
g++ -std=c++11 -O3 -fopenmp -Wall -pedantic -pthread main.cpp && ./a.out
#include <vector>
#include <iostream>
template <typename TBase, typename Derived>
struct BaseExpression
{
Derived const& self() const { return static_cast<const Derived&>(*this); }
Derived & self() { return static_cast<Derived&>(*this); }
TBase operator[](size_t szIdx) const { return self()[szIdx]; }
size_t size() const {return self().size();}
};
template <typename TBase, typename Operator, typename OP1, typename OP2>
class Binary_Expression : public BaseExpression<TBase, Binary_Expression<TBase, Operator, OP1, OP2> >
{
public:
Binary_Expression(OP1 const & a, OP2 const & b) : op1(a), op2(b){}
TBase operator[] (size_t idx) const { return op(op1[idx], op2[idx]); }
size_t size() const { return op1.size() != 0 ? op1.size() : op2.size(); }
private:
const OP1 & op1;
const OP2 & op2;
Operator op;
};
template <typename TBase >
class Vector : public BaseExpression<TBase, Vector<TBase> >
{
public:
explicit Vector(size_t szSizeN) : m_xMemory(szSizeN){}
Vector(const Vector &orig): m_xMemory()
{
this->copy(orig);
}
Vector & operator=(const Vector &orig)
{
if (&orig != this)
{
Vector temp(orig);
this->swap(temp);
}
return *this;
}
Vector & operator=(TBase factor)
{
size_t szSizeN = size();
#pragma omp parallel for
for (size_t idx = 0; idx < szSizeN; idx++)
{
m_xMemory[idx] = factor;
}
return *this;
}
template <typename Expression>
Vector(const BaseExpression<TBase, Expression> &b) :m_xMemory(b.size())
{
size_t szSizeN = size();
#pragma omp parallel for
for (size_t idx = 0; idx < szSizeN; idx++)
{
m_xMemory[idx] = b[idx];
}
}
void swap(Vector &orig)
{
using std::swap;
swap(m_xMemory, orig.m_xMemory);
}
TBase operator[] (size_t idx) const { return m_xMemory[idx]; }
TBase & operator[] (size_t idx) { return m_xMemory[idx]; }
size_t size() const { return m_xMemory.size(); }
void print()
{
size_t szSizeN = size();
for (size_t idx = 0; idx < szSizeN; idx++)
{
std::cout << "Index=" << idx << "\t" << "Value=" << m_xMemory[idx] << std::endl;
}
}
private:
void copy(const Vector &orig)
{
m_xMemory = orig.m_xMemory;
}
std::vector<TBase> m_xMemory;
};
template <typename TBase, typename E1, typename E2>
Binary_Expression<TBase, std::plus<TBase>, E1, E2> operator+(const BaseExpression<TBase, E1> & xE1, const BaseExpression< TBase, E2> & xE2)
{
return Binary_Expression<TBase, std::plus<TBase>, E1, E2>(xE1.self(), xE2.self());
}
int main()
{
Vector<double> x1(10);
Vector<double> x2(10);
Vector<double> x3(10);
x1 = 7.5;
x2 = 8.;
x3 = 4.2;
auto && intermediate = x1 + x2;
//compil error
/*
for (int i = 0; i< 10; i++)
{
intermediate = intermediate + x3;
}
*/
// inspection of the graph here
Vector<double> result = intermediate + x2;
result.print();
}
in fact in my final design I would like to write the following stuff :
Vector<double> x1(10);
Vector<double> x2(10);
Vector<double> x3(10);
x1 = 7.5;
x2 = 8.;
x3 = 4.2;
Vector<double> intermediate = x1 + x2;
for (int i = 0; i < 5; ++i)
intermediate = intermediate + x3;
Vector<double> result = x1 + x3 + intermediate;
// finally into result I have the expression tree, and evaluate method which will make the graph inspection
result.evaluate();
Thanks in advance Jonathan