Handling references to `expressions` in a `vector_binary_operation` class without unnecessary copies

Question

I'm trying to implement a simple vector class which uses expression templates in order to avoid that expressions such as vector w = x + y + z are implemented inefficiently (the implementation would first produce a temporary vector to hold x + y and then produce another vector with the elements of z added in):

namespace math
{
    template<class E>
    class expression
    {
    public:
        auto size() const {
            return static_cast<E const&>(*this).size();
        }

        auto operator[](std::size_t i) const
        {
            if (i >= size())
                throw std::length_error("");
            return static_cast<E const&>(*this)[i];
        }

        operator E&() { return static_cast<E&>(*this); }
        operator E const&() const { return static_cast<E const&>(*this); }
    }; // class expression

    template<typename T, class Allocator = std::allocator<T>>
    class vector
        : public expression<vector<T>>
    {
    private:
        using data_type = std::vector<T, Allocator>;
        data_type m_data;

    public:
        using value_type = T;
        using allocator_type = Allocator;
        using size_type = typename data_type::size_type;
        using difference_type = typename data_type::difference_type;
        using reference = typename data_type::reference;
        using const_reference = typename data_type::const_reference;
        using pointer = typename data_type::pointer ;
        using const_pointer = typename data_type::const_pointer;

        vector(size_type d)
            : m_data(d)
        { }
        vector(std::initializer_list<value_type> init)
            : m_data(init)
        { }
        template<class E>
        vector(expression<E> const& expression)
            : m_data(expression.size())
        {
            for (size_type i = 0; i < expression.size(); ++i)
                m_data[i] = expression[i];
        }

        size_type size() const {
            return m_data.size();
        }
        
        value_type  operator[](size_type i) const { return m_data[i]; }
        value_type& operator[](size_type i)       { return m_data[i]; };
    }; // class vector

    namespace detail
    {
        template<typename T>
        class scalar
            : public expression<scalar<T>>
        {
        public:
            using value_type = T;
            using allocator_type = std::allocator<void>;
            using size_type = typename std::allocator<T>::size_type;
            using difference_type = typename std::allocator<T>::difference_type;
            using reference = typename std::allocator<T>::reference;
            using const_reference = typename std::allocator<T>::const_reference;
            using pointer = typename std::allocator<T>::pointer;
            using const_pointer = typename std::allocator<T>::const_pointer;

            scalar(value_type value)
                : m_value(value)
            { }

            size_type size() const {
                return 0;
            }

            operator value_type&() { return static_cast<value_type&>(*this); }
            operator value_type const&() const { return static_cast<value_type const&>(*this); }

            value_type  operator[](size_type i) const { return m_value; }
            value_type& operator[](size_type i) { return m_value; }

        private:
            value_type m_value;
        }; // class scalar

        template<class>
        struct is_scalar : std::false_type { };

        template<class T>
        struct is_scalar<scalar<T>> : std::true_type { };
    } // namespace detail

    template<class E1, class E2, class BinaryOperation>
    class vector_binary_operation
        : public expression<vector_binary_operation<E1, E2, BinaryOperation>>
    {
    public:
        using value_type = decltype(BinaryOperation()(typename E1::value_type(), typename E2::value_type()));
        using allocator_type = std::conditional_t<
            detail::is_scalar<E1>::value,
            typename E2::allocator_type::template rebind<value_type>::other,
            typename E1::allocator_type::template rebind<value_type>::other>;

    private:
        using vector_type = vector<value_type, allocator_type>;

    public:
        using size_type = typename vector_type::size_type;
        using difference_type = typename vector_type::difference_type;
        using reference = typename vector_type::reference;
        using const_reference = typename vector_type::const_reference;
        using pointer = typename vector_type::pointer;
        using const_pointer = typename vector_type::const_pointer;

        vector_binary_operation(expression<E1> const& e1, expression<E2> const& e2, BinaryOperation op)
            : m_e1(e1), m_e2(e2),
              m_op(op)
        {
            if (e1.size() > 0 && e2.size() > 0 && !(e1.size() == e2.size()))
                throw std::logic_error("");
        }

        size_type size() const {
            return m_e1.size(); // == m_e2.size()
        }

        value_type operator[](size_type i) const {
            return m_op(m_e1[i], m_e2[i]);
        }

    private:
        E1 m_e1;
        E2 m_e2;
        //E1 const& m_e1;
        //E2 const& m_e2;
        BinaryOperation m_op;
    }; // class vector_binary_operation

    template<class E1, class E2>
    vector_binary_operation<E1, E2, std::plus<>>
    operator+(expression<E1> const& e1, expression<E2> const& e2) {
        return{ e1, e2, std::plus<>() };
    }
    template<class E1, class E2>
    vector_binary_operation<E1, E2, std::minus<>>
    operator-(expression<E1> const& e1, expression<E2> const& e2) {
        return{ e1, e2, std::minus<>() };
    }
    template<class E1, class E2>
    vector_binary_operation<E1, E2, std::multiplies<>>
    operator*(expression<E1> const& e1, expression<E2> const& e2) {
        return{ e1, e2, std::multiplies<>() };
    }
    template<class E1, class E2>
    vector_binary_operation<E1, E2, std::divides<>>
    operator/(expression<E1> const& e1, expression<E2> const& e2) {
        return{ e1, e2, std::divides<>() };
    }

    template<class E, typename T>
    vector_binary_operation<E, detail::scalar<T>, std::divides<>>
    operator/(expression<E> const& expr, T val) {
        return{ expr, detail::scalar<T>(val), std::divides<>() };
    }
    template<class E, typename T>
    vector_binary_operation<E, detail::scalar<T>, std::multiplies<>>
    operator*(T val, expression<E> const& expr) {
        return{ expr, detail::scalar<T>(val), std::multiplies<>() };
    }
    template<class E, typename T>
    vector_binary_operation<E, detail::scalar<T>, std::multiplies<>>
    operator*(expression<E> const& expr, T val) {
        return{ expr, detail::scalar<T>(val), std::multiplies<>() };
    }
} // namespace math

I don't know how I need to deal with the expression member variables in vector_binary_operation. It would make sense to declare them as const references, cause the whole point of the code is to avoid unnecessary copies. However, if we write sum = a + b + c, we will end up keeping a reference to the temporary a + b. Doing sum[0] will call operator()[0] on that temporary. But that object was deleted after the previous line.

What should I do?

`std::vector` has move semantics so there should not be any copies with the temporaries. also see: http://stackoverflow.com/questions/10720122/is-there-a-standard-way-of-moving-a-range-into-a-vector — NathanOliver, Feb 25 '16 at 13:10
@NathanOliver You've misunderstood me. What I meant is that `m_e1` and `m_e2` might contain copies of some `vector`s. — 0xbadf00d, Feb 25 '16 at 13:13
C++11 introduced move semantics, which allow you to steal the guts from a temporary object. Simplified example: [coliru](http://coliru.stacked-crooked.com/a/8d3313a4c55aeb2b). Now, if you want handle temporaries and references differently, (i.e., take ownership of temporaries, but hold non-temporaries by const reference) that might take some template fun :) — melak47, Feb 25 '16 at 13:13
@melak47 Handling temporaries and references differently seems to be a good idea, but I don't know how should do that. — 0xbadf00d, Feb 25 '16 at 13:15
I'm wondering if it's really necessary - can you manipulate your vector operations once created? If not, I don't see much harm in copying those lazily evaluated operations, and then you don't need special handling — melak47, Feb 25 '16 at 13:20
@melak47 But `m_e1` might of type `vector`. So, I copy a `vector` into `m_e1`. — 0xbadf00d, Feb 25 '16 at 13:29

davidhigh · Accepted Answer · 2016-02-25T14:28:16.540

2

I was facing a similar decision in my expression template code quite some time ago. The choice of how to store the wrapped arrays is thereby of central importance and should be made with care. But let's assume you already settled for references: that is, if you have code like

vector a;
vector b;
auto c = a + b;

a and b are stored by const-reference. This implies that as long as you use c, the original vectors have to be kept alive. (Other choices would be store-by-value, which can be expensive, or store-by-(wrapped)-shared-pointer, which let's you use c even if a or b left their scope but comes with a call overhead.)

Ok, but now as you settled for references, you obviously want to avoid references to a temporary. For that, C++ move semantics offers already anything you need:

template<typename E1, typename E2>
auto operator+(E1&& e1, E2&& e2)
{
     using value_type = decltype(e1[0] + e2[0]);
     return vector_binary_operation<E1, E2, std::plus<value_type> >
                   {std::forward<E1>(e1), std::forward<E2>(e2), std::plus<value_type>{}};
}

The difference to your code is in the type deduction of the used rvalue references E1&& and E2&&. Their purpose is to indicate whether the function got a valid and named object or a temporary. Based on this information, in this function you can make the choice how to store the passed references in the returned expression classes.

For example, if you now add two vectors

auto c = a + b;

E1 and E2 will be deduced both as vector&. On the opposite, if you have

auto d = (a + b) + b;

E1 is vector_binary_operation<vector&, vector&, std::plus<> > -- no reference here -- whereas E2 is still vector&.

edited Feb 25 '16 at 14:28

answered Feb 25 '16 at 14:14

davidhigh

14,652
2
44
75

If I'm not terribly wrong I don't need to modify anything of my code except the freestanding operators, right? I'm unsure whether or not I like that `a` and `b` need to stay alive as long as `c` is alive, but it seems like there is nothing we can do, if we don't want to use `shared_ptr` or copies, right? – 0xbadf00d Feb 25 '16 at 14:39
@davidhigh also, I believe you can just use `std::plus<>` in `operator+` and let that do the type deduction for you. – melak47 Feb 25 '16 at 14:41
@melak47 Yes, we can do that. `std::plus<>` uses the same technique to deduce the types. – 0xbadf00d Feb 25 '16 at 14:46
Yes, `std::plus<>` seems to be new since C++14, as described [here](http://en.cppreference.com/w/cpp/utility/functional/plus). – davidhigh Feb 25 '16 at 16:36
@0xbadf00d: to mention it again, the most important choice imo comes before, namely how to store the other expressions. In my code, I prefer to do that by wrapped shared-pointers, so you have no lifetime issues: The expression `a + b` will keep both summands alive. – davidhigh Feb 25 '16 at 16:39
`auto` without trailing return type is also C++14 :) – melak47 Feb 25 '16 at 18:48
@melak47 Should be no problem, since I've tagged the question with C++14 ;) Don't you think that using `shared_ptr` is a significant performance penalty? – 0xbadf00d Feb 25 '16 at 19:15
@0xbadf00d: yes, a shared-pointer can be performance penalty, particularly if it is nested. I made some tests [here](http://coliru.stacked-crooked.com/a/df74fe1d97b5f901) which stem from the motivation to [this question](http://stackoverflow.com/questions/35160431/c-safe-idiom-to-call-a-member-function-of-a-class-through-a-shared-ptr-class-m). I don't know whether the test is realistic, but it shows that one shared-pointer in the beginning and pass-by-value thereafter is not that expensive, but it's safer. – davidhigh Feb 25 '16 at 20:24
I think we still got problems. Now, `E1` (and `E2`) could be of a reference type such that `typename E1::value_type` (in `vector_binary_operation`) is not possible. We need to write `typename std::remove_reference_t::value_type`. And to be sure, we should write `std::remove_pointer_t>::value_type`. This is ugly and maybe I've forgotten another scenario where `typename E1::value_type` is not possible. Is there any type trait which will always yield the plain `vector` or `vector_binary_operation` type? – 0xbadf00d Feb 25 '16 at 20:29
@0xbadf00d: I frequently use `std::decay_t` instead of `std::remove_reference`, and you can also use `using value_type = decltype(std::declval()(std::declval(), std::declval()));`. Both won't work for pointer, but that's usually ok (why should you allow for pointers?). And, more importantly: ask yourself whether you need `value_type` at all (that holds even more for the `allocator_type`). I also started with hundreds of typedefs, but then removed those as they're hardly necessary (--and you can reconstruct them easily if needed). – davidhigh Feb 25 '16 at 20:46
@davidhigh I just wanted to be consistent with the other containers. – 0xbadf00d Feb 25 '16 at 20:48
@0xbadf00d: I know, but it's not that important in my experience. Moreover, your expression is no container, for example you can't assign elements. But still, whether to provide them or not is probably a matter of taste. – davidhigh Feb 25 '16 at 20:53
I need to change the ctor of `vector_binary_operation` to take arguments of the types `E1, E2, BinaryOperation` instead of `expression, expression, BinaryOperation` in order to make your code work. But that's not a good idea, cause now for every `foo x, y` the expression `x + y` is valid and of type `binary_vector_operation`. So, we need somehow to verify that we only declare the `operator+` for `expression`s. Any idea? – 0xbadf00d Apr 08 '16 at 11:25
@0xbadf00d: SFINAE is your friend. Set up a trait `is_expression` with which you can determine, well, whether `T` is an `expression`. Then use `std::enable_if_t` to enable your `operator+` overload (and all other operator overloads) only when the parameters are of type `expression` (otherwise use the "normal" one). Give me a sign if you need an example, then I'll edit my answer. – davidhigh Apr 08 '16 at 20:29

Handling references to `expressions` in a `vector_binary_operation` class without unnecessary copies

1 Answers1

Linked