How to do tuple reduction in thrust (also return a tuple)?

Question

Say I have a tuple consisting of two thrust::device_vector objects. I want the output to be a tuple consisting of two scalars, which are the sums of the two vectors, respectively. For example,

input tuple consisting of two vectors:
a: 3, 5, 2 
b: 6, 1, 7
output tuple consisting of two scalars:
10
14

I think this should be extremely easy, but somehow I still haven't figured out a way to do it. My code:

#include <iostream>
#include <stdlib.h>
#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>
#include <thrust/transform_reduce.h>
#include <thrust/iterator/zip_iterator.h>

// Binary functor meant to be the combining operator passed to thrust::reduce
// in main(): it adds two 2-element tuples component by component.
template<typename T>
struct TestTuplePlus
{
    // Returns the tuple (get<0>(t0) + get<0>(t1), get<1>(t0) + get<1>(t1)).
    //
    // NOTE(review): both parameters are non-const lvalue references to
    // thrust::tuple<T, T>. The compiler diagnostic quoted after this listing
    // shows Thrust invoking the functor with a thrust::tuple<float, float, ...>
    // and a thrust::detail::tuple_of_iterator_references<float &, float &, ...>,
    // i.e. the second argument is not itself a thrust::tuple<T, T> object.
    // That mismatch is presumably why the call is rejected -- confirm against
    // the Thrust version in use.
    __host__ __device__
    thrust::tuple<T, T> operator()(thrust::tuple<T, T>& t0, thrust::tuple<T, T>& t1)
    {
            return thrust::make_tuple(thrust::get<0>(t0) + thrust::get<0>(t1), thrust::get<1>(t0) + thrust::get<1>(t1));
    }
};

// Builds the two example vectors from the question (a = 3, 5, 2 and
// b = 6, 1, 7), zips them, and tries to reduce the zipped range to one tuple
// holding the sum of each vector (expected: 10 and 14).
int main()
{
  // Each vector is created with three elements set to 0, then filled below.
  thrust::device_vector<float> a(3, 0);
  thrust::device_vector<float> b(3, 0);

  a[0] = 3;
  a[1] = 5;
  a[2] = 2;
  b[0] = 6;
  b[1] = 1;
  b[2] = 7;

  // Zip iterators walk a and b in lockstep, so the range [begin, end) presents
  // the two vectors as a single sequence of (a[i], b[i]) pairs.
  auto begin = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin()));
  auto end = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end()));

  // reduce to a tuple
  // The initial value is the tuple (0, 0) and TestTuplePlus<float> is the
  // combining operator. This is the statement the quoted diagnostic points at.
  // result is not read afterwards in this listing.
  thrust::tuple<float, float> result = thrust::reduce(begin, end, thrust::make_tuple<float,float>(0,0), TestTuplePlus<float>()); // produce compilation error

  return 0;
}

The compilation error:

/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/function.h(96): error: function "TestTuplePlus<T>::operator() [with T=float]" cannot be called with the given argument list
            argument types are: (thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, thrust::detail::tuple_of_iterator_references<float &, float &, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>)
            object type is: TestTuplePlus<float>
          detected during:
            instantiation of "Result thrust::detail::wrapped_function<Function, Result>::operator()(Argument1 &, const Argument2 &) const [with Function=TestTuplePlus<float>, Result=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, Argument1=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, Argument2=thrust::detail::tuple_of_iterator_references<thrust::device_reference<float>, thrust::device_reference<float>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>]" 
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/detail/sequential/reduce.h(61): here
            instantiation of "OutputType thrust::system::detail::sequential::reduce(thrust::system::detail::sequential::execution_policy<DerivedPolicy> &, InputIterator, InputIterator, OutputType, BinaryFunction) [with DerivedPolicy=thrust::detail::seq_t, InputIterator=thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, OutputType=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryFunction=TestTuplePlus<float>]" 
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/reduce.inl(71): here
            instantiation of "T thrust::reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &, InputIterator, InputIterator, T, BinaryFunction) [with DerivedPolicy=thrust::detail::seq_t, InputIterator=thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, T=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryFunction=TestTuplePlus<float>]" 
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/reduce.h(1022): here
            instantiation of "T thrust::cuda_cub::reduce_n(thrust::cuda_cub::execution_policy<Derived> &, InputIt, Size, T, BinaryOp) [with Derived=thrust::cuda_cub::tag, InputIt=thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, Size=signed long, T=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryOp=TestTuplePlus<float>]" 
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/system/cuda/detail/reduce.h(1037): here
            instantiation of "T thrust::cuda_cub::reduce(thrust::cuda_cub::execution_policy<Derived> &, InputIt, InputIt, T, BinaryOp) [with Derived=thrust::cuda_cub::tag, InputIt=thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, T=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryOp=TestTuplePlus<float>]" 
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/reduce.inl(71): here
            instantiation of "T thrust::reduce(const thrust::detail::execution_policy_base<DerivedPolicy> &, InputIterator, InputIterator, T, BinaryFunction) [with DerivedPolicy=thrust::cuda_cub::tag, InputIterator=thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, T=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryFunction=TestTuplePlus<float>]" 
/usr/local/cuda/bin/../targets/x86_64-linux/include/thrust/detail/reduce.inl(186): here
            instantiation of "T thrust::reduce(InputIterator, InputIterator, T, BinaryFunction) [with InputIterator=thrust::zip_iterator<thrust::tuple<thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::detail::normal_iterator<thrust::device_ptr<float>>, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>>, T=thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, BinaryFunction=TestTuplePlus<float>]"

To be honest, I have no idea how to fix it.

I actually found this post, but I haven't been able to get it to compile either.

Anyway, is there an easy way to do tuple reduction in cuda?

Your original code compiles fine for me, with cuda-10.1.243. — roy li, Aug 18 '21 at 19:24

Robert Crovella · Answer 1 · 2020-05-20T01:15:39.487

The compile error arises, strangely enough, due to the fact that thrust is actually passing a different tuple type for the first and second arguments to your functor. This can be deduced from this difference:

error: function "TestTuplePlus<T>::operator() [with T=float]" cannot be called with the given argument list
        argument types are: (thrust::tuple<float, float, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type, thrust::null_type>, thrust::detail::tuple_of_iterator_references<float &, float &, thrust::null_type, ...

For the first argument we are told:

argument types are: (thrust::tuple<float, float, thrust::null_type, ...

For the second argument we are told:

thrust::detail::tuple_of_iterator_references<float &, float &, thrust::null_type, ...

The first tuple contains two float quantities. The second tuple contains two float references. These are not the same type package. As a result, there is not one single adaptation of:

thrust::tuple<T, T>

which can conform to both types. Therefore there is no possible single instantiation of your templated functor that can conform to both.

We can work around this by allowing the templated functor to have two templated types, one for each argument. The code below demonstrates one possible solution:

$ cat t1727.cu
#include <iostream>
#include <stdlib.h>
#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include <thrust/transform.h>
#include <thrust/tuple.h>
#include <thrust/transform_reduce.h>
#include <thrust/iterator/zip_iterator.h>

// Combining operator for thrust::reduce over a zip of two sequences: adds two
// 2-element tuples component by component.
struct TestTuplePlus
{
    // The call operator (not the struct) is the template, so callers write
    // TestTuplePlus() without naming an element type.
    //
    // T and T1 are deduced independently because Thrust may pass two different
    // tuple instantiations: one holding values and one holding references.
    //
    // Returns (get<0>(t0) + get<0>(t1), get<1>(t0) + get<1>(t1)) as a
    // thrust::tuple<T, T>, i.e. with the element type of the first argument.
    //
    // The operator is const-qualified because it reads no member state; this
    // lets it be invoked through const functor objects as well.
    template<typename T, typename T1>
    __host__ __device__
    thrust::tuple<T, T> operator()(thrust::tuple<T, T> t0, thrust::tuple<T1, T1> t1) const
    {
            return thrust::make_tuple(thrust::get<0>(t0) + thrust::get<0>(t1), thrust::get<1>(t0) + thrust::get<1>(t1));
    }
};

// Reduces a zip of two vectors to a single tuple holding the sum of each
// vector, then prints both sums.
int main()
{
  // Example input from the question: a = {3, 5, 2}, b = {6, 1, 7}.
  // Each vector is created with three elements set to 0, then filled below.
  thrust::device_vector<float> a(3, 0);
  thrust::device_vector<float> b(3, 0);

  a[0] = 3;
  a[1] = 5;
  a[2] = 2;
  b[0] = 6;
  b[1] = 1;
  b[2] = 7;

  // Zip iterators walk a and b in lockstep, so the range [begin, end) presents
  // the two vectors as a single sequence of (a[i], b[i]) pairs.
  auto begin = thrust::make_zip_iterator(thrust::make_tuple(a.begin(), b.begin()));
  auto end = thrust::make_zip_iterator(thrust::make_tuple(a.end(), b.end()));

  // Reduce to a tuple: start from (0, 0) and combine with TestTuplePlus, so
  // result holds (sum of a, sum of b). The float literals let make_tuple deduce
  // thrust::tuple<float, float> without explicit template arguments.
  thrust::tuple<float, float> result = thrust::reduce(begin, end, thrust::make_tuple(0.0f, 0.0f), TestTuplePlus());
  std::cout << "a sum: " << thrust::get<0>(result) << " b sum: " << thrust::get<1>(result) << std::endl;
  return 0;
}
$ nvcc -std=c++11 t1727.cu -o t1727
$ ./t1727
a sum: 10 b sum: 14
$

(CUDA 10.1.243)

I'm sure other approaches are possible. Note that I have elected to template the operator itself, rather than the entire struct. This eliminates the need to specify the template type in the host code. Again, I'm sure other approaches are possible.

I won't be able to respond to questions related to "why does thrust work this way?"

If you find this behavior troublesome, you might wish to file a thrust issue.

I don't claim correctness for this code or any other code that I post. Anyone using any code I post does so at their own risk. I merely claim that I have attempted to address the questions in the original posting, and provide some explanation thereof. I am not claiming my code is defect-free, or that it is suitable for any particular purpose. Use it (or not) at your own risk.

score 0 · Answer 2 · edited Feb 11 '22 at 00:15

0

The compilation error is due to missing "const" qualifier in your functor, i.e., it should be:

thrust::tuple<T, T> operator()(const thrust::tuple<T, T>& t0, const thrust::tuple<T, T>& t1)

edited Feb 11 '22 at 00:15

Jeremy Caney

7,102
69
48
77

answered Feb 10 '22 at 17:45

pengwang

1

How to do tuple reduction in thrust (also return a tuple)?

2 Answers