1

I am implementing a parallel accumulator class in C++. The implementation of the class is as follows:

#include <iostream>
#include <thread>
#include <cstring>
#include "cblas.h"

class ParallelAccumulator {
public:
    int num_contributions;
    double** contributions;
    int* is_contributing;
    int num_elements;
    
    ParallelAccumulator(int num_contributions, int num_elements) {
        this->num_contributions = num_contributions;
        this->num_elements = num_elements;
        contributions = new double*[num_contributions];
        is_contributing = new int[num_contributions];
        for (int i = 0; i < num_contributions; i++) {
            contributions[i] = new double[num_elements];
            is_contributing[i] = 0;
        }
    }
    
    void reset() {
        for (int i = 0; i < num_contributions; i++) {
            is_contributing[i] = 0;
        }
    }
    
    void zero_contributions() {
        for (int i = 0; i < num_contributions; i++) {
            memset(contributions[i], 0, num_elements * sizeof(double));
        }
    }
    
    int check_out_contribution() {
        for (int i = 0; i < num_contributions; i++) {
            if (is_contributing[i] == 0) {
                is_contributing[i] = 1;
                return i;
            }
        }
        return -1;
    }
    
    void check_in_contribution(int contrib_index) {
        is_contributing[contrib_index] = 0;
    }
    
    void reduce(double* output) {
        for (int i = 0; i < num_contributions; i++) {
            if (is_contributing[i] == 1) {
                cblas_daxpy(num_elements, 1.0, contributions[i], 1, output, 1);
            }
        }
    }
    
    ~ParallelAccumulator() {
        for (int i = 0; i < num_contributions; i++) {
            delete[] contributions[i];
        }
        delete[] contributions;
        delete[] is_contributing;
    }
};

However, I am having compilation issues when I create the threads to test the class as follows:

void test_function(ParallelAccumulator& accumulator, double* output, int id) {
    int contrib_index = accumulator.check_out_contribution();
    if (contrib_index == -1) {
        std::cout << "Error: no available contrib arrays" << std::endl;
        return;
    }
    double* contrib = accumulator.contributions[contrib_index];
    for (int i = 0; i < accumulator.num_elements; i++) {
        contrib[i] = id;
    }
    accumulator.check_in_contribution(contrib_index);
}

int main() {
    int num_contributions = 4;
    int num_elements = 10;
    double output[num_elements];
    ParallelAccumulator accumulator(num_contributions, num_elements);
    /* problematic code start here  */
    std::thread t1(test_function, std::ref(accumulator), output, 1);
    std::thread t2(test_function, std::ref(accumulator), output, 2);
    std::thread t3(test_function, std::ref(accumulator), output, 3);
    std::thread t4(test_function, std::ref(accumulator), output, 4);
    /* problematic code end here  */
    t1.join();
    t2.join();
    t3.join();
    t4.join();
    accumulator.reduce(output);
    for (int i = 0; i < num_elements; i++) {
        std::cout << output[i] << " ";
    }
    std::cout << std::endl;
    return 0;
}

The compilation errors are:

parallel_accumulator.cpp:87:67: error: no matching function for call to 'std::thread::thread(void (&)(ParallelAccumulator&, double*, int), std::reference_wrapper<ParallelAccumulator>, double [num_elements], int)'    87 |     std::thread t1(test_function, std::ref(accumulator), output, 1);
      |                                                                   ^ In file included from /usr/local/Cellar/gcc/11.3.0_2/include/c++/11/thread:43,
                 from parallel_accumulator.cpp:2: /usr/local/Cellar/gcc/11.3.0_2/include/c++/11/bits/std_thread.h:127:7: note: candidate: 'template<class _Callable, class ... _Args, class> std::thread::thread(_Callable&&, _Args&& ...)'   127 |       thread(_Callable&& __f, _Args&&... __args)
      |       ^~~~~~ /usr/local/Cellar/gcc/11.3.0_2/include/c++/11/bits/std_thread.h:127:7: note:   template argument deduction/substitution failed: parallel_accumulator.cpp:87:67: note:   variable-sized array type 'double (&)[num_elements]' is not a valid template argument    87 |    std::thread t1(test_function, std::ref(accumulator), output, 1);
      |                                                                   ^ In file included from /usr/local/Cellar/gcc/11.3.0_2/include/c++/11/thread:43,
                 from parallel_accumulator.cpp:2: /usr/local/Cellar/gcc/11.3.0_2/include/c++/11/bits/std_thread.h:157:5: note: candidate: 'std::thread::thread(std::thread&&)'   157 |     thread(thread&& __t) noexcept
      |     ^~~~~~ /usr/local/Cellar/gcc/11.3.0_2/include/c++/11/bits/std_thread.h:157:5: note:   candidate expects 1 argument, 4 provided /usr/local/Cellar/gcc/11.3.0_2/include/c++/11/bits/std_thread.h:121:5: note: candidate: 'std::thread::thread()'   121 |     thread() noexcept
= default;
      |     ^~~~~~

What is the proper syntax to fix the error? What modifications can I make to this implementation to make it work properly and have as much flexibility as possible?

user3116936
  • 492
  • 3
  • 21
  • 2
    the key part of the error is "note: variable-sized array type 'double (&)[num_elements]' is not a valid template argument ". `double output[num_elements];` is not standard C++, you should not expect it to be compatible with all standard C++. Use `std::vector` for dynamically sized arrays – 463035818_is_not_an_ai Jan 17 '23 at 13:36
  • 1
    though, you don't need the VLA, just make the size a cosntant: `int num_elements = 10;` -> `const int num_elements = 10;` – 463035818_is_not_an_ai Jan 17 '23 at 13:38
  • I think the rest is ok, so closing as dupe – 463035818_is_not_an_ai Jan 17 '23 at 13:38
  • 1
    if you use gcc, I suggest to compile with `-pedantic-errors` from time to time (or always), because gcc allows some weird stuff in default settings – 463035818_is_not_an_ai Jan 17 '23 at 13:39
  • 3
    Instead of pointers, `new`/`delete` and `memset` you could use `std::vector` to drastically simplify your code. – Konrad Rudolph Jan 17 '23 at 13:39
  • @463035818_is_not_a_number I don't see how this particular dupe explains anything. It doesn't mention what is the problem here, it doesn't explain what VLA is and how it differs from a real array, it doesn't explain why adding `const` fixes the issue here... – Yksisarvinen Jan 17 '23 at 13:40
  • @Yksisarvinen agreed. The q&a i proposed as dupe presumes the reader already knows what a VLA is and that it is non standard C++. This needs a q&a to explain what a VLA is and that it is non standard C++ – 463035818_is_not_an_ai Jan 17 '23 at 13:43
  • I don't know what a VLA is. Any help to fix the issue will be appreciated. – user3116936 Jan 17 '23 at 13:47
  • does `std::thread t1(test_function, std::ref(accumulator), &output[0], 1);` work? – doron Jan 17 '23 at 13:47
  • @user3116936 This question might help: https://stackoverflow.com/q/62035501/1968 – Konrad Rudolph Jan 17 '23 at 13:48
  • Yes! Thanks a lot. I will post the correct code to help other guys doing similar stuff. – user3116936 Jan 17 '23 at 13:51

2 Answers2

-1

Thanks a lot to all the participants in the discussion above, in particular to @doron. The code with the correct syntax is as follows:

    constexpr int num_elements = 10;
    std::thread t1(test_function, std::ref(accumulator), &output[0], 1);
    std::thread t2(test_function, std::ref(accumulator), &output[0], 2);
    std::thread t3(test_function, std::ref(accumulator), &output[0], 3);
    std::thread t4(test_function, std::ref(accumulator), &output[0], 4);
    
user3116936
  • 492
  • 3
  • 21
-1

There are several ways to call std::thread::thread(), when in doubt, use the simplest one using a lambda..

This sure isn't the most pedantic answer, but it does not penalise performance and always works.

// quick and efficient
std::thread t1([&]() { test_function(accumulator), &output[0], 1); });
// etc...

// If you do not like vague lambda captures
std::thread t1([&accumulator, &output]() { test_function(accumulator), &output[0], 1); });
// etc...
Michaël Roy
  • 6,338
  • 1
  • 15
  • 19
  • This proposal does not answer my specific question. – user3116936 Jan 19 '23 at 12:40
  • @user3116936 Sorry for being so specific, but the question was: *"What is the proper syntax to fix the error? What modifications can I make to this implementation to make it work properly and have as much flexibility as possible?"*. There is not one specific answer to those questions, and this is one of them, **as stated** in my answer. – Michaël Roy Jan 19 '23 at 13:09
  • Do you really think that using lambda with potentially vague lambda captures is the best option? I don't think this is a solution that is more sustainable and easy to maintain. – user3116936 Jan 19 '23 at 13:19
  • I believe using a lambda is the solution that's the most practical (one unified interface) and the most *flexible* of the available ways to call std::thread::thread(), as was asked. As for it being sustainable, and easy to maintain, it's as good as any. What would make it harder to maintain than any other syntax? I'll remind you that, as stated, this is NOT the most **pedantic** way of starting a thread, but, your having to ask for help on this site on this issue shows that using a lambda is also probably easier to read and reason about than other ways to call std::thread::thread(). – Michaël Roy Jan 19 '23 at 13:43