I have this simple, self-contained example of a very rudimentary stencil application, which I use to experiment with OpenMP tasks and the depend clause. In each of two steps, every element of one array is incremented by three values from another array: the value at the corresponding location and the values of its left and right neighbours. To avoid data races I have set up dependencies so that, for every section of the second update step, its task can only be scheduled once the relevant tasks for the sections of the first update step have been executed. I get the expected results, but I am not sure whether my assumptions are correct, because these tasks might be executed immediately by the encountering threads instead of being deferred. So my question is: are the tasks that are created inside worksharing loops all sibling tasks, and are the dependencies therefore honoured just as they are when the tasks are generated inside a single construct?
#include <math.h>

#include <cstddef>
#include <iostream>
#include <vector>

#include <omp.h>
// Element type shared by all arrays of the stencil example.
using value_type = double;
int main(int argc, char * argv[]){
std::size_t size = 100000;
std::size_t part_size = 25;
std::size_t parts = ceil(float(size)/part_size);
std::size_t num_threads = 4;
value_type * A = (value_type *) malloc(sizeof(value_type)*size);
value_type * B = (value_type *) malloc(sizeof(value_type)*size);
value_type * C = (value_type *) malloc(sizeof(value_type)*size);
for (int i = 0; i < size; ++i) {
A[i] = 1;
B[i] = 1;
C[i] = 0;
}
#pragma omp parallel num_threads(num_threads)
{
#pragma omp for schedule(static)
for(int part=0; part<parts; part++){
std::size_t current_part = part * part_size;
std::size_t left_part = part != 0 ? (part-1)*part_size : current_part;
std::size_t right_part = part != parts-1 ? (part+1)*part_size : current_part;
std::size_t start = current_part;
std::size_t end = part == parts-1 ? size-1 : start+part_size;
if(part==0) start = 1;
#pragma omp task depend(in: A[current_part], A[left_part], A[right_part]) depend(out: B[current_part])
{
for(int i=start; i<end; i++){
B[i] += A[i] + A[i-1] + A[i+1];
}
}
}
#pragma omp for schedule(static)
for(int part=0; part<parts; part++){
int current_part = part * part_size;
std::size_t left_part = part != 0 ? (part-1)*part_size : current_part;
std::size_t right_part = part != parts-1 ? (part+1)*part_size : current_part;
std::size_t start = current_part;
std::size_t end = part == parts-1 ? size-1 : start+part_size;
if(part==0) start = 1;
#pragma omp task depend(in: B[current_part], B[left_part], B[right_part]) depend(out: C[current_part])
{
for(int i=start; i<end; i++){
C[i] += B[i] + B[i-1] + B[i+1];
}
}
}
}
value_type sum = 0;
value_type max = -1000000000000;
value_type min = 1000000000000;
for(int i = 0; i < size; i++){
sum+=C[i];
if(C[i]<min) min = C[i];
if(C[i]>max) max = C[i];
}
std::cout << "sum: " << sum << std::endl;
std::cout << "min: " << min << std::endl;
std::cout << "max: " << max << std::endl;
std::cout << "avg: " << sum/(size) << std::endl;
return 0;
}