I have a recursive function that is searching through all tuples. The full code is here. I would like to be able to specify the depth in the tree to invoke parallelization with OpenMP. An outline of what I am trying is
int main(int argc, char *argv[])
{
omp_set_num_threads(2);
backtrack(size,alphabet,tuple,0,threading_depth);
}
void backtrack(unsigned size, unsigned alphabet, unsigned *tuple, unsigned ell, unsigned t_depth)
{
if(ell==size)
{
#pragma omp critical
{
fprintf(stdout,"solution from thread #%d = ",omp_get_thread_num());
for(i=0;i<size;i++)
fprintf(stdout,"%3d ",tuple[i]);
fprintf(stdout,"\n");
}
}
else
{
#pragma omp parallel for if(ell == t_depth) default(none) shared(alphabet,tuple,size,ell,t_depth) private(j,unused)
for(i=0;i<alphabet;i++)
{
unsigned *tuple_to_send;
if(ell == t_depth)
{
unsigned *local_tuple;
local_tuple = (unsigned *) calloc(size,sizeof(unsigned));
for(j=0;j<ell;j++) local_tuple[j] = tuple[j];
tuple_to_send = local_tuple;
}
else
{
tuple_to_send = tuple;
}
tuple_to_send[ell] = i;
backtrack(size,alphabet,tuple_to_send,ell+1,t_depth);
}
}
}
I am running this on
$ uname -a
Linux 4.4.0-24-generic #43-Ubuntu SMP Wed Jun 8 19:27:37 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux
which has a dual core processor (I have successfully run OpenMP code with two threads before). I compile with
gcc -O3 -fopenmp -o selective_threading_not_working selective_threading_not_working.c
When I run the code I expect to see something like
$ selective_threading_not_working
solution from thread #0 = 0 0 0
solution from thread #0 = 0 0 1
solution from thread #0 = 0 0 2
solution from thread #0 = 0 1 0
solution from thread #0 = 0 1 1
solution from thread #0 = 0 1 2
solution from thread #0 = 0 2 0
solution from thread #0 = 0 2 1
solution from thread #0 = 0 2 2
solution from thread #1 = 1 0 0
solution from thread #1 = 1 0 1
solution from thread #1 = 1 0 2
solution from thread #1 = 1 2 0
solution from thread #1 = 1 1 0
solution from thread #1 = 1 1 1
solution from thread #1 = 1 1 2
solution from thread #1 = 1 2 1
solution from thread #1 = 1 2 2
solution from thread #0 = 2 0 0
solution from thread #0 = 2 0 1
solution from thread #0 = 2 0 2
solution from thread #0 = 2 2 0
solution from thread #0 = 2 2 1
solution from thread #0 = 2 2 2
solution from thread #0 = 2 1 0
solution from thread #0 = 2 1 1
solution from thread #0 = 2 1 2
but what I see is
$ selective_threading_not_working
solution from thread #0 = 0 0 0
solution from thread #0 = 0 0 1
solution from thread #0 = 0 0 2
solution from thread #0 = 0 1 0
solution from thread #0 = 0 1 1
solution from thread #0 = 0 1 2
solution from thread #0 = 0 2 0
solution from thread #0 = 0 2 1
solution from thread #0 = 0 2 2
solution from thread #0 = 1 0 0
solution from thread #0 = 1 0 1
solution from thread #0 = 1 0 2
solution from thread #0 = 1 2 0
solution from thread #0 = 1 1 0
solution from thread #0 = 1 1 1
solution from thread #0 = 1 1 2
solution from thread #0 = 1 2 1
solution from thread #0 = 1 2 2
solution from thread #0 = 2 0 0
solution from thread #0 = 2 0 1
solution from thread #0 = 2 0 2
solution from thread #0 = 2 2 0
solution from thread #0 = 2 2 1
solution from thread #0 = 2 2 2
solution from thread #0 = 2 1 0
solution from thread #0 = 2 1 1
solution from thread #0 = 2 1 2
I have found suggestions of using omp_set_nested(1) but this had no effect.