I wrote a small code that grows a graph recursively and afterwards free the nodes too (shown below). I am compiling the code with mpicc with and without the -fopenmp flag.
Running the code with time (time mpirun ./a.out) I get a real time of 4.259 s, but I get a huge penalty using openmp directives (shown in the code) - real time is now 10.744 s. I have been trying some settings to set the numbers of threads etc., but I do not get any speed up.
Is there something more I can set to achieve more parallelism and performance gains? or is the code so small and has dependencies between parent and offspring nodes blocking any potential gains?
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <omp.h>
struct tree_node{
int value;
struct tree_node *left_node, *right_node;
};
typedef struct tree_node tree_node_t;
tree_node_t* new_node(int value){
tree_node_t *p = (tree_node_t*) malloc(sizeof(tree_node_t));
p->value = value;
p->left_node = p->right_node = NULL;
assert((p->value > 0 || p->value == 0) && p->value < 10);
return p;
}
void tree_grow(tree_node_t* parent, int depth){
if(depth == 0){
return;
}
#pragma omp task
{
#pragma omp task
{parent->left_node = new_node(rand() % 10);}
#pragma omp task
{parent->right_node = new_node(rand() % 10);}
#pragma omp task
{tree_grow(parent->left_node, depth-1);}
#pragma omp task
{tree_grow(parent->right_node, depth-1);}
}
}
void branch_cutting(tree_node_t* current){
if (current == NULL){
return;
}
#pragma omp task
{
#pragma omp task
{branch_cutting(current->left_node);}
#pragma omp task
{branch_cutting(current->right_node);}
}
#pragma omp task
free(current);
}
int main (int argc, char **argv)
{
//omp_set_dynamic(0);
//omp_set_num_threads(6);
tree_node_t *root = new_node(0);
tree_grow(root, 25);
printf("tree grown and now freeing the memory...");
//#pragma omp taskwait
branch_cutting(root);
printf("tree is removed\n");
return 0;
}