I think what you are looking for can be solved using a CPU dispatcher technique.
For benchmarking OpenMP code vs. non-OpenMP code you can create different object files from the same source code like this
//foo.c
#ifdef _OPENMP
double foo_omp() {
#else
double foo() {
#endif
double sum = 0;
#pragma omp parallel for reduction(+:sum)
for(int i=0; i<1000000000; i++) sum += i%10;
return sum;
}
Compile like this
gcc -O3 -c foo.c
gcc -O3 -fopenmp -c foo.c -o foo_omp.o
This creates two object files foo.o
and foo_omp.o
. Then you can call one of these functions like this
//bar.c
#include <stdio.h>
double foo();
double foo_omp();
double (*fp)();
int main(int argc, char *argv[]) {
if(argc>1) {
fp = foo_omp;
}
else {
fp = foo;
}
double sum = fp();
printf("sum %e\n", sum);
}
Compile and link like this
gcc -O3 -fopenmp bar.c foo.o foo_omp.o
Then I time the code like this
time ./a.out -omp
time ./a.out
and the first case takes about 0.4 s and the second case about 1.2 s on my system with 4 cores/8 hardware threads.
Here is a solution which only needs a single source file
#include <stdio.h>
typedef double foo_type();
foo_type foo, foo_omp, *fp;
#ifdef _OPENMP
#define FUNCNAME foo_omp
#else
#define FUNCNAME foo
#endif
double FUNCNAME () {
double sum = 0;
#pragma omp parallel for reduction(+:sum)
for(int i=0; i<1000000000; i++) sum += i%10;
return sum;
}
#ifdef _OPENMP
int main(int argc, char *argv[]) {
if(argc>1) {
fp = foo_omp;
}
else {
fp = foo;
}
double sum = fp();
printf("sum %e\n", sum);
}
#endif
Compile like this
gcc -O3 -c foo.c
gcc -O3 -fopenmp foo.c foo.o