I am trying to benchmark three functions in C for numerical Integration of PI as an exercise for usage of threads. The first is sequential, the second is with posix threads and the third is with openMP.
My benchmark output shows that the sequential function is the fastest, but that absolutely doesn't feel right. It feels almost half as fast, but the output shows that it's double as fast.
Here is my code:
// gcc -pthread -fopenmp benchmark.c -o benchmark -Wall -lm
#include <omp.h>
#include <stdio.h>
#include <unistd.h>
#include <pthread.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#define STEPS 1000000000 // Number of steps for numerical integration
#define STEP_SIZE 1.0/STEPS // Size of a step
#define ST (0.5) * STEP_SIZE // Starting size of the function
#define NUM_THREADS 4 // Number of Threads
/**
* This Struct represents a posix thread.
* pthread_create() takes this struct as an argument.
* @see pthread_pi()
*/
typedef struct _pthread_arg
{
int thread_id; // ID of the Thread
double val; // Integral of the Thread
} pthread_arg;
/**
* This Function measures the time of a function.
*/
void benchmark(double f())
{
double pi;
clock_t start, end;
double time_elapsed;
start = clock();
pi = (f());
end = clock();
time_elapsed = ((double)(end - start)) / CLOCKS_PER_SEC;
printf("Computed PI = %.13f \n", pi);
printf("Difference to Reference is %.13f \n", M_PI - pi);
printf("time elapsed:%f\n", time_elapsed);
}
/**
* This Function calculates the value of PI with numerical Integration
* sequentially.
*/
double sequential_pi()
{
double step_size = 1.0/STEPS, t = 0.5 * step_size, sum = 0;
while (t < 1.0)
{
sum += sqrt(1 - t * t) * step_size;
t += step_size;
}
return sum * 4;
}
/**
* pthread_create() takes this function as an argument.
* @see pthread_pi()
*/
void* pthread_calc_pi(void* ptr)
{
pthread_arg* arg;
arg = (pthread_arg*) ptr;
double t;
double val;
int thread_id;
int p; // left intervalboundary
int q; // right intervalboundary
int i;
thread_id = arg->thread_id;
p = STEPS / NUM_THREADS * thread_id;
q = STEPS / NUM_THREADS * (thread_id + 1);
t = ST + STEP_SIZE * p;
for(i = p; i < q; i++)
{
val += sqrt(1 - t * t) * STEP_SIZE;
t += STEP_SIZE;
}
arg->val = val;
pthread_exit(0);
}
/**
* This Function calculates the value of PI with numerical Integration with
* Posix Threads.
*/
double pthread_pi()
{
double pi;
int i;
pthread_t* thread;
pthread_arg* args;
thread = malloc(NUM_THREADS * sizeof(pthread_t));
args = malloc(NUM_THREADS * sizeof(pthread_arg));
// Initialisation and creation of Threads
for(i = 0; i < NUM_THREADS; i++)
{
args[i].thread_id = i;
pthread_create(&(thread[i]), NULL, &pthread_calc_pi, (void*) &args[i]);
}
// Synchronisation of threads and reduction of val
for (i = 0; i < NUM_THREADS; i++)
{
pthread_join(thread[i], NULL);
pi += args[i].val;
}
return pi * 4;
}
/**
* This Function calculates the value of PI with numerical Integration with
* openMP.
*/
double openMP_pi()
{
int i;
double pi = 0;
int p; // left intervalboundary
int q; // right intervalboundary
int j;
double t;
omp_set_num_threads(NUM_THREADS);
#pragma omp parallel for reduction(+:pi) private(p,q,t,j)
for(i = 0; i < NUM_THREADS; i++)
{
p = STEPS / NUM_THREADS * i;
q = STEPS / NUM_THREADS * (i + 1);
t = ST + STEP_SIZE * p;
for(j = p; j < q; j++)
{
pi += sqrt (1 - t * t) * STEP_SIZE;
t += STEP_SIZE;
}
}
return pi * 4;
}
int main()
{
printf("\npthread:\n");
benchmark(&pthread_pi);
printf("\nopenMP:\n");
benchmark(&openMP_pi);
printf("\nsequential:\n");
benchmark(&sequential_pi);
printf("\n");
return 0;
}
here is the output:
pthread:
Computed PI = 3.1415926680606
Difference to Reference is -0.0000000144708
time elapsed:8.774720
openMP:
Computed PI = 3.1415926680606
Difference to Reference is -0.0000000144708
time elapsed:8.783519
sequential:
Computed PI = 3.1415926636781
Difference to Reference is -0.0000000100883
time elapsed:4.235619