I have implemented concurrent quicksort in C++ using OpenMP.
#include <omp.h>
#include <iostream>
#include <algorithm>
using namespace std;
void sort(int *a, int low, int high);
int partition(int *a, int low, int high);
class QuickSort {
private:
int *arr;
int len;
public:
void init();
void Sort();
void Display();
};
int main() {
cout << "Program implementing Quicksort." << endl;
QuickSort a;
a.init();
a.Sort();
a.Display();
}
void sort(int *a, int low, int high) {
if(high < low || low == high)
return;
if(high == low+1) {
if(a[low] > a[high])
swap(a[low], a[high]);
return;
}
int pivotidx = partition(a, low, high);
/*for(int i = 0; i < 5; ++i)
cout << a[i] << " ";
cout << endl;*/
cout << "Pivot element has been placed at correct position: " << pivotidx << " by thread " << omp_get_thread_num() << endl;
#pragma omp parallel sections
{
#pragma omp section
{
sort(a, low, pivotidx);
}
#pragma omp section
{
sort(a, pivotidx+1, high);
}
}
}
int partition(int *a, int low, int high) {
int pivot = low;
int pivotval = a[low];
int leftpointer = low;
int rightpointer = high;
while(leftpointer < rightpointer) {
while(a[leftpointer] <= a[pivot] && leftpointer <= high)
++leftpointer;
if(leftpointer > high)
--leftpointer;
while(a[rightpointer] >= a[pivot] && rightpointer >= low)
--rightpointer;
if(rightpointer < low)
++rightpointer;
if(leftpointer < rightpointer)
swap(a[leftpointer], a[rightpointer]);
}
a[low] = a[rightpointer];
a[rightpointer] = pivotval;
return rightpointer;
}
void QuickSort::init() {
cout << "Enter the number of elements in the array: ";
cin >> len;
cout << "Enter the elements of the array: ";
arr = new int[len];
for(int i = 0; i < len; ++i)
cin >> arr[i];
}
void QuickSort::Sort() {
sort(arr, 0, len-1);
}
void QuickSort::Display() {
cout << "Sorted array is: " << endl;
for(int i = 0; i < len; ++i)
cout << arr[i] << " ";
cout << endl;
}
It's sorting correctly but I am not sure if it's really running on multiple cores. How can I check this? Also, my parallel code is very similar to the one present in the top answer here. There it's mentioned at the end that this can not extract more parallelism than two threads: if it is executed with more threads, the other threads don't have any work to do and will just sit down idle. Why is it so?