I have an issue with the performance of quicksort concerning the dual pivoting strategy.
The situation:
Take an array a of n
randomly generated 32-bit integers and bring these in ascending order. Lets fix n = 10_000_000
and let the random numbers be generated in [0,MAX_RAND]
. Here everything works as expected - the performance of my implementation is comparable the one of the qsort()
library implementation.
Now the problem:
Taking the same n
as above but restricting the intervall for the random-numbers to [0,K] where K < ~5000
, the performance breaks down dramatically. Some numbers from my machine:
K = MAX_RAND
QS-DUAL-PIVOT Time: 865ms
Ascending: 1
QS_LIBRARY Time: 1296ms
Ascending: 1
K = 10000
QS-DUAL-PIVOT Time: 2521ms
Ascending: 1
QS_LIBRARY Time: 1076ms
Ascending: 1
K = 5000
QS-DUAL-PIVOT Time: 4420ms
Ascending: 1
QS_LIBRARY Time: 1044ms
Ascending: 1
I think, you get the idea. Its compiled on my private intel machine with gcc-5.2 under Arch-Linux-64 with -O9.
Do you have any hints for me? Whats the problem here?
Example code to generate the above outputs:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
int is_ascending (int *arr, int size)
{
int i;
for (i = 0; i < size - 1; i ++)
if (arr[i] > arr[i + 1])
return 0;
return 1;
}
void quick_sort_dual_pivot (int *arr, int low, int high)
{
int lt, ht, i, temp;
if (low >= high)
return;
lt = low + 1;
ht = high - 1;
if (arr[low] > arr[high])
{
temp = arr[low];
arr[low] = arr[high];
arr[high] = temp;
}
if (low + 1 == high)
return;
for (i = low + 1; i <= high; ++i)
{
if (i > ht)
break;
if (arr[i] < arr[low])
{
temp = arr[lt];
arr[lt] = arr[i];
arr[i] = temp;
++lt;
}
else if (arr[i] > arr[high])
{
temp = arr[ht];
arr[ht] = arr[i];
arr[i] = temp;
--ht;
--i;
}
}
++ht;
temp = arr[ht];
arr[ht] = arr[high];
arr[high] = temp;
--lt;
temp = arr[lt];
arr[lt] = arr[low];
arr[low] = temp;
quick_sort_dual_pivot (arr, low, lt - 1);
quick_sort_dual_pivot (arr, lt + 1, ht - 1);
quick_sort_dual_pivot (arr, ht + 1, high);
}
int get_ms (int start)
{
static struct timeval start_time;
struct timeval end_time;
int msec, seconds, useconds;
if (start)
{
gettimeofday (&start_time, NULL);
return 0;
}
gettimeofday (&end_time, NULL);
seconds = end_time.tv_sec - start_time.tv_sec;
useconds = end_time.tv_usec - start_time.tv_usec;
msec = ((seconds) * 1000 + useconds / 1000.) + 0.5;
return msec;
}
int comp_asc (const void *arg1, const void *arg2)
{
return (*(int *)arg1) - (*(int *)arg2);
}
#define ARRAY_SIZE 10000000
int main(void)
{
int i;
int millisec;
static int a1[ARRAY_SIZE];
static int a2[ARRAY_SIZE];
srand (time (NULL));
for (i = 0; i < ARRAY_SIZE; i ++)
{
int r = rand () % 5000;
a1[i] = r;
a2[i] = r;
}
get_ms (1);
quick_sort_dual_pivot (a1, 0, ARRAY_SIZE - 1);
millisec = get_ms (0);
printf ("QS-DUAL-PIVOT Time: %dms\n", millisec);
printf ("Ascending: %d\n", is_ascending (a1, ARRAY_SIZE));
get_ms (1);
qsort (a2, ARRAY_SIZE, sizeof (int), comp_asc);
millisec = get_ms (0);
printf ("QS_LIBRARY Time: %dms\n", millisec);
printf ("Ascending: %d\n", is_ascending (a2, ARRAY_SIZE));
return 0;
}