Pass 1
In your (original) partition code, you had:
int partition(int a[], int start, int end){
int i,j,pivot,temp;
pivot = a[end]; //select last elem as the pivot
i = start- 1; // point i and j to before/after start end of the arr
j= end +1;
while(true){
//reduce j until we reach an elem that is less than pivot
if(a[j] >= pivot){ j= j-1; }
//increase i until we reach an elem that is more than pivot
if(a[i] <= pivot){ i = i+1; }
You're only supposed to access a[start]
to a[end]
, but you start off with the first iteration of the loop accessing outside theses bounds. This is bound to be part of the problem.
The fix isn't necessarily so obvious. It's tempting to just drop the subtraction and addition, but that's not necessarily correct. One of the key things is getting enough useful diagnostic printing in place to know what's going on.
Pass 2
The pseudo-code has been added and fixed since Pass 1. The pseudo-code is very similar to what's presented at Wikipedia under QuickSort. It is a minor variation on the theme of the 'Hoare Partitioning' — the variation being the use of repeat … until
in the question versus do … while
at Wikipedia.
algorithm quicksort(A, lo, hi) is
if lo < hi then
p := partition(A, lo, hi)
quicksort(A, lo, p)
quicksort(A, p + 1, hi)
algorithm partition(A, lo, hi) is
pivot := A[lo]
i := lo - 1
j := hi + 1
loop forever
do
i := i + 1
while A[i] < pivot
do
j := j - 1
while A[j] > pivot
if i >= j then
return j
swap A[i] with A[j]
Your current version of the partition code is:
int partition(int a[], int start, int end){
int i,j,pivot,temp;
pivot = a[end]; //select last elem as the pivot
i = start; // point i and j to before/after start end of the arr
j= end-1;
while(true){
//reduce j until we reach an elem that is less than pivot
if(a[j] >= pivot){ j= j-1; }
//increase i until we reach an elem that is more than pivot
if(a[i] <= pivot){ i = i+1; }
if(i<j){ //swap elems so that it is partitioned
temp = a[j];
a[j] = a[i];
a[i] = temp;
}
else{ return j; } //return the boundary for this partition
}
}
There are a number of key differences:
- You are using the last element instead of the first as the pivot.
- You are not looping at all while incrementing
i
and decrementing j
.
Fixing those problems leads to code like this:
static inline void swap_ints(int *A, int i, int j)
{
int t = A[i];
A[i] = A[j];
A[j] = t;
}
static int partition(int a[], int start, int end)
{
int pivot = a[start]; // select first elem as the pivot
//printf("-->> P(%d,%d) - pivot %d\n", start, end, pivot);
int i = start - 1; // point i before start of the array
int j = end + 1; // point j after end of the array
while (true)
{
do
{
j--;
//printf("---- j a[%d] = %d\n", j, a[j]);
} while (a[j] > pivot);
do
{
i++;
//printf("---- i a[%d] = %d\n", i, a[i]);
} while (a[i] < pivot);
if (i >= j)
break;
//printf("-<>- a[%d]=%d, a[%d]=%d\n", j, a[j], i, a[i]);
swap_ints(a, i, j);
//printf("-><- a[%d]=%d, a[%d]=%d\n", j, a[j], i, a[i]);
//dump_data("Post-swap", a, start, end);
}
//dump_data("Partition", a, start, end);
//printf("<<-- P(%d) = %d\n", j, a[j]);
return j;
}
There are lots of commented-out printing functions, which were used at times to reassure me that things were working as intended.
Your main quicksort function was OK, though it got decorated with (lots of) printing too:
void quicksort(int arr[], int start, int end)
{
if (start < end)
{
dump_data("QS Pre-partition", arr, start, end);
int boundary = partition(arr, start, end);
printf("QS Partition: %d:%d:%d\n", start, boundary, end);
dump_data("QS Pre-recursion L", arr, start, boundary);
quicksort(arr, start, boundary);
dump_data("QS Pre-recursion H", arr, boundary + 1, end);
quicksort(arr, boundary + 1, end);
dump_data("QS Post-Sort", arr, start, end);
}
}
The dump_data()
function looks like:
void dump_data(const char *tag, int *data, int start, int end)
{
printf("%s (%d):", tag, end - start + 1);
for (int i = start; i <= end; i++)
{
printf(" %d", data[i]);
}
putchar('\n');
}
And the test code (main()
) looks like:
int main(void)
{
int data[] =
{
/* random -n 20 0 9 | commalist -b ' ' -n 10 */
//4, 8, 0, 0, 3, 8, 3, 6, 5, 9,
//6, 8, 3, 6, 5, 5, 0, 8, 1, 1,
//3, 9, 4, 7, 2, 6, 9, 0, 6, 1,
//8, 0, 2, 1, 4, 0, 6, 5, 4, 2,
//7, 6, 2, 5, 4, 4, 6, 0, 8, 3,
//6, 1, 2, 7, 4, 3, 0, 0, 0, 4,
//4, 7, 8, 8, 4, 4, 4, 4, 9, 6,
//9, 0, 2, 7, 6, 5, 9, 2, 7, 7,
9, 7, 0, 9, 5, 4, 8, 7, 9, 9,
2, 9, 9, 7, 0, 3, 9, 6, 8, 5,
//5, 1, 4, 5, 5, 4, 0, 2, 6, 1,
//5, 8, 1, 0, 1, 9, 8, 4, 8, 0,
};
enum { NUM_DATA = sizeof(data) / sizeof(data[0]) };
int data_copy[NUM_DATA];
for (int end = 1; end < NUM_DATA; end++)
{
memcpy(data_copy, data, NUM_DATA * sizeof(data[0]));
dump_data("Unsorted", data_copy, 0, end);
quicksort(data_copy, 0, end);
dump_data("PostSort", data_copy, 0, end);
check_sorted(data_copy, 0, end);
}
return 0;
}
The check_sorted()
function doesn't usually say anything (but it did when used on unsorted data):
void check_sorted(int *data, int lo, int hi)
{
for (int i = lo + 1; i < hi; i++)
{
if (data[i-1] > data[i])
printf("Inversion @ A[%d] = %d vs A[%d] = %d\n", i-1, data[i-1], i, data[i]);
}
}
There are various sets of 20 random numbers in the range 0..9 in the main()
program.
Careful re-readingfor the Nth time of the Wikipedia page shows:
Partitioning: reorder the array so that all elements with values less than the pivot come before the pivot, while all elements with values greater than the pivot come after it (equal values can go either way).
A previous version of this tail-end commentary was concerned about a pivot value sometimes appearing in the lower half of the partition data, but noted that the sort worked anyway. The quotation shows that is perfectly permissible — there isn't a problem. There are partitioning schemes, such as fat pivot schemes, where that would not be true.