2

I've implemented both Insertion sort and Heap sort. In theory, Heap sort has nlogn time complexity and insertion has n^2. Why, then it takes my Insertion implementation about x6 times faster to sort a 100,000 long array?

I used JMH for benchmarking the average time of each sort algorithm. Here's my benchmark code:

import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.stream.IntStream;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

public class MyBenchmark {

// setup the benchmark - create a new array for each iteration
    @State(Scope.Thread)
    public static class MyState {
        int[] array = null;

        @Setup(Level.Iteration)
        public void doSetup() {
            array = createArray(100000, 0, 100);
        }
    }

    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.SECONDS)
    public void insertionSort(MyState state) {
        int[] array = state.array;

        for (int i = 1; i < array.length; i++) {
            int element = array[i];
            for (int j = i - 1; j >= 0; j--) {
                if (element < array[j]) {
                    int temp = array[j];
                    array[j] = element;
                    array[j + 1] = temp;
                } else {
                    break;
                }
            }
        }
    }

    @Benchmark
    @BenchmarkMode(Mode.AverageTime)
    @OutputTimeUnit(TimeUnit.SECONDS)
    public void heapSort(MyState state) {
        int[] array = state.array;
        sort(array, array.length);
    }

    public static void sort(int[] arr, int size) {

        for (int i = 0; i < size;) {
            maxHeapify(size, arr);
            int temp = arr[0];
            arr[0] = arr[size - 1];
            arr[size - 1] = temp;
            size--;
        }
    }

    private static void maxHeapify(int size, int[] arr) {
        int nonLeafs = size / 2;
        for (int i = nonLeafs; i > 0; i--) {
            int arrayPos = heapToArrayPos(i), leftChild = heapToArrayPos(leftChild(i)),
                    rightChild = heapToArrayPos(rightChild(i));
            if (rightChild < size) {
                if (arr[rightChild] < arr[leftChild]) {
                    if (arr[arrayPos] < arr[leftChild]) {
                        switchWithLeftChild(arrayPos, arr);
                    }
                } else if (arr[arrayPos] < arr[rightChild]) {
                    switchWithRightChild(arrayPos, arr);
                }
            } else if (arr[arrayPos] < arr[leftChild]) {
                switchWithLeftChild(arrayPos, arr);
            }
        }
    }

    private static int heapToArrayPos(int heap) {
        return heap - 1;
    }

    private static int rightChild(int pos) {
        return pos * 2 + 1;
    }

    private static int leftChild(int pos) {
        return pos * 2;
    }

    private static void switchWithRightChild(int pos, int[] arr) {
        int father = arr[pos];
        int childPos = heapToArrayPos(rightChild(pos + 1)), child = arr[childPos];
        arr[childPos] = father;
        arr[pos] = child;
    }

    private static void switchWithLeftChild(int pos, int[] arr) {
        int father = arr[pos];
        int childPos = heapToArrayPos(leftChild(pos + 1)), child = arr[childPos];
        arr[childPos] = father;
        arr[pos] = child;
    }

    public static void main(String[] args) throws RunnerException {
        Options opt = new OptionsBuilder().include(MyBenchmark.class.getSimpleName()).forks(1).build();

        new Runner(opt).run();
    }

    public static int[] createArray(int length, int minValue, int maxValue) {
        return IntStream.generate(() -> ThreadLocalRandom.current().nextInt(minValue, maxValue)).limit(length)
                .toArray();
    }

    public static int[] createArray(int length) {
        return createArray(length, 0, 10);
    }

    public static int[] createArray(int minValue, int maxValue) {
        return createArray(10, minValue, maxValue);

    }
}

And here is the benchmarking output:

JMH 1.12 (released 51 days ago) VM version: JDK 1.8.0_65, VM 25.65-b01 VM invoker: C:\Program Files\Java\jdk1.8.0_65\jre\bin\java.exe VM options: -Dfile.encoding=UTF-8 -Xbootclasspath:C:\Program Files\Java\jdk1.8.0_65\jre\lib\resources.jar;C:\Program Files\Java\jdk1.8.0_65\jre\lib\rt.jar;C:\Program Files\Java\jdk1.8.0_65\jre\lib\jsse.jar;C:\Program Files\Java\jdk1.8.0_65\jre\lib\jce.jar;C:\Program Files\Java\jdk1.8.0_65\jre\lib\charsets.jar;C:\Program Files\Java\jdk1.8.0_65\jre\lib\jfr.jar;C:\Program Files\Java\jdk1.8.0_65\lib\tools.jar
Warmup: 20 iterations, 1 s each
Measurement: 20 iterations, 1 s each
Timeout: 10 min per iteration
Threads: 1 thread, will synchronize iterations
Benchmark mode: Average time, time/op
Benchmark: org.sample.MyBenchmark.heapSort

Run progress: 0.00% complete, ETA 00:01:20
Fork: 1 of 1
Warmup Iteration 1: 17.651 s/op
Warmup Iteration 2: 16.004 s/op
Warmup Iteration 3: 14.640 s/op
Warmup Iteration 4: 14.699 s/op
Warmup Iteration 5: 14.836 s/op
Warmup Iteration 6: 14.900 s/op
Warmup Iteration 7: 14.758 s/op
Warmup Iteration 8: 15.084 s/op
Warmup Iteration 9: 15.652 s/op
Warmup Iteration 10: 15.121 s/op
Warmup Iteration 11: 15.315 s/op
Warmup Iteration 12: 15.299 s/op
Warmup Iteration 13: 15.234 s/op
Warmup Iteration 14: 14.822 s/op
Warmup Iteration 15: 15.078 s/op
Warmup Iteration 16: 15.565 s/op
Warmup Iteration 17: 15.509 s/op
Warmup Iteration 18: 15.189 s/op
Warmup Iteration 19: 14.748 s/op
Warmup Iteration 20: 14.902 s/op
Iteration 1: 14.888 s/op
Iteration 2: 15.381 s/op
Iteration 3: 16.099 s/op
Iteration 4: 15.536 s/op
Iteration 5: 15.635 s/op
Iteration 6: 16.446 s/op
Iteration 7: 16.034 s/op
Iteration 8: 15.828 s/op
Iteration 9: 15.666 s/op
Iteration 10: 16.071 s/op
Iteration 11: 15.962 s/op
Iteration 12: 15.777 s/op
Iteration 13: 15.757 s/op
Iteration 14: 15.424 s/op
Iteration 15: 15.449 s/op
Iteration 16: 15.920 s/op
Iteration 17: 14.609 s/op
Iteration 18: 14.651 s/op
Iteration 19: 14.661 s/op
Iteration 20: 14.607 s/op

Result "heapSort": 15.520 ±(99.9%) 0.486 s/op [Average] (min, avg, max) = (14.607, 15.520, 16.446), stdev = 0.560 CI (99.9%): [15.034, 16.006] (assumes normal distribution)

JMH 1.12 (released 51 days ago) VM version: JDK 1.8.0_65, VM 25.65-b01 VM invoker: C:\Program Files\Java\jdk1.8.0_65\jre\bin\java.exe VM options: -Dfile.encoding=UTF-8 -Xbootclasspath:C:\Program Files\Java\jdk1.8.0_65\jre\lib\resources.jar;C:\Program Files\Java\jdk1.8.0_65\jre\lib\rt.jar;C:\Program Files\Java\jdk1.8.0_65\jre\lib\jsse.jar;C:\Program Files\Java\jdk1.8.0_65\jre\lib\jce.jar;C:\Program Files\Java\jdk1.8.0_65\jre\lib\charsets.jar;C:\Program Files\Java\jdk1.8.0_65\jre\lib\jfr.jar;C:\Program Files\Java\jdk1.8.0_65\lib\tools.jar Warmup: 20 iterations, 1 s each Measurement: 20 iterations, 1 s each Timeout: 10 min per iteration Threads: 1 thread, will synchronize iterations Benchmark mode: Average time, time/op Benchmark: org.sample.MyBenchmark.insertionSort

Run progress: 50.00% complete, ETA 00:10:15 Fork: 1 of 1 Warmup Iteration 1: 1.726 s/op Warmup Iteration 2: 1.636 s/op Warmup Iteration 3: 1.968 s/op Warmup Iteration 4: 1.970 s/op Warmup Iteration 5: 1.961 s/op Warmup Iteration 6: 1.966 s/op Warmup Iteration 7: 1.962 s/op Warmup Iteration 8: 1.961 s/op Warmup Iteration 9: 1.959 s/op Warmup Iteration 10: 1.965 s/op Warmup Iteration 11: 1.966 s/op Warmup Iteration 12: 1.970 s/op Warmup Iteration 13: 1.964 s/op Warmup Iteration 14: 1.952 s/op Warmup Iteration 15: 1.955 s/op Warmup Iteration 16: 1.956 s/op Warmup Iteration 17: 1.972 s/op Warmup Iteration 18: 1.966 s/op Warmup Iteration 19: 1.954 s/op Warmup Iteration 20: 1.956 s/op
Iteration 1: 1.969 s/op
Iteration 2: 1.963 s/op
Iteration 3: 2.050 s/op
Iteration 4: 2.019 s/op Iteration 5: 1.934 s/op
Iteration 6: 1.953 s/op
Iteration 7: 1.961 s/op
Iteration 8: 1.972 s/op
Iteration 9: 1.957 s/op
Iteration 10: 1.956 s/op
Iteration 11: 1.975 s/op
Iteration 12: 1.950 s/op
Iteration 13: 1.965 s/op
Iteration 14: 1.961 s/op
Iteration 15: 1.950 s/op
Iteration 16: 1.956 s/op
Iteration 17: 1.975 s/op
Iteration 18: 1.966 s/op
Iteration 19: 1.959 s/op
Iteration 20: 1.965 s/op

Result "insertionSort":
1.968 ±(99.9%) 0.022 s/op [Average] (min, avg, max) = (1.934, 1.968, 2.050), stdev = 0.025 CI (99.9%): [1.946, 1.990] (assumes normal distribution)

Run complete. Total time: 00:09:55

Benchmark Mode Cnt Score Error Units
MyBenchmark.heapSort avgt 20 12.692 ± 0.282 s/op
MyBenchmark.insertionSort avgt 20 2.024 ± 0.020 s/op

Edit: Since I've posted the question i added the @setup to set-up the array before the benchmark, so the array creation operations won't be a factor. I ran the benchmark again and the results for the insertion sort were pretty much the same. Heap sort benchmark got faster by 3 sec on avg. I've only posted the updated summary of the results.

MaxG
  • 1,079
  • 2
  • 13
  • 26
  • 1
    Despite your comment, this is a duplicate; because in order to benchmark Java - which you are doing - you first need to learn _how_. Because of lack of warmup of the JVM and iterations, all you are really timing is the classloader and the JIT. When you have written some benchmarks in JMH, come back with another question. – Boris the Spider May 21 '16 at 17:26
  • Maybe i don't know how to benchmark java. This isn't the point. The point is levels of complexity and how somehow I violate the theoretical complexity of heap sort. – MaxG May 21 '16 at 17:29
  • 2
    You don't violate any complexity, you're just not timing what you think you're timing. I'm sorry, but if you want meaningful benchmarks in Java you're going to have to learn how to write them. – Boris the Spider May 21 '16 at 17:30
  • @BoristheSpider I have used JMH benchmark for testing the code. Please, remove duplicate. – MaxG May 22 '16 at 12:45
  • 1
    Break leaves only the outer for-loop. Both impl. are correct. I've tested them. – MaxG May 23 '16 at 10:05
  • 1
    MaxG, have you tried running your benchmark for different array sizes? Does the trend stay for arrays 10 times as big? – CptBartender May 23 '16 at 10:51
  • @Slanec I used the canonical definition of [insertion sort](http://www.tutorialspoint.com/data_structures_algorithms/insertion_sort_algorithm.htm). Also, you can see that It's different from [Bubble sort](http://www.tutorialspoint.com/data_structures_algorithms/bubble_sort_algorithm.htm). They both are of O(n^2) time complexity. Analyzing your suggestion (it's interesting): you find the insertion point in logn time, but the shifting is of linear complexity. If i'm not mistaken, in theory it should take longer because you run on n elements nlogn operations. It's amazing that it runs faster. – MaxG May 23 '16 at 12:26
  • @CptBartender I currently am running a benchmark on 1M array size, but its gonna take a while :) – MaxG May 23 '16 at 12:30
  • @MaxG Yes, sorry, it _is_ an insert sort, I got deceived by my own debugging. Still, it is true that both the version that first finds the correct index and then shifts all numbers; and the one with binary search instead, are faster (the binary one by a lot). The difference is that both comparing and swapping elements takes time, and the binary version does less of comparing. The linear variant, I presume, is faster because of caching / better branch prediction or something. Not sure. I'll check your heapsort later on. – Petr Janeček May 23 '16 at 13:03
  • 1
    Most likely the heap sort is taking so much time because you implemented it wrong. Your `sort` method calls `maxHeapify` `size` times, and each call there does `size/2` iterations. So what you've implemented is an O(n^2) sorting algorithm. You should only have to call `maxHeapify` once. – Jim Mischel May 23 '16 at 17:27
  • @JimMischel This should be an answer instead of a comment. It's the correct observation and fixing that (While also fixing the balancing afterwards as currently it only swaps a parent with his child once - some elements might need to bubble down further, though.) makes the job done. – Petr Janeček May 24 '16 at 13:52
  • @Slanec: Thanks. I added an answer. – Jim Mischel May 24 '16 at 14:23

1 Answers1

5

Your heap sort is implemented incorrectly. The code you posted appears to be doing a selection sort. That is, for each item it calls maxHeapify, takes the first item in the heap, puts it at the end, and decreases the count. So maxHeapify is called size times, each time with a decreasing size. The number of iterations of the inner loop in maxHeapify ends up being something like (n^2)/4.

You've implemented a glorified selection sort with complexity O(n^2).

The trick to doing an in-place heap sort is to first build the heap--once--and then rearrange it to be sorted. You call maxHeapify once:

maxHeapify(size, arr);

When that's done, you have a valid max heap, with the largest item at arr[0], etc. That takes O(n) time.

What you want is an array in ascending order. To do that, you build a loop that copies the largest item from the heap (i.e. arr[0]) and saves it temporarily. Then, take the last item in the heap, reduce the count by one, and then re-insert that item at the top, sifting it down as required. Finally, place the previous largest item at the position that was previously occupied by the last item. When count gets to 0, you have a sorted array:

int count = size;
while (count > 0)
{
    int save = arr[0];      // save the largest item
    arr[0] = arr[count-1];  // move last item to top
    arr[count-1] = save;    // and place the largest item
    count = count - 1;      // reduce the count
    SiftDown(0);            // sift item into place
}

All you're doing is successively calling removeMax on the heap, and storing the result back into the array in the position that was vacated.

SiftDown is the same method you use when inserting an item into a heap.

See my blog post, A Simple Heap of Integers, for a complete example of building a heap using the O(n) heapify method. It's in C#, but I think simple enough that if you understand Java you can understand it. I don't show how to do the sort part, but with that code and the few lines above, you should do fine.

Jim Mischel
  • 131,090
  • 20
  • 188
  • 351