2

I am trying to profile two implementation ideas of multidimensional array classes.

In the code below, application creates thee huge multidimensional array of three classes: Array1, Array2 and conventional.

Application first creates arrays, which is not measured. Then it fills each array and read each of them by order. Time measurement occurs both programmatically and by profiler.

Unfortunately, section timings are not coincide. For example, the methods or class Array2 which consumes nearly half time, are not listed in profiler at all.

Why?

main code:

package tests;

public class Runner01 {

    private static final int[] dims = {400, 100, 20, 300};
    static private double[] data;

    static private Array1 array1;
    static private Array2 array2;
    static private double[][][][] array3;

    public static interface MyRunnable {

        public void run(int ii, int i, int j, int k, int l);
    }

    public static long test(MyRunnable runnable) {
        long start = System.nanoTime();
        int ii = 0;
        for (int i = 0; i < dims[0]; ++i) {
            for (int j = 0; j < dims[1]; ++j) {
                for (int k = 0; k < dims[2]; ++k) {
                    for (int l = 0; l < dims[3]; ++l) {
                        runnable.run(ii, i, j, k, l);
                        ii++;
                    }
                }
            }
        }
        long end = System.nanoTime();
        System.out.println("Done in " + (double) (end - start) / 1000000000 + " seconds");
        return end - start;
    }

    public static void main(String[] args) {

        int ii;
        long start, end, elapsed;

        start = System.nanoTime();

        System.out.println("Filling...");

        int size = 1;
        for (int i = 0; i < dims.length; ++i) {
            size *= dims[i];
        }

        data = new double[size];
        for (int i = 0; i < data.length; ++i) {
            data[i] = Math.random();
        }

        array1 = new Array1(dims);
        array2 = new Array2(dims);
        array3 = new double[dims[0]][dims[1]][dims[2]][dims[3]];

        System.out.println("Done.");

        System.out.println("Writing array1...");
        test(new MyRunnable() {
            @Override
            public void run(int ii, int i, int j, int k, int l) {
                array1.set(data[ii], i, j, k, l);
            }
        });

        System.out.println("Writing array2...");
        test(new MyRunnable() {
            @Override
            public void run(int ii, int i, int j, int k, int l) {
                array2.set(data[ii], i, j, k, l);
            }
        });

        System.out.println("Writing array3...");
        test(new MyRunnable() {
            @Override
            public void run(int ii, int i, int j, int k, int l) {
                array3[i][j][k][l] = data[ii];
            }
        });

        System.out.println("Reading array1...");
        test(new MyRunnable() {
            @Override
            public void run(int ii, int i, int j, int k, int l) {
                assert (data[ii] == array1.get(i, j, k, l));
            }
        });

        System.out.println("Reading array2...");
        test(new MyRunnable() {
            @Override
            public void run(int ii, int i, int j, int k, int l) {
                assert (data[ii] == array2.get(i, j, k, l));
            }
        });

        System.out.println("Reading array3...");
        test(new MyRunnable() {
            @Override
            public void run(int ii, int i, int j, int k, int l) {
                assert (array3[i][j][k][l] == data[ii]);
            }
        });

        end = System.nanoTime();
        elapsed = end - start;

        System.out.println("Total application time is " + (double) (end - start) / 1000000000 + " seconds");

    }

}

Array1 code:

package tests;

public class Array1 {

    private Object delegate;

    private Object allocate(Object delegate, int[] is, int pos) {
        if (pos < is.length) {
            delegate = new Object[is[pos]];
            for (int k = 0; k < is[pos]; ++k) {
                ((Object[]) delegate)[k] = allocate(((Object[]) delegate)[k], is, pos + 1);
            }
        }
        return delegate;
    }

    private Object get(Object delegate, int[] is, int pos) {
        if (pos < is.length) {
            Object subdelegate = ((Object[]) delegate)[is[pos]];
            return get(subdelegate, is, pos + 1);
        } else {
            return delegate;
        }
    }

    private void set(Object delegate, int[] is, int pos, double value) {
        if (pos < is.length - 1) {
            Object subdelegate = ((Object[]) delegate)[is[pos]];
            set(subdelegate, is, pos + 1, value);
        } else {
            ((Object[]) delegate)[is[pos]] = value;
        }
    }

    public Array1(int... is) {
        delegate = allocate(delegate, is, 0);
    }

    public double get(int... is) {
        return (double) get(delegate, is, 0);
    }

    public void set(double value, int... is) {
        set(delegate, is, 0, value);
    }

}

Array2 code:

package tests;

public class Array2 {

    private double[] delegate;
    private int[] pows;

    public Array2(int... is) {

        pows = new int[is.length];

        int size = 1;
        for (int k = 0; k < is.length; ++k) {
            pows[k] = size;
            size *= is[k];
        }

        delegate = new double[size];
    }

    public double get(int... is) {
        int pos = 0;

        for (int k = 0; k < is.length; ++k) {
            pos += is[k] * pows[k];
        }

        return delegate[pos];
    }

    public void set(double value, int... is) {
        int pos = 0;

        for (int k = 0; k < is.length; ++k) {
            pos += is[k] * pows[k];
        }

        delegate[pos] = value;
    }

}

The result:

enter image description here

Suzan Cioc
  • 29,281
  • 63
  • 213
  • 385

1 Answers1

0

You appear to have sub-call profiles for the first and fourth run, single call profiles for the second and fifth run, and no calls for the third and sixth run; these correspond to Array1, Array2 and built-in array-of-array access. (you have calls to $1.run, $2.run, $4.run and $5.run but not $3.run and $6.run)

So for your three cases, I would guess that the first has no inlined calls, the second it inlines everything below the call to the runnable, and the third time it sees only simple code inside the runnable so it inlines everything below test ( the totals of your printed times 101.36s agrees with the profiler's 101,373ms in test ).

There are common ways around ignoring warm-up results when micro-benchmarking java, with some good advice in this answer.

Community
  • 1
  • 1
Pete Kirkham
  • 48,893
  • 5
  • 92
  • 171
  • Sorry what are "sub-call profiles" and how one can have or not have them? – Suzan Cioc Nov 12 '14 at 09:40
  • In your screen shot, you have tree showing the profile ( total time / % ) for that call and under some of those, such as $1.run you have profiles for the subordinate calls made within that call, whereas for $2.run you have no such subordinate call profiles. – Pete Kirkham Nov 12 '14 at 09:46