-2

I have tested several simple functions in both Golang and Java. To my surprise, Java is sometimes faster than Golang (especially for recursive functions and for some standard-library functions such as math/rand's Rand). I wonder why. Here is the code I used for the tests, along with the results.

Golang code:

package main

import (
    "fmt"
    "math/rand"
    "time"
)

// calPi estimates Pi by the Monte Carlo method: it draws pointCount random
// points in the unit square and returns four times the fraction of them whose
// squared distance from the origin is below 1.
//
// Each point consumes two values from the shared math/rand source (x first,
// then y). With pointCount == 0 the final division is 0/0, so the result is NaN.
func calPi(pointCount int) float64 {
    hits := 0

    for remaining := pointCount; remaining > 0; remaining-- {
        px := rand.Float64()
        py := rand.Float64()

        // Strictly inside the quarter circle of radius 1.
        if px*px+py*py < 1 {
            hits++
        }
    }

    return 4.0 * float64(hits) / float64(pointCount)
}

// fibonacci returns the c-th Fibonacci number using plain double recursion
// (no memoisation). Any c below 2, including negative values, is returned
// unchanged.
func fibonacci(c int64) int64 {
    if c >= 2 {
        twoBack := fibonacci(c - 2)
        oneBack := fibonacci(c - 1)

        return twoBack + oneBack
    }

    return c
}

// main runs four micro-benchmarks one after another. For each one it prints a
// "Test N" header, the computed result, and the elapsed wall-clock time.
//
//   - Test 1: sums i*i for i = 0 .. 1e9-1 using a float64 counter.
//   - Test 2: calls fibonacci(50).
//   - Test 3: sums 1e8 values returned by rand.Float64.
//   - Test 4: calls calPi(100000000).
//
// Every duration is printed with the same "Duration: %v\n" format. A
// time.Duration already renders with its own unit (for example
// "1.449212507s"), so no extra " s" suffix is appended.
func main() {
    // Seed the shared math/rand source from the current time so the random
    // values drawn below differ from run to run.
    rand.Seed(time.Now().Unix())

    fmt.Printf("Test 1\n")

    startTime := time.Now()

    result := 0.0

    // The counter itself is a float64, so every iteration performs a
    // floating-point multiply, add and compare.
    for i := 0.0; i < 1000000000; i = i + 1 {
        result += i * i
    }

    // Take the elapsed time immediately after the workload so that printing
    // is never part of the measurement.
    elapsed := time.Since(startTime)

    fmt.Printf("Result: %v\n", result)
    fmt.Printf("Duration: %v\n", elapsed)

    fmt.Printf("Test 2\n")

    startTime = time.Now()

    resultInt := fibonacci(50)

    elapsed = time.Since(startTime)

    fmt.Printf("Result: %v\n", resultInt)
    fmt.Printf("Duration: %v\n", elapsed)

    fmt.Printf("Test 3\n")

    startTime = time.Now()

    result = 0.0

    for i := 0.0; i < 100000000; i = i + 1 {
        result += rand.Float64()
    }

    elapsed = time.Since(startTime)

    fmt.Printf("Result: %v\n", result)
    fmt.Printf("Duration: %v\n", elapsed)

    fmt.Printf("Test 4\n")

    startTime = time.Now()

    result = calPi(100000000)

    elapsed = time.Since(startTime)

    fmt.Printf("Result: %v\n", result)
    fmt.Printf("Duration: %v\n", elapsed)
}

the result:

Test 1
Result: 3.333333328333552e+26
Duration: 1.449212507s
Test 2
Result: 12586269025
Duration: 1m31.645050682s
Test 3
Result: 4.999483069673434e+07
Duration: 2.534121566s
 sTest 4
Result: 3.14147056
Duration: 5.036491495s s

Java code:

/**
 * Four small timing experiments: a floating-point summation loop, a naive
 * recursive Fibonacci, a loop over Math.random(), and a Monte Carlo estimate
 * of pi. Each experiment prints its result and the elapsed wall-clock time in
 * seconds.
 */
public class Performance {

    /**
     * Estimates pi by drawing {@code pointCount} random points in the unit
     * square and counting those whose squared distance from the origin is
     * below 1.
     *
     * @param pointCount number of random points to draw
     * @return four times the fraction of points inside the quarter circle
     */
    public static double calPi(int pointCount) {
        int hits = 0;
        int drawn = 0;

        while (drawn < pointCount) {
            final double px = Math.random();
            final double py = Math.random();

            if (px * px + py * py < 1) {
                hits++;
            }

            drawn++;
        }

        return (4.0 * hits) / pointCount;
    }

    /**
     * Evaluates {@code a * b / (c + 1) + a}.
     */
    public static double cal(double a, double b, double c) {
        final double quotient = a * b / (c + 1);
        return quotient + a;
    }

    /**
     * Returns the c-th Fibonacci number by plain double recursion; any value
     * below 2 is returned unchanged.
     */
    public static long fibonacci(long c) {
        if (c >= 2) {
            return fibonacci(c - 2) + fibonacci(c - 1);
        }
        return c;
    }

    /** Converts a pair of millisecond timestamps into elapsed seconds. */
    private static float elapsedSeconds(long startMillis, long endMillis) {
        return (float) (endMillis - startMillis) / 1000;
    }

    /**
     * Runs the four experiments in order and prints, for each one, a header,
     * the result and the duration.
     */
    public static void main(String[] args) {
        long begin;
        long finish;
        float seconds;

        // Test 1: sum of squares with a double loop counter.
        System.out.println("Test 1");

        begin = System.currentTimeMillis();

        double total = 0.0;
        double k = 0.0;
        while (k < 1000000000) {
            total += k * k;
            k = k + 1;
        }

        finish = System.currentTimeMillis();
        seconds = elapsedSeconds(begin, finish);

        System.out.println("Result: " + total);
        System.out.println("Duration: " + seconds + " s");

        // Test 2: naive recursion.
        System.out.println("Test 2");

        begin = System.currentTimeMillis();

        long fib = fibonacci(50);

        finish = System.currentTimeMillis();
        seconds = elapsedSeconds(begin, finish);

        System.out.println("Result: " + fib);
        System.out.println("Duration: " + seconds + " s");

        // Test 3: sum of random numbers.
        System.out.println("Test 3");

        begin = System.currentTimeMillis();

        total = 0.0;
        double m = 0;
        while (m < 100000000) {
            total += Math.random();
            m = m + 1;
        }

        finish = System.currentTimeMillis();
        seconds = elapsedSeconds(begin, finish);

        System.out.println("Result: " + total);
        System.out.println("Duration: " + seconds + " s");

        // Test 4: Monte Carlo estimate of pi.
        System.out.println("Test 4");

        begin = System.currentTimeMillis();

        total = calPi(100000000);

        finish = System.currentTimeMillis();
        seconds = elapsedSeconds(begin, finish);

        System.out.println("Result: " + total);
        System.out.println("Duration: " + seconds + " s");
    }
}

result:

Test 1
Result: 3.333333328333552E26
Duration: 2.948 s
Test 2
Result: 12586269025
Duration: 60.816 s
Test 3
Result: 4.9999087237930864E7
Duration: 2.448 s
Test 4
Result: 3.14147284
Duration: 4.786 s

The difference in the Test 2 results really shocked me! Please help me find the reason, thanks. It would be even better if someone could give me example(s) that show the advantages of Golang (vs. Java).

Topget
  • 113
  • 6
  • 12
    Why is this surprising? – Don Branson Jan 25 '19 at 02:33
  • It might be because of debug information (just a guess). Try building with this and tell us the difference (if any) `go build -ldflags "-s -w"` – hasen Jan 25 '19 at 02:41
  • 5
    You're not even doing the performance testing in Java right; you're not warming up the VM like you should for a microbenchmark. Use JMH for microbenchmarks in Java - many fewer things to think about. Java will be faster if you properly warm up the code, giving HotSpot a chance to compile it to native code before you start the test. – Erwin Bolwidt Jan 25 '19 at 02:51
  • 2
    See: [How do I write a correct micro-benchmark in Java?](https://stackoverflow.com/questions/504103/how-do-i-write-a-correct-micro-benchmark-in-java) – Erwin Bolwidt Jan 25 '19 at 02:52
  • @ErwinBolwidt yes, but Java is faster than Golang even now with no warming-up – Topget Jan 25 '19 at 02:55
  • @DonBranson I expected code written in Golang to run faster than Java, since Golang is more like C, and Java uses a VM to run. – Topget Jan 25 '19 at 02:58
  • @hasen the result is almost no change after compiled with those flags: Test 1 Result: 3.333333328333552e+26 Duration: 1.446115258s Test 2 Result: 12586269025 Duration: 1m29.768865269s Test 3 Result: 5.0005127421499e+07 Duration: 2.521050461s sTest 4 Result: 3.14141112 Duration: 5.002467019s s – Topget Jan 25 '19 at 02:59
  • 3
    @Topget that's why the "you're not **even**". I'm surprised that people are still surprised that Java is fast. – Erwin Bolwidt Jan 25 '19 at 02:59
  • 2
    Tests 3 and 4 are pretty much the same. In Test 1 Java is 100% slower than Go, which is surprising. In Test 2 Go is 50% slower than Java, also surprising. You need to specify which compilers you are using. Also, I have found that using the Go benchmarking facility gives more reliable results. – Andrew W. Phillips Jan 25 '19 at 02:59
  • 3
    @AndrewW.Phillips with proper warm ups of the VM like I mentioned above, the Java time taken for test 1 is less than half what it is without warmup. Test 1 is effectively a JVM warmup for the other tests – Erwin Bolwidt Jan 25 '19 at 03:00
  • @AndrewW.Phillips I used the default golang compiler(go version go1.11.2 darwin/amd64 and windows), and jre/jdk 1.8.x on MacOS and Windows 10. – Topget Jan 25 '19 at 03:08
  • @ErwinBolwidt Maybe I have to say that the optimization of Java is much better than Golang now. – Topget Jan 25 '19 at 03:13
  • 1
    @TopGet I think that in general Java implementations nowadays do not use a JVM interpreter but compile to native code, which is why Erwin suggests warming up the VM. For these simple benchmarks I would expect similar performance (and they are not orders of magnitude different). Recursive functions may be slower in Go because each goroutine only starts with a 4K stack and has to "realloc" when the limit is reached, whereas a Java stack is probably 1000 times bigger. – Andrew W. Phillips Jan 25 '19 at 03:15
  • 1
    @AndrewW.Phillips Compiling to native code isn't a reason why you would need a warmup. Selective compilation according to execution frequency is, and that's what JVMs, at least the Hotspot JVM, have been doing for 20 years. The remaining code is still interpreted. – user207421 Jan 25 '19 at 04:01
  • 3
    If you are interested in such questions, you should dig into which CPU instructions the compiler, VM or JIT generates. But this is basically uninteresting to almost everybody except some specialists working on the compiler, VM or JIT. – Volker Jan 25 '19 at 04:41
  • 5
    You're not doing your Go benchmarks the proper way, either. See [this post](https://dave.cheney.net/2013/06/30/how-to-write-benchmarks-in-go). – Jonathan Hall Jan 25 '19 at 08:03
  • 2
    Honestly, synthetic workloads and real-life computation often have different outcomes. Therefore, you can't test performance just by benchmarking individual functions. Rather, test with real-world applications like HTTP servers to see the actual difference. – Etosticity Jan 25 '19 at 09:59
  • 1
    Go and Java are fundamentally different, and therefore exhibit different performance characteristics in various scenarios. One will not always be faster than the other. Also note that while Go is compiled directly to a native binary instead of to bytecode, it still has a runtime with garbage collection and so will necessarily have more overhead (and more safety) than C. – Adrian Jan 25 '19 at 16:25

1 Answer

4

Both Java and Golang programs are compiled into machine language before being executed—that is what the JIT compiler in the Java VM does. As for the performance comparison, there must be a not-so-subtle difference between the machine code generated by each.

Unfortunately, I don't have access to the machine code generated by the Java JIT compiler, but we can take a look at what has been generated by the Go compiler (v1.11.4-amd64) for the fibonacci function:

        # fibonacci(c), as emitted by the Go compiler (simplified listing).
        # "c" is the stack slot of the argument and "r" the stack slot of the
        # function result; "temp" is a scratch stack slot.
        # Do the comparison: load c and branch to @ELSE when c >= 2
        MOVQ    "c", AX
        CMPQ    AX, $2
        JGE     @ELSE
        # Save the func result (base case, c < 2: r = c)
        MOVQ    AX, "r"
        # Clean up and return: restore BP and release the 32-byte frame
        MOVQ    24(SP), BP
        ADDQ    $32, SP
        RET
@ELSE:
        # Compute fib(c - 2): the argument is written to the stack at (SP)
        LEAQ    -2(AX), CX
        MOVQ    CX, (SP)
        CALL    fibonacci
        # Save the call result, which is read back from the stack at 8(SP)
        MOVQ    8(SP), AX
        MOVQ    AX, "temp"
        # Compute fib(c - 1): c is reloaded from its stack slot
        MOVQ    "c", CX
        DECQ    CX
        MOVQ    CX, (SP)
        CALL    fibonacci
        # Add previous results together
        # (presumably 16(SP) is the "temp" slot written above; 8(SP) holds the
        # second call's result)
        MOVQ    16(SP), AX
        ADDQ    8(SP), AX
        # Save the func result
        MOVQ    AX, "r"
        # Clean up and return: restore BP and release the 32-byte frame
        MOVQ    24(SP), BP
        ADDQ    $32, SP
        RET

Note that this is not the exact compiler output; I've modified it a little to make it clearer. Quoted variables are stack positions.

What I conclude is that while the Go compiler does employ some optimization techniques to generate more performant code (see Compiler Optimization), it is not doing very well at allocating CPU registers (compare it to what would be generated by a C compiler) and relies too much on the stack, especially for return values—and I think there has to be a reason for that, probably related to the way the language works (e.g. multiple return values).

Update 1

Just for comparison, this is the machine code generated by GCC (amd64) for the same function:

        # fibonacci(c), as emitted by GCC for amd64.
        # The argument arrives in %rdi and the result is returned in %rax.
        # Prologue: set up the frame pointer and save %r14 and %rbx, which
        # are both used below
        pushq %rbp
        movq  %rsp, %rbp
        pushq %r14
        pushq %rbx
        # Do the comparison: keep c in %rbx and branch to @ELSE when c >= 2
        movq  %rdi, %rbx
        cmpq  $2, %rbx
        jge @ELSE
        # Save "c" in "r" (base case: the result is c itself)
        movq  %rbx, %rax
        jmp @RETURN
@ELSE:
        # Compute fib(c - 2)
        leaq  -2(%rbx), %rdi
        callq fibonacci
        # Keep the first call's result in %r14, then compute fib(c - 1)
        movq  %rax, %r14
        decq  %rbx
        movq  %rbx, %rdi
        callq fibonacci
        # Add previous results together; the sum stays in %rax
        addq  %r14, %rax
@RETURN:
        # Epilogue: restore the saved registers in reverse order and return
        popq  %rbx
        popq  %r14
        popq  %rbp
        retq

Update 2

That being said, I strongly believe that in real-world projects, the language runtime (e.g. object allocation, garbage collection, call indirection, dynamic loading, concurrency support, etc.) would have a much greater effect on the overall performance of the program, rather than micro-optimizations on the function level.

Alirus
  • 472
  • 4
  • 14