I have the following Java code (all arrays are initialized before `arrays()` is called, and each of them has length `arraySize`):
// Upper bound of the loop in arrays(); the arrays below are stated to all have this length.
int arraySize = 64;
// First operand array, only read by arrays().
float[] a;
// Second operand array, only read by arrays(); it is also used as the divisor.
float[] b;
// Output array; arrays() stores one computed value per index into it.
float[] result;
/**
 * For every index i in [0, arraySize), stores
 * ((a[i] * b[i] + b[i] - b[i]) / b[i]) + a[i] + a[i] + a[i] + a[i]
 * into result[i].
 *
 * Reads the fields arraySize, a and b, writes into result, and returns nothing.
 */
public void arrays() {
for (int i = 0; i < arraySize; i++) {
// Every float operation in this expression shows up as its own instruction in the
// compiled loop bodies below (mul, add, sub, div, then four adds).
result[i] = ((a[i] * b[i] + b[i] - b[i]) / b[i]) +
a[i] + a[i] + a[i] + a[i];
}
}
The JIT's output for it is:
# {method} {0x00000001034751a8} 'arrays' '()V' in 'main/ComplexExpression'
# [sp+0x30] (sp of caller)
[Entry Point]
0x000000010c4c55a0: mov 0x8(%rsi),%r10d
0x000000010c4c55a4: movabs $0x800000000,%r11
0x000000010c4c55ae: add %r11,%r10
0x000000010c4c55b1: cmp %r10,%rax
0x000000010c4c55b4: jne 0x000000010c44b780 ; {runtime_call ic_miss_stub}
0x000000010c4c55ba: xchg %ax,%ax
0x000000010c4c55bc: nopl 0x0(%rax)
[Verified Entry Point]
0x000000010c4c55c0: mov %eax,-0x14000(%rsp)
0x000000010c4c55c7: push %rbp
0x000000010c4c55c8: sub $0x20,%rsp ;*synchronization entry
; - main.ComplexExpression::arrays@-1 (line 51)
0x000000010c4c55cc: mov %rsi,%rcx
0x000000010c4c55cf: mov 0xc(%rsi),%ebp ;*getfield arraySize {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@4 (line 51)
0x000000010c4c55d2: test %ebp,%ebp
0x000000010c4c55d4: jle L0006 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@7 (line 51)
0x000000010c4c55da: mov 0x10(%rsi),%r11d ;*getfield a {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@16 (line 52)
0x000000010c4c55de: xchg %ax,%ax
0x000000010c4c55e0: mov 0xc(%r12,%r11,8),%r10d ; implicit exception: dispatches to 0x000000010c4c58a3
;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@20 (line 52)
0x000000010c4c55e5: test %r10d,%r10d
0x000000010c4c55e8: jbe L0007
0x000000010c4c55ee: movslq %ebp,%r9
0x000000010c4c55f1: movslq %r10d,%r10
0x000000010c4c55f4: dec %r9
0x000000010c4c55f7: cmp %r10,%r9
0x000000010c4c55fa: nopw 0x0(%rax,%rax,1)
0x000000010c4c5600: jae L0007
0x000000010c4c5606: mov 0x14(%rsi),%ebx ;*getfield b {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@22 (line 52)
0x000000010c4c5609: mov 0xc(%r12,%rbx,8),%r10d ; implicit exception: dispatches to 0x000000010c4c58a3
;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@26 (line 52)
0x000000010c4c560e: test %r10d,%r10d
0x000000010c4c5611: jbe L0007
0x000000010c4c5617: movslq %r10d,%r10
0x000000010c4c561a: nopw 0x0(%rax,%rax,1)
0x000000010c4c5620: cmp %r10,%r9
0x000000010c4c5623: jae L0007
0x000000010c4c5629: mov 0x18(%rsi),%r8d ;*getfield result {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@11 (line 52)
0x000000010c4c562d: mov 0xc(%r12,%r8,8),%r10d ; implicit exception: dispatches to 0x000000010c4c58a3
;*fastore {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@77 (line 52)
0x000000010c4c5632: test %r10d,%r10d
0x000000010c4c5635: jbe L0007
0x000000010c4c563b: movslq %r10d,%r10
0x000000010c4c563e: xchg %ax,%ax
0x000000010c4c5640: cmp %r10,%r9
0x000000010c4c5643: jae L0007
0x000000010c4c5649: lea (%r12,%r8,8),%rdx
0x000000010c4c564d: lea (%r12,%r11,8),%rdi
0x000000010c4c5651: mov %edx,%r11d
0x000000010c4c5654: lea (%r12,%rbx,8),%rax
0x000000010c4c5658: shr $0x2,%r11d
0x000000010c4c565c: and $0x7,%r11d
0x000000010c4c5660: mov $0x3,%r9d
0x000000010c4c5666: sub %r11d,%r9d
0x000000010c4c5669: and $0x7,%r9d
0x000000010c4c566d: inc %r9d
0x000000010c4c5670: cmp %ebp,%r9d
0x000000010c4c5673: cmovg %ebp,%r9d
0x000000010c4c5677: xor %r10d,%r10d
0x000000010c4c567a: xor %r11d,%r11d ;*aload_0 {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@10 (line 52)
L0000: vmovss 0x10(%rax,%r11,4),%xmm1 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@26 (line 52)
0x000000010c4c5684: vmovss 0x10(%rdi,%r11,4),%xmm0 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@20 (line 52)
0x000000010c4c568b: vmulss %xmm1,%xmm0,%xmm3
0x000000010c4c568f: vaddss %xmm1,%xmm3,%xmm2
0x000000010c4c5693: vsubss %xmm1,%xmm2,%xmm3
0x000000010c4c5697: vdivss %xmm1,%xmm3,%xmm1
0x000000010c4c569b: vaddss %xmm0,%xmm1,%xmm2
0x000000010c4c569f: vaddss %xmm0,%xmm2,%xmm1
0x000000010c4c56a3: vaddss %xmm0,%xmm1,%xmm2
0x000000010c4c56a7: vaddss %xmm0,%xmm2,%xmm0
0x000000010c4c56ab: vmovss %xmm0,0x10(%rdx,%r11,4) ;*fastore {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@77 (line 52)
0x000000010c4c56b2: inc %r11d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@78 (line 51)
0x000000010c4c56b5: cmp %r9d,%r11d
0x000000010c4c56b8: jl L0000 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@7 (line 51)
0x000000010c4c56ba: mov %ebp,%r9d
0x000000010c4c56bd: add $0xffffffe1,%r9d
0x000000010c4c56c1: mov $0x80000000,%r8d
0x000000010c4c56c7: cmp %r9d,%ebp
0x000000010c4c56ca: cmovl %r8d,%r9d
0x000000010c4c56ce: cmp %r9d,%r11d
0x000000010c4c56d1: jge L0004
0x000000010c4c56d7: mov $0x7d00,%ebx
L0001: mov %r9d,%esi
0x000000010c4c56df: sub %r11d,%esi
0x000000010c4c56e2: cmp %r11d,%r9d
0x000000010c4c56e5: cmovl %r10d,%esi
0x000000010c4c56e9: cmp $0x7d00,%esi
0x000000010c4c56ef: cmova %ebx,%esi
0x000000010c4c56f2: add %r11d,%esi
0x000000010c4c56f5: data16 data16 nopw 0x0(%rax,%rax,1) ;*aload_0 {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@10 (line 52)
L0002: vmovdqu 0x10(%rax,%r11,4),%ymm0 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@26 (line 52)
0x000000010c4c5707: vmovdqu 0x10(%rdi,%r11,4),%ymm1 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@20 (line 52)
0x000000010c4c570e: vmulps %ymm0,%ymm1,%ymm2
0x000000010c4c5712: vaddps %ymm0,%ymm2,%ymm2
0x000000010c4c5716: vsubps %ymm0,%ymm2,%ymm2
0x000000010c4c571a: vdivps %ymm0,%ymm2,%ymm0
0x000000010c4c571e: vaddps %ymm1,%ymm0,%ymm0
0x000000010c4c5722: vaddps %ymm1,%ymm0,%ymm0
0x000000010c4c5726: vaddps %ymm1,%ymm0,%ymm0
0x000000010c4c572a: vaddps %ymm1,%ymm0,%ymm0
0x000000010c4c572e: vmovdqu %ymm0,0x10(%rdx,%r11,4) ;*fastore {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@77 (line 52)
0x000000010c4c5735: vmovdqu 0x30(%rdi,%r11,4),%ymm0 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@20 (line 52)
0x000000010c4c573c: vmovdqu 0x30(%rax,%r11,4),%ymm1 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@26 (line 52)
0x000000010c4c5743: vmulps %ymm1,%ymm0,%ymm2
0x000000010c4c5747: vaddps %ymm1,%ymm2,%ymm2
0x000000010c4c574b: vsubps %ymm1,%ymm2,%ymm2
0x000000010c4c574f: vdivps %ymm1,%ymm2,%ymm1
0x000000010c4c5753: vaddps %ymm0,%ymm1,%ymm1
0x000000010c4c5757: vaddps %ymm0,%ymm1,%ymm1
0x000000010c4c575b: vaddps %ymm0,%ymm1,%ymm1
0x000000010c4c575f: vaddps %ymm0,%ymm1,%ymm0
0x000000010c4c5763: vmovdqu %ymm0,0x30(%rdx,%r11,4) ;*fastore {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@77 (line 52)
0x000000010c4c576a: vmovdqu 0x50(%rdi,%r11,4),%ymm0 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@20 (line 52)
0x000000010c4c5771: vmovdqu 0x50(%rax,%r11,4),%ymm1 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@26 (line 52)
0x000000010c4c5778: vmulps %ymm1,%ymm0,%ymm2
0x000000010c4c577c: vaddps %ymm1,%ymm2,%ymm2
0x000000010c4c5780: vsubps %ymm1,%ymm2,%ymm2
0x000000010c4c5784: vdivps %ymm1,%ymm2,%ymm1
0x000000010c4c5788: vaddps %ymm0,%ymm1,%ymm1
0x000000010c4c578c: vaddps %ymm0,%ymm1,%ymm1
0x000000010c4c5790: vaddps %ymm0,%ymm1,%ymm1
0x000000010c4c5794: vaddps %ymm0,%ymm1,%ymm0
0x000000010c4c5798: vmovdqu %ymm0,0x50(%rdx,%r11,4) ;*fastore {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@77 (line 52)
0x000000010c4c579f: vmovdqu 0x70(%rdi,%r11,4),%ymm0 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@20 (line 52)
0x000000010c4c57a6: vmovdqu 0x70(%rax,%r11,4),%ymm1 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@26 (line 52)
0x000000010c4c57ad: vmulps %ymm1,%ymm0,%ymm2
0x000000010c4c57b1: vaddps %ymm1,%ymm2,%ymm2
0x000000010c4c57b5: vsubps %ymm1,%ymm2,%ymm2
0x000000010c4c57b9: vdivps %ymm1,%ymm2,%ymm1
0x000000010c4c57bd: vaddps %ymm0,%ymm1,%ymm1
0x000000010c4c57c1: vaddps %ymm0,%ymm1,%ymm1
0x000000010c4c57c5: vaddps %ymm0,%ymm1,%ymm1
0x000000010c4c57c9: vaddps %ymm0,%ymm1,%ymm0
0x000000010c4c57cd: vmovdqu %ymm0,0x70(%rdx,%r11,4) ;*fastore {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@77 (line 52)
0x000000010c4c57d4: add $0x20,%r11d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@78 (line 51)
0x000000010c4c57d8: cmp %esi,%r11d
0x000000010c4c57db: nopl 0x0(%rax,%rax,1)
0x000000010c4c57e0: jl L0002 ;*goto {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@81 (line 51)
0x000000010c4c57e6: mov 0x348(%r15),%rsi ; ImmutableOopMap {rcx=Oop rdi=Oop rdx=Oop rax=Oop }
;*goto {reexecute=1 rethrow=0 return_oop=0}
; - (reexecute) main.ComplexExpression::arrays@81 (line 51)
0x000000010c4c57ed: test %eax,(%rsi) ;*goto {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@81 (line 51)
; {poll} *** SAFEPOINT POLL ***
0x000000010c4c57ef: cmp %r9d,%r11d
0x000000010c4c57f2: jl L0001
0x000000010c4c57f8: mov %ebp,%r10d
0x000000010c4c57fb: add $0xfffffff9,%r10d
0x000000010c4c57ff: cmp %r10d,%ebp
0x000000010c4c5802: cmovl %r8d,%r10d
0x000000010c4c5806: cmp %r10d,%r11d
0x000000010c4c5809: jge L0004
0x000000010c4c580b: nop ;*aload_0 {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@10 (line 52)
L0003: vmovdqu 0x10(%rax,%r11,4),%ymm0 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@26 (line 52)
0x000000010c4c5813: vmovdqu 0x10(%rdi,%r11,4),%ymm1 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@20 (line 52)
0x000000010c4c581a: vmulps %ymm0,%ymm1,%ymm2
0x000000010c4c581e: vaddps %ymm0,%ymm2,%ymm2
0x000000010c4c5822: vsubps %ymm0,%ymm2,%ymm2
0x000000010c4c5826: vdivps %ymm0,%ymm2,%ymm0
0x000000010c4c582a: vaddps %ymm1,%ymm0,%ymm0
0x000000010c4c582e: vaddps %ymm1,%ymm0,%ymm0
0x000000010c4c5832: vaddps %ymm1,%ymm0,%ymm0
0x000000010c4c5836: vaddps %ymm1,%ymm0,%ymm0
0x000000010c4c583a: vmovdqu %ymm0,0x10(%rdx,%r11,4) ;*fastore {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@77 (line 52)
0x000000010c4c5841: add $0x8,%r11d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@78 (line 51)
0x000000010c4c5845: cmp %r10d,%r11d
0x000000010c4c5848: jl L0003 ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@7 (line 51)
L0004: cmp %ebp,%r11d
0x000000010c4c584d: jge L0006
0x000000010c4c584f: nop ;*aload_0 {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@10 (line 52)
L0005: vmovss 0x10(%rax,%r11,4),%xmm1 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@26 (line 52)
0x000000010c4c5857: vmovss 0x10(%rdi,%r11,4),%xmm0 ;*faload {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@20 (line 52)
0x000000010c4c585e: vmulss %xmm1,%xmm0,%xmm3
0x000000010c4c5862: vaddss %xmm1,%xmm3,%xmm2
0x000000010c4c5866: vsubss %xmm1,%xmm2,%xmm3
0x000000010c4c586a: vdivss %xmm1,%xmm3,%xmm1
0x000000010c4c586e: vaddss %xmm0,%xmm1,%xmm2
0x000000010c4c5872: vaddss %xmm0,%xmm2,%xmm1
0x000000010c4c5876: vaddss %xmm0,%xmm1,%xmm2
0x000000010c4c587a: vaddss %xmm0,%xmm2,%xmm0
0x000000010c4c587e: vmovss %xmm0,0x10(%rdx,%r11,4) ;*fastore {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@77 (line 52)
0x000000010c4c5885: inc %r11d ;*iinc {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@78 (line 51)
0x000000010c4c5888: cmp %ebp,%r11d
0x000000010c4c588b: jl L0005
L0006: vzeroupper
0x000000010c4c5890: add $0x20,%rsp
0x000000010c4c5894: pop %rbp
0x000000010c4c5895: cmp 0x340(%r15),%rsp ; {poll_return} *** SAFEPOINT POLL ***
0x000000010c4c589c: ja L0008
0x000000010c4c58a2: ret ;*if_icmpge {reexecute=0 rethrow=0 return_oop=0}
; - main.ComplexExpression::arrays@7 (line 51)
L0007: mov $0xffffff76,%esi
0x000000010c4c58a8: mov %rcx,(%rsp)
0x000000010c4c58ac: vzeroupper
0x000000010c4c58af: call 0x000000010c451000 ; ImmutableOopMap {[0]=Oop }
;*if_icmpge {reexecute=1 rethrow=0 return_oop=0}
; - (reexecute) main.ComplexExpression::arrays@7 (line 51)
; {runtime_call UncommonTrapBlob}
L0008: movabs $0x10c4c5895,%r10 ; {internal_word}
0x000000010c4c58be: mov %r10,0x358(%r15)
0x000000010c4c58c5: jmp 0x000000010c452100 ; {runtime_call SafepointBlob}
0x000000010c4c58ca: hlt
0x000000010c4c58cb: hlt
0x000000010c4c58cc: hlt
0x000000010c4c58cd: hlt
0x000000010c4c58ce: hlt
0x000000010c4c58cf: hlt
0x000000010c4c58d0: hlt
0x000000010c4c58d1: hlt
0x000000010c4c58d2: hlt
0x000000010c4c58d3: hlt
0x000000010c4c58d4: hlt
0x000000010c4c58d5: hlt
0x000000010c4c58d6: hlt
0x000000010c4c58d7: hlt
0x000000010c4c58d8: hlt
0x000000010c4c58d9: hlt
0x000000010c4c58da: hlt
0x000000010c4c58db: hlt
0x000000010c4c58dc: hlt
0x000000010c4c58dd: hlt
0x000000010c4c58de: hlt
0x000000010c4c58df: hlt
[Exception Handler]
0x000000010c4c58e0: jmp 0x000000010c464a00 ; {no_reloc}
[Deopt Handler Code]
0x000000010c4c58e5: call 0x000000010c4c58ea
0x000000010c4c58ea: subq $0x5,(%rsp)
0x000000010c4c58ef: jmp 0x000000010c4513a0 ; {runtime_call DeoptimizationBlob}
0x000000010c4c58f4: hlt
0x000000010c4c58f5: hlt
0x000000010c4c58f6: hlt
0x000000010c4c58f7: hlt
It seems like the for loop was translated into multiple "assembly loops". The main part is in L0002, I guess, where you can see the loop unrolling, but there are also L0000, L0003 and L0005, which also seem to be part of the for loop. I'm having a hard time understanding how they fit in.
Can someone explain how the parts under all these labels compose the actual loop? I imagine it's some "known" pattern in the JIT that I just don't know.