Pattern matching on Int
s happens in O(log(n))
time, like a map lookup.
Consider the following code, compiled to x86 assembly by ghc -S
module F (
f
) where
f :: Int -> String
f 0 = "Zero"
f 1 = "One"
f 2 = "Two"
f 3 = "Three"
f 4 = "Four"
f 5 = "Five"
f 6 = "Six"
f 7 = "Seven"
f _ = "Undefined"
The compiled assembly code is
.text
.align 4,0x90
.long _F_f_srt-(_sl8_info)+0
.long 0
.long 65568
_sl8_info:
.Lcma:
movl 3(%esi),%eax
cmpl $4,%eax
jl .Lcmq
cmpl $6,%eax
jl .Lcmi
cmpl $7,%eax
jl .Lcme
cmpl $7,%eax
jne .Lcmc
movl $_ghczmprim_GHCziCString_unpackCStringzh_closure,%esi
movl $_cm7_str,0(%ebp)
jmp _stg_ap_n_fast
.Lcmc:
movl $_ghczmprim_GHCziCString_unpackCStringzh_closure,%esi
movl $_clB_str,0(%ebp)
jmp _stg_ap_n_fast
.Lcme:
cmpl $6,%eax
jne .Lcmc
movl $_ghczmprim_GHCziCString_unpackCStringzh_closure,%esi
movl $_cm3_str,0(%ebp)
jmp _stg_ap_n_fast
.Lcmg:
cmpl $4,%eax
jne .Lcmc
movl $_ghczmprim_GHCziCString_unpackCStringzh_closure,%esi
movl $_clV_str,0(%ebp)
jmp _stg_ap_n_fast
.Lcmi:
cmpl $5,%eax
jl .Lcmg
cmpl $5,%eax
jne .Lcmc
movl $_ghczmprim_GHCziCString_unpackCStringzh_closure,%esi
movl $_clZ_str,0(%ebp)
jmp _stg_ap_n_fast
.Lcmk:
cmpl $2,%eax
jne .Lcmc
movl $_ghczmprim_GHCziCString_unpackCStringzh_closure,%esi
movl $_clN_str,0(%ebp)
jmp _stg_ap_n_fast
.Lcmm:
testl %eax,%eax
jne .Lcmc
movl $_ghczmprim_GHCziCString_unpackCStringzh_closure,%esi
movl $_clF_str,0(%ebp)
jmp _stg_ap_n_fast
.Lcmo:
cmpl $1,%eax
jl .Lcmm
cmpl $1,%eax
jne .Lcmc
movl $_ghczmprim_GHCziCString_unpackCStringzh_closure,%esi
movl $_clJ_str,0(%ebp)
jmp _stg_ap_n_fast
.Lcmq:
cmpl $2,%eax
jl .Lcmo
cmpl $3,%eax
jl .Lcmk
cmpl $3,%eax
jne .Lcmc
movl $_ghczmprim_GHCziCString_unpackCStringzh_closure,%esi
movl $_clR_str,0(%ebp)
jmp _stg_ap_n_fast
.text
.align 4,0x90
.long _F_f_srt-(_F_f_info)+0
.long 65541
.long 0
.long 65551
.globl _F_f_info
_F_f_info:
.Lcmu:
movl 0(%ebp),%esi
movl $_sl8_info,0(%ebp)
testl $3,%esi
jne .Lcmx
jmp *(%esi)
.Lcmx:
jmp _sl8_info
This is doing a binary search on the integer arguments. .Lcma
is branching on <4 then <6 then <7. The first comparsion goes to .Lcmq
which is branching on <2 then <3. The first comparison from that goes to .Lcmo
, which branches on <1.
With ghc -O2 -S
, instead we get this, where we can see the same pattern:
.text
.align 4,0x90
.long _F_zdwf_srt-(_F_zdwf_info)+0
.long 65540
.long 0
.long 33488911
.globl _F_zdwf_info
_F_zdwf_info:
.LcqO:
movl 0(%ebp),%eax
cmpl $4,%eax
jl .Lcr6
cmpl $6,%eax
jl .LcqY
cmpl $7,%eax
jl .LcqU
cmpl $7,%eax
jne .LcqS
movl $_F_f1_closure,%esi
addl $4,%ebp
andl $-4,%esi
jmp *(%esi)
.LcqS:
movl $_F_f9_closure,%esi
addl $4,%ebp
andl $-4,%esi
jmp *(%esi)
.LcqU:
cmpl $6,%eax
jne .LcqS
movl $_F_f2_closure,%esi
addl $4,%ebp
andl $-4,%esi
jmp *(%esi)
.LcqW:
cmpl $4,%eax
jne .LcqS
movl $_F_f4_closure,%esi
addl $4,%ebp
andl $-4,%esi
jmp *(%esi)
.LcqY:
cmpl $5,%eax
jl .LcqW
cmpl $5,%eax
jne .LcqS
movl $_F_f3_closure,%esi
addl $4,%ebp
andl $-4,%esi
jmp *(%esi)
.Lcr0:
cmpl $2,%eax
jne .LcqS
movl $_F_f6_closure,%esi
addl $4,%ebp
andl $-4,%esi
jmp *(%esi)
.Lcr2:
testl %eax,%eax
jne .LcqS
movl $_F_f8_closure,%esi
addl $4,%ebp
andl $-4,%esi
jmp *(%esi)
.Lcr4:
cmpl $1,%eax
jl .Lcr2
cmpl $1,%eax
jne .LcqS
movl $_F_f7_closure,%esi
addl $4,%ebp
andl $-4,%esi
jmp *(%esi)
.Lcr6:
cmpl $2,%eax
jl .Lcr4
cmpl $3,%eax
jl .Lcr0
cmpl $3,%eax
jne .LcqS
movl $_F_f5_closure,%esi
addl $4,%ebp
andl $-4,%esi
jmp *(%esi)
.section .data
.align 4
.align 1
_F_f_srt:
.long _F_zdwf_closure
.data
.align 4
.align 1
.globl _F_f_closure
_F_f_closure:
.long _F_f_info
.long 0
.text
.align 4,0x90
.long _F_f_srt-(_srh_info)+0
.long 0
.long 65568
_srh_info:
.Lcrv:
movl 3(%esi),%eax
movl %eax,0(%ebp)
jmp _F_zdwf_info
.text
.align 4,0x90
.long _F_f_srt-(_F_f_info)+0
.long 65541
.long 0
.long 65551
.globl _F_f_info
_F_f_info:
.Lcrz:
movl 0(%ebp),%esi
movl $_srh_info,0(%ebp)
testl $3,%esi
jne _srh_info
jmp *(%esi)
If we change the original code to
f :: Int -> String
f 1 = "Zero"
f 2 = "One"
f 3 = "Two"
f 4 = "Three"
f 5 = "Four"
f 6 = "Five"
f 7 = "Six"
f 8 = "Seven"
f _ = "Undefined"
The branches are on <5, <7, <8, with <5 going to <3, <4, etc., so it's probably doing this based on sorting the arguments. We can test that by scrambling the numbers, and even adding spacing between them:
f :: Int -> String
f 20 = "Zero"
f 80 = "One"
f 70 = "Two"
f 30 = "Three"
f 40 = "Four"
f 50 = "Five"
f 10 = "Six"
f 60 = "Seven"
f _ = "Undefined"
Sure enough, the branches are still on <50, <70, <80, with <50 going to <30, <40, etc.