The Darwin arm64 ABI passes all varargs arguments on the stack, each padded to the next multiple of 8 bytes. (Types that don't fit into 8 bytes have a pointer passed instead. Regular arguments that don't fit into x0-x7/q0-q7 come before varargs on the stack, naturally aligned.)
Here's a simple example:
// int main(void) -- Darwin arm64 demo: hand printf one variadic argument.
// Only the format string is a named argument (x0); the variadic value is
// written to an 8-byte slot at the bottom of the stack before the call.
//
// Stack frame (0x20 bytes):
//   [sp, 0x00]  variadic slot (the frame is sized in 16-byte steps so sp
//               stays 16-byte aligned)
//   [sp, 0x10]  saved x29 / x30
        .globl  _main
        .p2align 2
_main:
        sub     sp, sp, #0x20
        stp     x29, x30, [sp, #0x10]
        mov     w8, #66                 // writing w8 clears the upper half, so x8 = 66
        str     x8, [sp]                // the value consumed by %x
        adr     x0, Lstr                // x0 = format string
        bl      _printf
        mov     w0, wzr                 // return 0
        ldp     x29, x30, [sp, #0x10]
        add     sp, sp, #0x20
        ret
Lstr:
        .asciz  "test: %x\n"
Note that this is different from non-varargs arguments to unprototyped functions that are passed on the stack, which are only padded up to 4 bytes (sizeof(int)). The following code:
#include <stdio.h>
#include <stdint.h>
/* Deliberately declared without a prototype: the call in main() is then
   compiled using the rules for unprototyped functions. */
extern void func();

/* Minimal definition of func as file-scope assembly: it returns immediately. */
__asm__("_func:\n"
        " ret\n");

/* Pass the same byte-sized values to a variadic function (printf) and to the
   unprototyped func() so the two argument layouts can be compared. */
int main(void)
{
    uint8_t a = 1;
    uint8_t b = 2;
    uint8_t c = 3;

    /* All six values are variadic arguments. */
    printf("%hhx %hhx %hhx %hhx %hhx %hhx\n", a, b, c, a, b, c);

    /* Twelve arguments: more than the eight argument registers x0-x7 hold. */
    func(a, b, c, a, b, c, a, b, c, a, b, c);

    return 0;
}
compiles down to this with -O2:
;-- _main:
; Unprototyped func(): its first eight arguments travel in w0-w7 and the
; remaining four are stored in 4-byte stack slots. printf's six variadic
; arguments each occupy an 8-byte stack slot.
0x100003ee8 ff0301d1 sub sp, sp, 0x40  ; reserve 0x40 bytes of stack
0x100003eec fd7b03a9 stp x29, x30, [sp, 0x30]  ; save frame pointer and link register
0x100003ef0 fdc30091 add x29, sp, 0x30  ; x29 -> the saved x29/x30 pair
0x100003ef4 68008052 mov w8, 3  ; w8 = c
0x100003ef8 49008052 mov w9, 2  ; w9 = b
0x100003efc e92302a9 stp x9, x8, [sp, 0x20]  ; printf varargs 5 and 6 (b, c), 8 bytes each
0x100003f00 2a008052 mov w10, 1  ; w10 = a
0x100003f04 e82b01a9 stp x8, x10, [sp, 0x10]  ; printf varargs 3 and 4 (c, a)
0x100003f08 ea2700a9 stp x10, x9, [sp]  ; printf varargs 1 and 2 (a, b)
0x100003f0c 20040010 adr x0, str._hhx__hhx__hhx__hhx__hhx__hhx_n  ; x0 = format string, the only named argument
0x100003f10 1f2003d5 nop
0x100003f14 13000094 bl sym.imp.printf
0x100003f18 480080d2 mov x8, 2
0x100003f1c 6800c0f2 movk x8, 3, lsl 32  ; x8 = 0x0000000300000002 -> 32-bit values 2, 3
0x100003f20 690080d2 mov x9, 3
0x100003f24 2900c0f2 movk x9, 1, lsl 32  ; x9 = 0x0000000100000003 -> 32-bit values 3, 1
0x100003f28 e92300a9 stp x9, x8, [sp]  ; func args 9-12 (c, a, b, c) in 4-byte slots at sp+0, +4, +8, +12
0x100003f2c 20008052 mov w0, 1  ; func args 1-8 go in w0-w7
0x100003f30 41008052 mov w1, 2
0x100003f34 62008052 mov w2, 3
0x100003f38 23008052 mov w3, 1
0x100003f3c 44008052 mov w4, 2
0x100003f40 65008052 mov w5, 3
0x100003f44 26008052 mov w6, 1
0x100003f48 47008052 mov w7, 2
0x100003f4c e6ffff97 bl sym._func
0x100003f50 00008052 mov w0, 0  ; return 0
0x100003f54 fd7b43a9 ldp x29, x30, [sp, 0x30]  ; restore frame pointer and link register
0x100003f58 ff030191 add sp, sp, 0x40  ; release the stack frame
0x100003f5c c0035fd6 ret
Giving the function an actual prototype allows the removal of any padding (except the one that serves alignment purposes), like so (note the last argument being 8 bytes):
extern void func(uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint8_t,
uint8_t, uint8_t, uint8_t, uint8_t, uint8_t, uint64_t);
The code then compiles down to:
;-- _main:
; Prototyped func(): stack arguments 9-11 (uint8_t) are packed one byte each
; at sp+0..sp+2, and argument 12 (uint64_t) sits at sp+8, its natural
; alignment. printf's variadic arguments still take 8 bytes each.
0x100003ee4 ff4301d1 sub sp, sp, 0x50  ; reserve 0x50 bytes of stack
0x100003ee8 f44f03a9 stp x20, x19, [sp, 0x30]  ; save x19/x20, which this function overwrites below
0x100003eec fd7b04a9 stp x29, x30, [sp, 0x40]  ; save frame pointer and link register
0x100003ef0 fd030191 add x29, sp, 0x40  ; x29 -> the saved x29/x30 pair
0x100003ef4 73008052 mov w19, 3  ; w19 = c, still needed after the printf call
0x100003ef8 54008052 mov w20, 2  ; w20 = b, still needed after the printf call
0x100003efc f44f02a9 stp x20, x19, [sp, 0x20]  ; printf varargs 5 and 6 (b, c), 8 bytes each
0x100003f00 28008052 mov w8, 1  ; w8 = a
0x100003f04 f32301a9 stp x19, x8, [sp, 0x10]  ; printf varargs 3 and 4 (c, a)
0x100003f08 e85300a9 stp x8, x20, [sp]  ; printf varargs 1 and 2 (a, b)
0x100003f0c 20040010 adr x0, str._hhx__hhx__hhx__hhx__hhx__hhx_n  ; x0 = format string, the only named argument
0x100003f10 1f2003d5 nop
0x100003f14 13000094 bl sym.imp.printf
0x100003f18 68208052 mov w8, 0x103  ; low byte 0x03 = c, next byte 0x01 = a
0x100003f1c f30700f9 str x19, [sp, 8]  ; func arg 12 (uint64_t, value 3) at sp+8
0x100003f20 f40b0039 strb w20, [sp, 2]  ; func arg 11 (b) as a single byte at sp+2
0x100003f24 e8030079 strh w8, [sp]  ; func args 9 and 10 (c, a) as single bytes at sp+0 and sp+1
0x100003f28 20008052 mov w0, 1  ; func args 1-8 go in w0-w7
0x100003f2c 41008052 mov w1, 2
0x100003f30 62008052 mov w2, 3
0x100003f34 23008052 mov w3, 1
0x100003f38 44008052 mov w4, 2
0x100003f3c 65008052 mov w5, 3
0x100003f40 26008052 mov w6, 1
0x100003f44 47008052 mov w7, 2
0x100003f48 e6ffff97 bl sym._func
0x100003f4c 00008052 mov w0, 0  ; return 0
0x100003f50 fd7b44a9 ldp x29, x30, [sp, 0x40]  ; restore frame pointer and link register
0x100003f54 f44f43a9 ldp x20, x19, [sp, 0x30]  ; restore x19/x20
0x100003f58 ff430191 add sp, sp, 0x50  ; release the stack frame
0x100003f5c c0035fd6 ret