The following simple C code exhibits different behavior depending on the toolchain used on macOS 11.6.1:
#include "assert.h"
#include "stdio.h"
int main() /* reproducer: evaluates y[0] + 0.03*y[1], prints it, then asserts it equals 0.017 */
{
double y[2] = {-0.01,0.9}; /* the two operands used in the expression below */
double r;
r = y[0]+0.03*y[1]; /* multiply, then add; the printed value of r differs between the two toolchains shown below */
printf("r = %24.26e\n",r); /* exponent notation, 26 digits after the decimal point */
assert(r == 0.017); /* exact floating-point equality; this is the check that aborts in the conda build shown below */
}
The result with the default toolchain is
$ clang -v
Apple clang version 13.0.0 (clang-1300.0.29.30)
Target: arm64-apple-darwin20.6.0
Thread model: posix
$ clang -arch arm64 test.c -o testxcode; ./testxcode
r = 1.70000000000000012212453271e-02
while the result with conda 23.1.0 and the cxx-compiler package (versions given below) is
$ conda list | grep cxx
cxx-compiler 1.5.2 hffc8910_0 conda-forge
libcxx 16.0.3 h4653b0c_0 conda-forge
$ clang -v
clang version 14.0.6
Target: arm64-apple-darwin20.6.0
Thread model: posix
InstalledDir: /Users/mottelet/mambaforge/envs/scilab_build/bin
$ clang test.c -o testconda; ./testconda
r = 1.69999999999999977517983751e-02
Assertion failed: (r == 0.017), function main, file test.c, line 9.
zsh: abort ./testconda
In order to analyse this I have compiled and disassembled the simpler code
/* Reduced test case that was compiled and disassembled: only the multiply-add expression remains. */
int main()
{
double y[2] = {-0.01,0.9}; /* y[0] is the addend, y[1] is multiplied by 0.03 */
double r = y[0]+0.03*y[1]; /* the expression whose generated code is compared below; r is not read afterwards */
}
asm code with default macOS/Xcode tool chain (asmxcode in diff below):
; Disassembly of main() from the reduced test case, built with the default Xcode toolchain.
; Annotations map instructions back to the C source; the identity of the constants is inferred.
; Key point: the product and the sum are two separate instructions (fmul, then fadd).
_main:
sub sp, sp, #0x40 ; reserve 0x40 bytes of stack
stp x29, x30, [sp, #0x30] ; save frame pointer (x29) and link register (x30)
add x29, sp, #0x30 ; x29 = sp + 0x30
adrp x8, 1 ; 0x100004000
ldr x8, [x8] ; literal pool symbol address: ___stack_chk_guard
ldr x8, [x8] ; x8 = current value of the stack guard
stur x8, [x29, #-0x8] ; keep a copy of the guard at [x29 - 8]
adrp x8, 0 ; 0x100003000
add x8, x8, #0xfa0 ; x8 = 0x100003fa0
ldr q0, [x8] ; load 16 bytes (presumably the {-0.01, 0.9} initializer of y)
str q0, [sp, #0x10] ; copy them to the stack: y lives at sp+0x10 .. sp+0x1f
ldr d0, [sp, #0x10] ; d0 = y[0]
ldr d2, [sp, #0x18] ; d2 = y[1]
adrp x8, 0 ; 0x100003000
ldr d1, [x8, #0xf90] ; d1 = constant at 0x100003f90 (presumably 0.03)
fmul d1, d1, d2 ; d1 = 0.03 * y[1], as its own instruction
fadd d0, d0, d1 ; d0 = y[0] + d1
str d0, [sp, #0x8] ; r = d0
adrp x8, 1 ; 0x100004000
ldr x8, [x8] ; literal pool symbol address: ___stack_chk_guard
ldr x8, [x8] ; reload the stack guard
ldur x9, [x29, #-0x8] ; x9 = copy saved at function entry
subs x8, x8, x9 ; compare the two values (sets the flags)
b.ne 0x100003f5c ; mismatch: go to the ___stack_chk_fail call (presumably the bl after ret)
mov w0, #0x0 ; return value 0
ldp x29, x30, [sp, #0x30] ; restore x29 and x30
add sp, sp, #0x40 ; release the stack frame
ret
bl 0x100003f60 ; symbol stub for: ___stack_chk_fail
asm code with conda tool chain (asmconda file in diff below):
; Disassembly of main() from the reduced test case, built with the conda toolchain (clang 14.0.6).
; Annotations map instructions back to the C source; the identity of the constants is inferred.
; Key point: the product and the sum are contracted into a single fmadd instruction.
_main:
sub sp, sp, #0x40 ; reserve 0x40 bytes of stack
stp x29, x30, [sp, #0x30] ; save frame pointer (x29) and link register (x30)
add x29, sp, #0x30 ; x29 = sp + 0x30
adrp x8, 1 ; 0x100004000
ldr x8, [x8] ; literal pool symbol address: ___stack_chk_guard
ldr x8, [x8] ; x8 = current value of the stack guard
stur x8, [x29, #-0x8] ; keep a copy of the guard at [x29 - 8]
adrp x8, 0 ; 0x100003000
add x8, x8, #0xfa0 ; x8 = 0x100003fa0
ldr q0, [x8] ; load 16 bytes (presumably the {-0.01, 0.9} initializer of y)
str q0, [sp, #0x10] ; copy them to the stack: y lives at sp+0x10 .. sp+0x1f
ldr d2, [sp, #0x10] ; d2 = y[0] (the addend)
ldr d1, [sp, #0x18] ; d1 = y[1]
adrp x8, 0 ; 0x100003000
ldr d0, [x8, #0xf90] ; d0 = constant at 0x100003f90 (presumably 0.03)
fmadd d0, d0, d1, d2 ; d0 = d0 * d1 + d2, fused: the product is not rounded separately before the add
str d0, [sp, #0x8] ; r = d0
ldur x9, [x29, #-0x8] ; x9 = guard copy saved at function entry
adrp x8, 1 ; 0x100004000
ldr x8, [x8] ; literal pool symbol address: ___stack_chk_guard
ldr x8, [x8] ; reload the stack guard
subs x8, x8, x9 ; compare the two values (sets the flags)
b.eq 0x100003f50 ; guard intact: skip the failure call (presumably lands on the mov below)
b 0x100003f4c ; otherwise go to the failure call (presumably the bl on the next line)
bl 0x100003f60 ; symbol stub for: ___stack_chk_fail
mov w0, #0x0 ; return value 0
ldp x29, x30, [sp, #0x30] ; restore x29 and x30
add sp, sp, #0x40 ; release the stack frame
ret
Here is the diff
mottelet@portmottelet-cr-1 unit_tests % diff -Naur asmconda asmxcode
--- asmconda 2023-06-05 19:58:14.000000000 +0200
+++ asmxcode 2023-06-05 19:58:20.000000000 +0200
@@ -10,21 +10,21 @@
add x8, x8, #0xfa0
ldr q0, [x8]
str q0, [sp, #0x10]
- ldr d2, [sp, #0x10]
- ldr d1, [sp, #0x18]
+ ldr d0, [sp, #0x10]
+ ldr d2, [sp, #0x18]
adrp x8, 0 ; 0x100003000
- ldr d0, [x8, #0xf90]
- fmadd d0, d0, d1, d2
+ ldr d1, [x8, #0xf90]
+ fmul d1, d1, d2
+ fadd d0, d0, d1
str d0, [sp, #0x8]
- ldur x9, [x29, #-0x8]
adrp x8, 1 ; 0x100004000
ldr x8, [x8] ; literal pool symbol address: ___stack_chk_guard
ldr x8, [x8]
+ ldur x9, [x29, #-0x8]
subs x8, x8, x9
- b.eq 0x100003f50
- b 0x100003f4c
- bl 0x100003f60 ; symbol stub for: ___stack_chk_fail
+ b.ne 0x100003f5c
mov w0, #0x0
ldp x29, x30, [sp, #0x30]
add sp, sp, #0x40
ret
+ bl 0x100003f60 ; symbol stub for: ___stack_chk_fail
Question:
How can I obtain the same asm code as the default toolchain when using the conda toolchain (i.e., which compilation flags should I add or remove)?