I have these two source files:
/// Number of `u64` elements in each array: 128 * 1024 = 131072.
const ARR_LEN: usize = 128 * 1024;
/// Stores `x[i] % m` in `result[i]` for every index `i` in `0..ARR_LEN`.
///
/// * `x` - input values, a fixed-size array of `ARR_LEN` elements.
/// * `m` - the modulus applied to every element.
/// * `result` - output array of the same fixed size; every element is overwritten.
///
/// # Panics
///
/// Panics if `m` is zero: Rust's `%` on integers panics on a zero divisor
/// instead of invoking undefined behaviour.
pub fn plain_mod_test(x: &[u64; ARR_LEN], m: u64, result: &mut [u64; ARR_LEN]) {
// `i` never reaches ARR_LEN, and both arrays have exactly ARR_LEN elements,
// so every index below is in range.
for i in 0..ARR_LEN {
result[i] = x[i] % m;
}
}
and
#include <stdint.h>
/* Number of uint64_t elements processed: 128 * 1024 = 131072. */
#define ARR_LEN (128 * 1024)
/*
 * Stores x[i] % m in result[i] for every i in [0, ARR_LEN).
 *
 * x      - input values; ARR_LEN elements are read.
 * m      - the modulus applied to every element.
 * result - output buffer; ARR_LEN elements are written.
 *
 * Both pointers are plain uint64_t*, so the signature itself does not fix the
 * size of the buffers; the caller must provide at least ARR_LEN elements each.
 * m is not checked: in C, `%` with a zero divisor is undefined behaviour.
 */
void plain_mod_test(uint64_t *x, uint64_t m, uint64_t *result) {
/* NOTE(review): the index is a signed int here, while the Rust version
   iterates over a usize range; ARR_LEN (131072) is far below INT_MAX. */
for (int i = 0; i < ARR_LEN; ++ i) {
result[i] = x[i] % m;
}
}
Is my C code a good approximation to the Rust code?
When I compile the C code on godbolt.org with x86-64 gcc 12.2 and -O3,
I get this sensible output:
# gcc output for: void plain_mod_test(uint64_t *x, uint64_t m, uint64_t *result)
# Registers as used below: rdi = x, rsi = m, rdx = result (on entry).
# One element is handled per iteration, always with a 64-bit divide.
plain_mod_test:
mov r8, rdx # keep the result pointer in r8: div overwrites rdx below
xor ecx, ecx # rcx = byte offset into both arrays, starting at 0
.L2:
mov rax, QWORD PTR [rdi+rcx] # rax = current element of x
xor edx, edx # zero rdx: div takes its dividend from rdx:rax
div rsi # unsigned 64-bit divide by m; the remainder lands in rdx
mov QWORD PTR [r8+rcx], rdx # store the remainder at the same offset in result
add rcx, 8 # advance by one 8-byte element
cmp rcx, 1048576 # 1048576 bytes = 131072 elements * 8 bytes each
jne .L2
ret
But when I compile the Rust code with rustc 1.66 and -C opt-level=3,
I get this much more verbose output:
# rustc output for plain_mod_test(x, m, result).
# Registers as used below: rdi = x, rsi = m, rdx = result (on entry),
# r8 = result (after the copy), rcx = element index i, r9 = i + 2.
# The loop handles two elements per iteration, and for each element picks
# between a 32-bit and a 64-bit divide at run time.
example::plain_mod_test:
push rax # the pushed value is never read; presumably this only keeps rsp 16-byte aligned for the panic call below
test rsi, rsi # m == 0 ?
je .LBB0_10 # yes: go to the panic path
mov r8, rdx # keep the result pointer in r8: div overwrites rdx below
xor ecx, ecx # i = 0
jmp .LBB0_2
# 64-bit divide path for the second element of the pair; rax already holds x[i + 1].
.LBB0_7:
xor edx, edx # zero rdx: div takes its dividend from rdx:rax
div rsi # unsigned 64-bit divide by m; remainder in rdx
mov qword ptr [r8 + 8*rcx + 8], rdx # result[i + 1] = remainder
mov rcx, r9 # i += 2 (r9 was set to i + 2 in .LBB0_5)
cmp r9, 131072 # 131072 = ARR_LEN
je .LBB0_9 # all elements done
# Loop header: first element of the pair.
.LBB0_2:
mov rax, qword ptr [rdi + 8*rcx] # rax = x[i]
mov rdx, rax
or rdx, rsi # combine the bits of x[i] and m
shr rdx, 32 # result is zero only if both x[i] and m fit in 32 bits
je .LBB0_3 # both fit: use the 32-bit divide
xor edx, edx
div rsi # 64-bit divide; remainder in rdx
jmp .LBB0_5
# 32-bit divide path for the first element (presumably chosen because the
# narrower div is cheaper on many x86 CPUs - not shown by this listing).
.LBB0_3:
xor edx, edx # zero edx: the 32-bit div takes its dividend from edx:eax
div esi # unsigned 32-bit divide; remainder in edx, upper half of rdx is zero
# Store the first result, then handle the second element of the pair.
.LBB0_5:
mov qword ptr [r8 + 8*rcx], rdx # result[i] = remainder
mov rax, qword ptr [rdi + 8*rcx + 8] # rax = x[i + 1]
lea r9, [rcx + 2] # r9 = i + 2, the index of the next pair
mov rdx, rax
or rdx, rsi
shr rdx, 32 # same "do both operands fit in 32 bits" test as above
jne .LBB0_7 # no: take the 64-bit divide path
xor edx, edx
div esi # 32-bit divide; remainder in edx
mov qword ptr [r8 + 8*rcx + 8], rdx # result[i + 1] = remainder
mov rcx, r9 # i += 2
cmp r9, 131072 # 131072 = ARR_LEN
jne .LBB0_2 # more pairs to process
# Normal exit.
.LBB0_9:
pop rax # undo the push at function entry
ret
# Panic path, reached only when m == 0.
.LBB0_10:
lea rdi, [rip + str.0] # presumably the panic message text (str.0 is not shown here)
lea rdx, [rip + .L__unnamed_1] # presumably the source-location data (not shown here)
mov esi, 57 # presumably the length of the message in bytes
call qword ptr [rip + core::panicking::panic@GOTPCREL]
ud2 # traps if control ever gets here; the panic call is not expected to return
How do I write Rust code which compiles to assembly similar to that produced by gcc for C?
Update: When I compile the C code with clang 12.0.0 and -O3,
I get output that looks far more like the Rust assembly than like the GCC assembly.
In other words, this looks like a GCC-vs-clang difference (rustc and clang both use LLVM for code generation) rather than a C-vs-Rust difference.
# clang output for: void plain_mod_test(uint64_t *x, uint64_t m, uint64_t *result)
# Registers as used below: rdi = x, rsi = m, rdx = result (on entry),
# r8 = result (after the copy), rcx = element index i.
# Two elements are handled per iteration; each one uses a 32-bit divide when
# both the element and m fit in 32 bits, and a 64-bit divide otherwise.
# Unlike the rustc listing, there is no test of m against zero and no panic path.
plain_mod_test: # @plain_mod_test
mov r8, rdx # keep the result pointer in r8: div overwrites rdx below
xor ecx, ecx # i = 0
jmp .LBB0_1
# 64-bit divide path for the second element of the pair; rax already holds x[i + 1].
.LBB0_6: # in Loop: Header=BB0_1 Depth=1
xor edx, edx # zero rdx: div takes its dividend from rdx:rax
div rsi # unsigned 64-bit divide by m; remainder in rdx
mov qword ptr [r8 + 8*rcx + 8], rdx # result[i + 1] = remainder
add rcx, 2 # i += 2
cmp rcx, 131072 # 131072 = ARR_LEN
je .LBB0_8 # all elements done
# Loop header: first element of the pair.
.LBB0_1: # =>This Inner Loop Header: Depth=1
mov rax, qword ptr [rdi + 8*rcx] # rax = x[i]
mov rdx, rax
or rdx, rsi # combine the bits of x[i] and m
shr rdx, 32 # result is zero only if both x[i] and m fit in 32 bits
je .LBB0_2 # both fit: use the 32-bit divide
xor edx, edx
div rsi # 64-bit divide; remainder in rdx
jmp .LBB0_4
# 32-bit divide path for the first element.
.LBB0_2: # in Loop: Header=BB0_1 Depth=1
xor edx, edx # zero edx: the 32-bit div takes its dividend from edx:eax
div esi # unsigned 32-bit divide; remainder in edx, upper half of rdx is zero
# Store the first result, then handle the second element of the pair.
.LBB0_4: # in Loop: Header=BB0_1 Depth=1
mov qword ptr [r8 + 8*rcx], rdx # result[i] = remainder
mov rax, qword ptr [rdi + 8*rcx + 8] # rax = x[i + 1]
mov rdx, rax
or rdx, rsi
shr rdx, 32 # same "do both operands fit in 32 bits" test as above
jne .LBB0_6 # no: take the 64-bit divide path
xor edx, edx
div esi # 32-bit divide; remainder in edx
mov qword ptr [r8 + 8*rcx + 8], rdx # result[i + 1] = remainder
add rcx, 2 # i += 2
cmp rcx, 131072 # 131072 = ARR_LEN
jne .LBB0_1 # more pairs to process
.LBB0_8:
ret