You were right: the second string's NUL check falls out of the compare operation. Slick!
Consider the following:
The fewer memory references, the better:
Your code has 3 data memory references in the loop and 2 on exit (3N+2).
This code has 2 data memory references in the loop and 0 on exit (2N).
The fewer instruction bytes, the better:
Your code has 37 bytes of loop code and 14 bytes of exit code.
This code has 20 bytes of loop code and 3 bytes of exit code.
One trick is to use the return value register as one of the working registers. Another trick is to move the operands into registers and then do operations in the registers.
The code was assembled at
https://defuse.ca/online-x86-assembler.htm#disassembly
The code was not tested.
#-----------------------------------------------------------------------
# int ft_strcmp(const char *s1, const char *s2)
# Intel operand order (dst, src). Byte counts in [] are x86-64 encodings.
# NOTE(review): assumes the System V AMD64 convention (rdi = s1,
#               rsi = s2, result in eax) -- confirm against the caller.
# In:    rdi, rsi = pointers to NUL-terminated strings
# Out:   eax = (byte at rdi) - (byte at rsi), both taken as unsigned
#              values 0..255, at the first position where they differ;
#              0 when the strings are equal
# Clobb: rax, rcx, rdi, rsi, flags
#-----------------------------------------------------------------------
ft_strcmp:
        movzx   eax, byte [rdi]         #[3] eax = current byte of the rdi string (zero-extended)
        movzx   ecx, byte [rsi]         #[3] ecx = current byte of the rsi string (zero-extended)
        cmp     eax, ecx                #[2] do the two bytes differ?
        jne     exit                    #[2] yes: their difference is the result
        inc     rdi                     #[3] point to next byte
        inc     rsi                     #[3] ditto
        test    eax, eax                #[2] bytes were equal, so one NUL test covers both strings
        jne     ft_strcmp               #[2] not the terminator: compare the next pair
                                        #[20] bytes of instructions in loop
exit:
        sub     eax, ecx                #[2] generate return value as a full 32-bit int
                                        # bytes differ, neither is NUL: difference of the values
                                        # both bytes are NUL (fall-through): 0
                                        # rsi byte is NUL, rdi byte is not: positive
                                        # rdi byte is NUL, rsi byte is not: negative
        ret                             #[1]
                                        #[3] bytes of instructions in exit
#-----------------------------------------------------------------------
# int ft_strcmp(const char *s1, const char *s2)
# Intel operand order (dst, src). Byte counts in [] are x86-64 encodings.
# NOTE(review): assumes the System V AMD64 convention (rdi = s1,
#               rsi = s2, result in eax) -- confirm against the caller.
# NOTE(review): this listing was previously annotated with 37 loop bytes
#               and 14 exit bytes; those sizes match 32-bit absolute-
#               address encodings (presumably rsi/rdi were assembled as
#               symbol names), not the register-indirect forms below.
# In:    rdi, rsi = pointers to NUL-terminated strings
# Out:   eax = (byte at rdi) - (byte at rsi), both taken as unsigned
#              values 0..255, at the position where the loop stopped
# Clobb: rax, rcx, rdi, rsi, flags
#-----------------------------------------------------------------------
ft_strcmp:
        cmp     byte [rsi], 0           #[3] has the rsi string reached its NUL?
        je      exit                    #[2]
        mov     cl, byte [rsi]          #[2] cl = current byte of the rsi string
        cmp     byte [rdi], cl          #[2] same as the rdi byte? (also catches NUL in rdi string)
        jne     exit                    #[2]
        inc     rsi                     #[3] advance both pointers
        inc     rdi                     #[3]
        jmp     ft_strcmp               #[2]
                                        #[19] bytes of loop code
exit:
        movzx   eax, byte [rdi]         #[3] reload both bytes zero-extended so the
        movzx   ecx, byte [rsi]         #[3] subtraction is done on full 32-bit values
        sub     eax, ecx                #[2] negative, zero or positive int result
        ret                             #[1]
                                        #[9] bytes of exit code