0

I'm trying to write an assembly program that compares two strings and outputs whether they're equal or not. I've tried to do this by incrementing an index register one step at a time and comparing the characters at that index.

But there seems to be a mistake in my code, since I was expecting the output:

Equal

But the actual output was:

Equal
NotEqual

Code:

; Compares the NUL-terminated strings str1 and str2 byte by byte and writes
; msg_eq or msg_neq to stdout with sys_write (int 0x80).
;
; NOTE(review): neither write path loads a byte count into edx before
; int 0x80. edx still holds the loop index at that point, so the number of
; bytes written depends on how far the comparison got, not on the message.
%include "asm_io.inc"
segment .data
str1: db "ThisIsSomeString", 0
str2: db "ThisIsSomeString", 0
; msg_neq is declared directly after msg_eq in this segment.
msg_eq: db "Equal", 10, 0
msg_neq: db "NotEqual", 10, 0

segment .text
        global  asm_main
asm_main:
    mov esi, str1             ; esi = address of first string
    mov edi, str2             ; edi = address of second string
    xor edx, edx              ; to clear edx for index addressing
loop:
    mov al, [esi + edx]       ; al = str1[edx]
    mov bl, [edi + edx]       ; bl = str2[edx]
    inc edx                   ; advance index; runs for every byte, including the final 0
    cmp al, bl
    jne not_equal             ; first mismatch -> strings differ
    cmp al, 0                 ; check if we're at the end of string
        je equal
    jmp loop
not_equal:
    mov eax, 4                ; system call number (sys_write = 4)
    mov ebx, 1                ; stdout = 1
    mov ecx, msg_neq          ; message to print
                              ; edx (byte count) is not set here: still the loop index
    int 0x80                  ; issue a system call
    jmp exit
equal:
    mov eax, 4                ; system call number (sys_write = 4)
    mov ebx, 1                ; stdout = 1
    mov ecx, msg_eq           ; message to print
                              ; edx (byte count) is not set here: still the loop index
    int 0x80                  ; issue a system call
    jmp exit                  ; exit is the next label, so this jump is redundant
exit:
    mov eax, 1                ; system call number (sys_exit = 1)
    mov ebx, 0                ; exit code
    int 0x80
Some programmer dude
  • 400,186
  • 35
  • 402
  • 621
FoxyZ
  • 148
  • 1
  • 12
  • How many characters are you telling `sys_write` to write? It doesn't care about nul-terminated strings -- it needs to know how many characters to output in `edx` – David C. Rankin Dec 24 '20 at 10:09

2 Answers

2

You are not providing a length to sys_write. It takes the number of bytes to write in edx. It does not care that the string you are attempting to print is a nul-terminated string. You can solve the problem by saving the length of the messages you wish to output, e.g.

segment .data
str1: db "ThisIsSomeString", 0
str2: db "ThisIsSomeString", 0
; Each length is taken right after its message. `$ - label` counts every
; byte emitted since the label, including the trailing 0, so subtract 1 to
; keep sys_write from sending the NUL terminator to stdout.
msg_eq: db "Equal", 10, 0
len_eq   equ $ - msg_eq - 1
msg_neq: db "NotEqual", 10, 0
len_neq  equ $ - msg_neq - 1

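In NASM, $ evaluates to the assembly position (the address) at the start of the line on which it appears; it has nothing to do with the stack. So an expression of the form $ - label, placed immediately after a string definition, yields the number of bytes that string occupies (including a trailing 0 byte, if you declared one). You can save that value as a constant for later use with sys_write, e.g.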

;-----------------------------------------------------------------------
; Compare two NUL-terminated strings and print "Equal" or "NotEqual".
; Target: 32-bit Linux, NASM syntax, int 0x80 system calls
;         (eax = call number, ebx/ecx/edx = arguments).
;-----------------------------------------------------------------------
SYS_EXIT    equ 1
SYS_WRITE   equ 4
STDOUT      equ 1

segment .data
str1:    db "ThisIsSomeString", 0
str2:    db "ThisIsSomeString", 0
; `$ - label` counts every byte since the label, including the trailing 0;
; subtract 1 so sys_write does not send the NUL terminator to stdout.
msg_eq:  db "Equal", 10, 0
len_eq   equ $ - msg_eq - 1
msg_neq: db "NotEqual", 10, 0
len_neq  equ $ - msg_neq - 1

segment .text
        global  _start
_start:
    mov     esi, str1
    mov     edi, str2
    xor     edx, edx            ; edx = index of the byte being compared
.next_char:
    mov     al, [esi + edx]
    cmp     al, [edi + edx]
    jne     .not_equal          ; first mismatch -> strings differ
    inc     edx
    test    al, al              ; matching byte was the terminator?
    jnz     .next_char          ; no -> keep comparing

    ; Both strings ended at the same position: they are equal.
    mov     ecx, msg_eq         ; message to print
    mov     edx, len_eq         ; length in edx (replaces the loop index)
    jmp     .write
.not_equal:
    mov     ecx, msg_neq        ; message to print
    mov     edx, len_neq        ; length in edx (replaces the loop index)
.write:
    mov     eax, SYS_WRITE
    mov     ebx, STDOUT
    int     0x80                ; write(STDOUT, ecx, edx)

    mov     eax, SYS_EXIT
    xor     ebx, ebx            ; exit code 0
    int     0x80

(note: your %include "asm_io.inc" statement is not needed)

Also note, I have replaced your asm_main with _start to compile and run on my box, just change it back as needed.

Example Output

$ ./bin/strcmp32
Equal
David C. Rankin
  • 81,885
  • 6
  • 58
  • 85
  • Oops, I am clearly out of practice with writing answers! ;-) Good note about using `$` to retrieve the relative location! I'm going to leave my answer up, too, since I think it provides some useful suggestions on how to restructure the code to reduce the number of branches. – Cody Gray - on strike Dec 24 '20 at 10:27
  • I'm usually the slow one `:)` I'll probably learn something from your answer I didn't know to begin with -- all information is good information (except in politics...) If I can just save the length of a constant string with `$ - string_before` -- it's a lot shorter than writing a loop to search for the *nul-character* ending each string (but that will be needed when dealing with strings of unknown length like input, etc...) – David C. Rankin Dec 24 '20 at 10:29
  • If you include a zero byte in the string you should use eg `len_eq equ $ - msg_eq - 1` so as not to display the zero byte. – ecm Dec 24 '20 at 12:37
2

As David C. Rankin said, the sys_write system call doesn't work like puts in C—it doesn't write out a NUL-terminated string. Rather, it requires that you tell it explicitly how many bytes to write. It accepts this argument in the EDX register. When you invoke sys_write at equal, EDX still holds your loop index, which has been incremented once for every byte examined, including the terminating NUL. Since the test string is 16 characters long, EDX is 17, and because you've told sys_write to start at address msg_eq, it writes the following 17 bytes:

E
q
u
a
l
10
0
N
o
t
E
q
u
a
l
10
0

Which is exactly what you see! The key takeaway here is that the assembler places msg_neq immediately after msg_eq in memory, so when sys_write runs off the end of msg_eq, it continues straight into msg_neq.

I would recommend modifying your code as follows:

;-----------------------------------------------------------------------
; asm_main: compare two NUL-terminated strings, print the verdict, exit.
; Target: 32-bit Linux, NASM syntax, int 0x80 system calls.
; Regs:   esi/edi = read cursors into str1/str2
;         ecx/edx = message address/length handed to sys_write
; The cursors are advanced directly, so edx is free to hold the length
; from the start and never doubles as the string index.
;-----------------------------------------------------------------------
%include "asm_io.inc"

SYS_EXIT    equ 1
SYS_WRITE   equ 4
STDOUT      equ 1

segment .data
str1:    db "ThisIsSomeString", 0
str2:    db "ThisIsSomeString", 0
msg_eq:  db "Equal", 10, 0
len_eq   equ $ - msg_eq - 1    ; 6 bytes: excludes the trailing 0
msg_neq: db "NotEqual", 10, 0
len_neq  equ $ - msg_neq - 1   ; 9 bytes: excludes the trailing 0

segment .text
global  asm_main
asm_main:
    mov  esi, str1
    mov  edi, str2
    ; Set up output assuming the strings are not equal:
    mov  ecx, msg_neq          ; message to print
    mov  edx, len_neq          ; number of bytes to print
.next_char:
    mov  al, [esi]
    cmp  al, [edi]
    jne  .print                ; if the strings differ, skip straight to print
    inc  esi
    inc  edi
    test al, al                ; check if we're at the end of string
    jnz  .next_char
    ; The strings were actually equal, so change output setup:
    mov  ecx, msg_eq           ; message to print
    mov  edx, len_eq           ; number of bytes to print
.print:
    mov  eax, SYS_WRITE
    mov  ebx, STDOUT
    int  0x80                  ; write(STDOUT, ecx, edx)

    mov  eax, SYS_EXIT
    xor  ebx, ebx              ; exit code 0
    int  0x80

In addition to fixing this bug by setting explicit lengths in EDX, I've also rearranged your code to reduce the number of branches. This should make it slightly more efficient, but, more importantly, it makes it more readable. Note also that I've changed cmp reg, 0 to test reg, reg, which is almost always the preferable form. Similarly for using xor reg, reg over mov reg, 0 for zeroing a register.

Try it online!

Cody Gray - on strike
  • 239,200
  • 50
  • 490
  • 574