0

I am trying to implement a function in x86 nasm assembler which removes every n-th character from string. However, I am experiencing quite unexpected behaviour and don't really understand why it doesn't work.

// main.c

#include <stdio.h>

char *rem(char *s, int n);

int main(int argc, char *argv[])
{
    char s[] = "abcabcabc";
    int n = 3;
    printf("%s\n", rem(s, n));
}
; rem.s

                section .text
        global  rem
rem:
        push    ebp
        mov     ebp, esp
        push    esi
        push    edi
        push    ebx

        mov     eax, [ebp+8] ; read pointer
        mov     ecx, [ebp+8] ; write pointer
        mov     edx, [ebp+12] ; n
        mov     esi, 1 ; counter

loop:
        cmp     BYTE [eax], 0
        jz      fin

        cmp     edx, esi
        jz      remove

dont_remove:
        ; move current character to position
        ; pointed by the write pointer
        mov     edi, [eax]
        mov     [ecx], edi
        inc     ecx ; increase write pointer
        inc     eax ; increase read pointer
        inc     esi ; counter++
        jmp     loop

remove:
        mov     esi, 1 ; reset the counter
        inc     eax ; increase only read pointer
        jmp     loop

fin:
        mov     edi, [eax]
        mov     [ecx], edi

        mov     eax, [ebp+8]
        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        ret
# Makefile

EXEFILE = main
OBJECTS = main.o rem.o
CCFMT = -m32
NASMFMT = -f elf32
CCOPT = -m32 -c
NASMOPT = -w+all

.c.o:
    cc -g $(CCFMT) $(CCOPT) $<

.s.o:
    nasm $(NASMFMT) $(NASMOPT) $<

$(EXEFILE): $(OBJECTS)
    cc $(CCFMT) -o $(EXEFILE) $(OBJECTS)
    
clean:
    rm *.o $(EXEFILE)

After running the code with command make && ./main I expected to see ababab (so that it removes all of the c's from "abcabcabc", in other way, removes every 3rd character). However it returns abacb. What is causing this issue? Thanks in advance for help

szachneq
  • 3
  • 2
  • Recall that strings are arrays of bytes, not of dwords. Your code uses dword-sized operations to access the arrays, try to change it to use byte-sized operations. – fuz Dec 03 '22 at 10:32
  • Which operations do you refer to exactly? Does it only affect the `move` operations or are others also affected? – szachneq Dec 03 '22 at 10:38
  • 1
    Every operation with a memory operand pointing into the string. For your case, these all seem to be move instructions. Try e.g. `movzx ebx, byte [eax]; mov [ecx], bl` to copy one byte from one location to the other. – fuz Dec 03 '22 at 10:45
  • In `mov edi, [eax]`, the dword register (EDI) implies 32-bit operand-size. `mov edi, dword [eax]`. – Peter Cordes Dec 03 '22 at 11:23
  • If you run this in GDB, `p /x $eax` after it loads the stack arg, then copy/paste that address into `display (char*)0xffffd5d2`. Single-step instructions with `stepi`. You can then see the string changing as your code copies 4-byte chunks around. e.g. first to `ababcaabc` (after skipping the first `c`, still hasn't copied the 0-terminator any earlier yet), then to `abacaabbc` the next iteration since that dword copy went out past the read position. – Peter Cordes Dec 03 '22 at 11:46
  • Note that the first 2 copies have read_pos = write_pos, so you might as well start from `read_pos += n`, `write_pos += n-1`, except then you'd have to scan the first `n` bytes of the string for terminating zeros. – Peter Cordes Dec 03 '22 at 11:46

0 Answers0