A simpler (?) (and certainly shorter) algorithm:
; Normalises the spaces of the '#'-terminated string `str` in place:
; leading and trailing spaces are dropped and every inner run of spaces
; is reduced to a single space. The result is again '#'-terminated.
; Syntax: MASM/TASM, 16-bit real mode (DOS). `str` is defined elsewhere.
        mov     ax,SEG str
        mov     ds,ax
        mov     es,ax                   ; es = ds, so stosb writes into the same segment
        mov     si,OFFSET str
        mov     di,si
        mov     bx,si
        cld                             ; lodsb/stosb must move forward (DF = 0)
; ds:si = source pointer to read char by char
; es:di = destination pointer to write modified string
;         (di never overtakes si: at most one stosb per lodsb)
; bx    = start of str, lower bound for the backward scan in the second phase
        xor     cx,cx                   ; cx = 0: no space may be copied yet (skips leading spaces)

copyLoop:
        lodsb                           ; al = ds:[si++]
        cmp     al,'#'
        je      removeTrailingSpaces    ; terminator reached, al keeps '#'
        cmp     al,' '
        jne     notSpace
        jcxz    copyLoop                ; no more spaces allowed to copy, skip it
        ; copy the space
        dec     cx                      ; --allowed
        stosb                           ; es:[di++] = al
        jmp     copyLoop

notSpace:
        mov     cx,1                    ; one space can be copied next time
        stosb                           ; copy the not-space char
        jmp     copyLoop

; Second phase: step di back over spaces at the end of the output,
; stopping at the start of the string (bx) if nothing else is left.
removeTrailingSpaces:
        cmp     di,bx
        je      emptyStringResult       ; whole output consumed -> result is just "#"
        dec     di
        cmp     BYTE PTR [di],' '
        je      removeTrailingSpaces
        inc     di                      ; not-space found, write '#' after it

emptyStringResult:
        stosb                           ; write the '#' (still in al) at final position

        ; DOS "terminate with return code": ah = 4Ch, al = exit code 0.
        ; (mov ax,4Ch would set ah = 00h, the legacy terminate call that
        ; requires cs = PSP and is not safe for EXE programs.)
        mov     ax,4C00h
        int     21h
How it works:
It copies almost everything from `ds:[si]` to `es:[di]`, counting down the spaces it is still allowed to copy and skipping them when the counter is zero. A non-space character resets the counter to 1 (so the next space after a word will be copied).
When `'#'` is found, it scans backwards from the end of the output string over any trailing spaces, and writes the terminating `'#'` after the last non-space character (or at the very start of the string when the result is empty).
Discussing in the comments how I built this algorithm, and how it is not possible to decide whether the current word is the last one, gave me another idea for dealing with the end of the string: cache the last known word-end position, so that after reaching the end of the source string I can use the cached pointer to set the terminator directly at the correct place. Variant 2:
; initial code is identical, only function of bx is different, so updated comment:
...
; bx = str pointer pointing +1 beyond last non-space character
; (for empty input string that means OFFSET str to produce "#" result)
...
; Main loop of variant 2. Copies the string from ds:si to es:di while
; dropping surplus spaces, and keeps bx pointing one byte past the last
; non-space character written, so the terminator can be stored directly.
; NOTE(review): lodsb/stosb only advance when DF = 0; the elided setup
; code is assumed to guarantee that (e.g. with cld) - confirm.
copyLoop:
        lodsb                           ; al = ds:[si++]
        cmp     al,'#'
        je      setTerminatorAndExit    ; terminator reached, al keeps '#'
        cmp     al,' '
        jne     notSpace
        jcxz    copyLoop                ; no more spaces allowed to copy, skip it
        ; copy the space
        dec     cx                      ; --allowed
        stosb                           ; es:[di++] = al
        jmp     copyLoop

notSpace:
        mov     cx,1                    ; one space can be copied next time
        stosb                           ; copy the not-space char
        mov     bx,di                   ; update bx to point +1 beyond last non-space char
        jmp     copyLoop

setTerminatorAndExit:
        mov     [bx],al                 ; write the '#' to cached position of last non-space+1
                                        ; (any space copied after that position is overwritten or left behind the terminator)

        ; DOS "terminate with return code": ah = 4Ch, al = exit code 0.
        ; (mov ax,4Ch would set ah = 00h, the legacy terminate call that
        ; requires cs = PSP and is not safe for EXE programs.)
        mov     ax,4C00h
        int     21h