I have some assembly code that makes the screen fade out on my MS-DOS game. It works, but it loops continuously

Question

I am making a game with C and assembly for MS-DOS (I am planning to distribute the game with DOSBox or my own modification thereof). I large portions of the graphics code are in assembly, which is a language I am weaker in, but I saw it as something that was convenient to use due to its ability to change things to such a nitty-gritty level (down to things like the graphical palette and such).

However, I am basing my code off of a fade out function that I found from some old source code that was in 16 bit assembly, and I am trying to convert it to 32 bit assembly.

I got the code to work, but however, it keeps looping repeatedly instead of continuing the code.

AFObeg  equ [bp+4]
AFOcnt  equ [bp+4+2]

proc FadeOut_ near
  push  ebx
  push  ecx
  push  edx
  mov   ebp,esp
  push  ds
  push  esi
  push  edi

  push  ds           ; get data segment into es
  pop  es
  mov  edx, offset _WorkPalette
  push  edx          ; save offset of opal
  xor  ebx, ebx
  mov  ecx, 100h
  mov  eax, 1017h    ; bios read dac registers function
  int  10h           ; read the palette registers into opal
  pop  edi           ; offset of opal, was in dx!
  mov  eax, AFObeg   ; get offset of first palette byte to
  mov  ebx, 3        ; be processed
  mul  ebx
  add  edi, eax      ; adjust offset into opal
  mov  eax, AFOcnt   ; find the number of bytes to be processed
  mov  ebx, 3
  mul  ebx           ; leave it in ax
  mov  ecx, 64       ; 64 passes through fade loop
o_fade_loop:
  push  ecx          ; save the fade loop counter
  push  edi          ; save offset of first byte processed in
  mov  bl, cl        ; we'll use the pass number as a threshold
  mov  ecx, eax      ; load number of bytes to process into cx
o_pal_cmp_loop:
  cmp  bl, es:[edi]  ; start decrementing when palette value
  jnz  o_no_dec      ; is equal loop count (it will stay equal
  dec  BYTE PTR es:[edi]  ; to loop count for the rest of this pass)
o_no_dec:
  inc  edi
  loop o_pal_cmp_loop       ; do the next byte

  mov  ebx, esp      ; need the stack pointer for a moment
  mov  di, ss:[ebx]  ; restore offset into pal without popping
  mov  ecx, AFOcnt   ; number of triplets to process
  push  eax          ; need to use ax for port i/o

  mov  edx, 03DAh    ; CRT controller input status 1 register
o_vbi_1:
  in  al, dx         ; watch vertical blanking bit
  test  al,08h       ; wait for it to clear to make sure
  jnz o_vbi_1        ; we're not in a blanking interval
o_vbi_2:
  in  al, dx         ; now wait for the start of the
  test  al,08h       ; next blanking interval
  jz  o_vbi_2

  mov  ah, AFObeg    ; get first register to process into ah
  mov  dx, 03c8h     ; DAC palette index register
o_pal_load_loop:
  mov  al, ah        ; get next palette number to write
  out  dx, al        ; write the register number to the dac
  inc  dx            ; address dac data register
  mov  al, BYTE PTR es:[di] ; get first byte of triplet
  out  dx, al        ; write it to the dac data register
  inc  edi           ; point to second byte
  mov  al, BYTE PTR es:[di] ; get second byte of triplet
  out  dx, al        ; write it to the dac data register
  inc  edi           ; point to third byte
  mov  al, BYTE PTR es:[di] ; get third byte of triplet
  out  dx, al        ; write it to the dac data register
  inc  edi           ; point to first byte of next triplet
  dec  edx           ; address the da21c index register
  inc  ah            ; point to next palette register
  loop o_pal_load_loop      ; process next triplet

  pop  eax           ; restore ax
  pop  edi           ; restore the offset into pal
  pop  ecx           ; restore the fade loop counter
  loop o_fade_loop   ; do the next pass through the fade loop

  pop  edi
  pop  esi
  pop  ds
  pop  ebp
  ret
endp ;end of the fade out function

If anyone has any more questions, I will gladly answer them.

Edit: for those of you wondering what the original code looked like, here you go:

AFObeg  equ [bp+ABASE]
AFOcnt  equ [bp+ABASE+2]

PBEGIN _FadeOut
    push    bp
    mov bp,sp
    push    ds
    push    si
    push    di

    push    ds        ; get data segment into es
    pop es
    mov dx, offset DGROUP:_WorkPalette
    push    dx        ; save offset of opal
    xor bx, bx
    mov cx, 100h
    mov ax, 1017h     ; bios read dac registers function
    int 10h       ; read the palette registers into opal
    pop di        ; offset of opal, was in dx!
    mov ax, AFObeg    ; get offset of first palette byte to
    mov bx, 3         ; be processed
    mul bx
    add di, ax        ; adjust offset into opal
    mov ax, AFOcnt    ; find the number of bytes to be processed
    mov bx, 3
    mul bx        ; leave it in ax
    mov cx, 64        ; 64 passes through fade loop
o_fade_loop:
    push    cx        ; save the fade loop counter
    push    di        ; save offset of first byte processed in
    mov bl, cl        ; we'll use the pass number as a threshold
    mov cx, ax        ; load number of bytes to process into cx
o_pal_cmp_loop:
    cmp bl, es:[di]   ; start decrementing when palette value
    jnz o_no_dec      ; is equal loop count (it will stay equal
    dec BYTE PTR es:[di]  ; to loop count for the rest of this pass)
o_no_dec:
    inc di
    loop    o_pal_cmp_loop      ; do the next byte

    mov bx, sp          ; need the stack pointer for a moment
    mov di, ss:[bx]     ; restore offset into pal without popping
    mov cx, AFOcnt      ; number of triplets to process
    push    ax          ; need to use ax for port i/o

        mov dx, 03DAh       ; CRT controller input status 1 register
o_vbi_1:
        in al, dx           ; watch vertical blanking bit
        test al,08h         ; wait for it to clear to make sure
        jnz o_vbi_1         ; we're not in a blanking interval
o_vbi_2:
        in al, dx           ; now wait for the start of the
        test al,08h         ; next blanking interval
        jz o_vbi_2

    mov ah, BYTE PTR AFObeg ; get first register to process into ah
    mov dx, 03c8h       ; DAC palette index register
o_pal_load_loop:
    mov al, ah          ; get next palette number to write
    out dx, al          ; write the register number to the dac
    inc dx          ; address dac data register
    mov al, BYTE PTR es:[di] ; get first byte of triplet
    out dx, al           ; write it to the dac data register
    inc di           ; point to second byte
    mov al, BYTE PTR es:[di] ; get second byte of triplet
    out dx, al           ; write it to the dac data register
    inc di           ; point to third byte
    mov al, BYTE PTR es:[di] ; get third byte of triplet
    out dx, al           ; write it to the dac data register
    inc di           ; point to first byte of next triplet
    dec dx           ; address the dac index register
    inc ah           ; point to next palette register
    loop    o_pal_load_loop      ; process next triplet

    pop ax        ; restore ax
    pop di        ; restore the offset into pal
    pop cx        ; restore the fade loop counter
    loop    o_fade_loop   ; do the next pass through the fade loop



    pop di
    pop si
    pop ds
    pop bp
    ret
_FadeOut endp

And here is the macro ET.MAC file where things like ABASE are defined

; MACRO FILE FOR EGA320 LIBRARY


IFDEF   _ML
    %OUT    LARGE MODEL

PBEGIN  MACRO   L
    PUBLIC  L
L   PROC    FAR
    ENDM

PEXTRN  MACRO   L
    EXTRN   L:FAR
    ENDM

ABASE   EQU 6       ;BASE STACK PTR UPON PROCEDURE ENTRY

ENDIF

IFDEF   _MC

PBEGIN  MACRO   L
    PUBLIC  L
L   PROC    NEAR
    ENDM

PEXTRN  MACRO   L
    EXTRN   L:NEAR
    ENDM

ABASE   EQU 4       ;BASE STACK PTR UPON PROCEDURE ENTRY

    %OUT    COMPACT MODEL

ENDIF


XMOV    MACRO A,B
    PUSH    B       ;XFER SOURCE
    POP A       ; TO DEST.
    ENDM


GEN MACRO OP,ARG
    IRP X,<ARG>
    OP  X
    ENDM
    ENDM


XSTM    MACRO S,O,D
    MOV WORD PTR D,O
    MOV WORD PTR D+2,S
    ENDM

But to avoid using the file, I have switched over to just changing the value of ABASE to 4 or 6, but they both still give a relatively similar result.

Definitions `AFObeg` and `AFOcnt` inherited from 16bit code look suspicious to me. Is the `AFObeg` provided in `ECX` and `AFOcnt` in the upper half of `ECX` combined with `BX` in your calling convention? — vitsoft, Jun 22 '21 at 07:25
Where those 3 initial `push` instructions there in the original code? Because it seems to me like your first arg would be at `[ebp + 16]` (note: `ebp`, not `bp`). But why not make it easy for yourself and use the named argument syntax where the assembler is responsible for figuring out the offsets? (i.e. something like `FadeOut PROC NEAR first:DWORD, cnt:DWORD`). — Michael, Jun 22 '21 at 08:01
The original code was compiled in Borland, and I am trying to compile it with 32 bit watcom. I added the 3 push things to the top. — Ryanwuzhere, Jun 22 '21 at 20:38
`AFObeg equ [bp+4]`: Can you really define an equ like that? I thought they had to evaluate to numbers. — Nate Eldredge, Jun 22 '21 at 22:14
@NateEldredge thats the way that compiles the best, anyways. It seems to work well. — Ryanwuzhere, Jun 22 '21 at 23:37

score 1 · Answer 1 · answered Jun 22 '21 at 12:49

1

For the fade loop, you load the count with mov ecx, eax. Looking back at where eax got its value, we find the mov eax, AFOcnt instruction. However, it seems that AFOcnt is only 16 bits, so there will be garbage values in the upper half of eax which will cause your loop to run for a long time.

Use movzx eax,word ptr AFOcnt to zero out the upper half of eax (or you can mask it with an and instruction).

A few other notes (not comprehensive):

you save ds, but modify es
this simple fade code will corrupt the colors as it fades. With a bit of effort, the fade can be redone to preserve the relative values of each red-green-blue component so the colors truly fade to black.
You use a mix of 16 bit ([di], function parameters) and 32 bit ([edi]) addressing. This should use 32 bit addressing exclusively.

answered Jun 22 '21 at 12:49

1201ProgramAlarm

32,384
7
42
56

`movzx eax,word ptr AFOcnt` leaves me with an illegal memory reference. However, if I just do `movzx eax, AFOcnt`, it compiles just fine. Would that produce the same result? – Ryanwuzhere Jun 22 '21 at 20:53
@Ryanwuzhere: With your text substitution `AFOcnt equ [bp+4+2]`, that would leave you without anything implying a source operand-size for movzx, so it would be ambiguous between byte and word. Better figure out what syntax will keep your assembler happy to make that explicit. (Try it without the equ macro substitution, or try disassembling to see what source size it picked and how it describes it.) – Peter Cordes Jun 23 '21 at 00:32
@PeterCordes, I am quite sure I do not have the ability to do that efficiently, as Watcom does not have a disassembler that supports Borland-compiled executables. – Ryanwuzhere Jun 23 '21 at 00:51
@Ryanwuzhere: You can use the disassembly features of a debugger, if you don't have a separate disassembler. Perhaps set a breakpoint in the code that contains this, if it needs to be in the right mode to disassemble a program that enters protected mode after starting from DOS. (You do have a debugger, right? That's an essential tool for learning asm and messing around with stuff; so much so debugging without a debugger is very inefficient with asm.) – Peter Cordes Jun 23 '21 at 00:56
@PeterCordes I can not find where Watcom has a debugger either. – Ryanwuzhere Jun 23 '21 at 01:13
@Ryanwuzhere: Ok, then that's your new first problem to solve. I don't have a recommendation since I don't use obsolete 16-bit stuff or DOS, but certainly there must have been tools that people used at the time, and/or more modern tools. – Peter Cordes Jun 23 '21 at 01:24
@PeterCordes noted. Do you have any recommendations for my code (besides using newer stuff)? – Ryanwuzhere Jun 23 '21 at 01:38
1

@Ryanwuzhere: Since you require a 386 anyway, definitely don't use `mul` to multiply by 3. (In fact don't do that if you care about speed even on older CPUs). Use `lea ax, [eax + eax*2]` to multiply AX by 3. (Or if you're in 32-bit mode, use EAX as the destination; prefer 32-bit op size.) Also, on 186 or better you could have done `imul ax, AFObeg, 3`, without destroying DX for no reason or spending time writing it. Also, [avoid the `loop` instruction it's slow](https://stackoverflow.com/questions/35742570/why-is-the-loop-instruction-slow-couldnt-intel-have-implemented-it-efficiently). – Peter Cordes Jun 23 '21 at 01:49
@PeterCordes I got rid of the loops... I am still trying to figure out where to use the `lea` command. – Ryanwuzhere Jun 23 '21 at 04:34

I have some assembly code that makes the screen fade out on my MS-DOS game. It works, but it loops continuously

1 Answers1