0

So, I have a code, in which I'm calculating a/b-d*c. Separately, it works just fine, but if I'm trying to put it into my project code, I'm getting "f" or "0.0e..." after calculations.

.686 
.MMX
.model flat, stdcall 
option casemap:none 
include \masm32\include\windows.inc ; 
include \MASM32\INCLUDE\masm32.inc
include \masm32\macros\macros.asm
uselib user32, kernel32, masm32, fpu
.data
_a dd 10.0
_b dd 3.0
_c dd 2.0
_d dd 5.0
res REAL8 ?
buf1 db 255 dup(?) ; 
array1 DWORD 1, 5, 15, -15
array2 DWORD 2, 4, 16, 16
result DWORD 0, 0, 0, 0 ; xor result array
MaxResult DWORD 4 dup(?); max elements array
maxIndex DWORD ?
pxorStr db "PXOR: %d, %d, %d, %d", 0
maxStr db "Max values of arrays: %d, %d, %d, %d", 0 
calcStr db "Calculation result: %f", 0
tit1 db "Result of using the MMX", 0
.code
start:
    mov esi, offset array1
    mov edi, offset array2
    mov ebx, offset result
    mov ecx, 4
    pxor_loop:
        movq MM0, [esi]
        pxor MM0, [edi]
        movq [ebx], MM0
        add esi, 4
        add edi, 4
        add ebx, 4
        loop pxor_loop
    ; Check third element and determine max elements if it <15
    mov eax, DWORD PTR [result+8]
    cmp eax, 15
    jle MaxElements
    mov eax, DWORD PTR [result+12]
    cmp eax, 0
    jge Calc1
    jmp Calc2
MaxElements:
;create array of max elements
    mov ecx, LENGTHOF array1 ; loop counter
    mov esi, OFFSET array1 ; source array 1
    mov edi, OFFSET array2 ; source array 2
    lea ebx, MaxResult ; destination array
    mov maxIndex, 0
compareLoop:
    mov eax, DWORD PTR [esi]
    mov edx, DWORD PTR [edi]
    cmp eax, edx
    jge a_greater_b
    mov eax, edx
a_greater_b:
    mov DWORD PTR [ebx], eax ; store max element
    add esi, TYPE array1 ; increment pointer to source array 1
    add edi, TYPE array2 ; increment pointer to source array 2
    add ebx, TYPE MaxResult ; increment pointer to destination array
    inc maxIndex ; increment index
    loop compareLoop
        
Calc1:
;a/b-d*c
    fld _a
    fdiv _b
    fld _d
    fmul _c
    fsubp st(1), st(0)
    fstp res
    jmp DisplayResult
    
Calc2:
;a/b
    fld _a
    fdiv _b
    fstp res
DisplayResult:
; Display results in message box
    invoke wsprintf, ADDR buf1, ADDR pxorStr, DWORD PTR [result], DWORD PTR [result+4], DWORD PTR [result+8], DWORD PTR [result+12] ;for xor
    invoke MessageBox, 0, addr buf1, addr tit1, MB_OK
    invoke wsprintf, ADDR buf1, ADDR maxStr, DWORD PTR [MaxResult], DWORD PTR [MaxResult+4], DWORD PTR [MaxResult+8], DWORD PTR [MaxResult+12];max res
    invoke MessageBox, 0, addr buf1, addr tit1, MB_OK
    invoke FloatToStr, res, OFFSET buf1;calc
    invoke MessageBox, 0, OFFSET buf1, addr tit1, MB_OK
    invoke ExitProcess, 0
end start

Problem are in function Calc1/Calc2 and FloatToStr function. I guess that's because MMX commands here, but it's part of a task, so I can't re-do it without it.

I've tried to use fptoa function before, but it didn't work at all, so I've choosed FloatToStr one.

hyen
  • 11
  • 1
  • `REAL8` is not a float but a double. Make sure your `FloatToStr` expects a double or change to float. – Jester Feb 28 '23 at 13:46
  • Are you aware that it is a bad idea to mix FPU and MMX code together? – Danny Cohen Feb 28 '23 at 13:47
  • @Jester if I'm trying to use something except REAL8, I get a type mismatch error, so I guess it works? – hyen Feb 28 '23 at 13:59
  • @Danny Cohen never knew about it actually.. how can I replace FPU here? – hyen Feb 28 '23 at 13:59
  • I don't know. I just read a book about X64 assembly that specifically warned against it. Use the FPU for math or use the MMX instructions, don't mix them. BTW, AFAIK, all MMX instructions have SSE or AVX upgraded instructions so better use them than MMX. – Danny Cohen Feb 28 '23 at 14:05
  • `REAL4` should work for a float. – Jester Feb 28 '23 at 14:17
  • @Jester I've tried it but got this error: `error A2114: INVOKE argument type mismatch : argument : 1` on the line `invoke FloatToStr, res, OFFSET buf1` – hyen Feb 28 '23 at 14:22
  • Okay so apparently `FloatToStr` does take a double. Try using an `EMMS` after your mmx loop to clear the state. – Jester Feb 28 '23 at 14:31
  • @DannyCohen: After using MMX instructions, you need an `emms` so x87 math instructions can work again. Code normally assumes the FPU is in x87 state. That's one major reason to avoid MMX entirely and only use SSE2, even though it means you need to worry about 16-byte alignment for memory operands, or use separate `movdqu` loads. – Peter Cordes Feb 28 '23 at 22:48
  • 1
    Since you're doing this for performance reasons, [don't use the `loop` instruction unless you're specifically optimizing for AMD Bulldozer and later](https://stackoverflow.com/questions/35742570/why-is-the-loop-instruction-slow-couldnt-intel-have-implemented-it-efficiently); it's slow on other CPUs. – Peter Cordes Mar 02 '23 at 03:14
  • Also, either unroll your loop or consider an indexed addressing mode for at least the load. e.g. `sub esi, edi` outside the loop, and `movq MM0, [esi+edi]` inside, so you address it relative to the other array and have one fewer pointer increment to do. That's good even if you do unroll. See [Micro fusion and addressing modes](https://stackoverflow.com/q/26046634) re: avoiding an indexed addressing mode for `pxor MM0, [edi]` on Sandybridge, or AVX1 `vxorps ymm0, ymm0, [edi]` on all later Intel (micro-fusion), and for the store on HSW / SKL (store AGU). – Peter Cordes Mar 02 '23 at 03:17

0 Answers0