1

I have the follow simple program I'm trying to build targeting x64 on Windows 10 building with MASM:

WSTR MACRO lbl:req,qstr:VARARG
LOCAL arg,unq,qot,q
    lbl LABEL WORD
    FOR arg,<qstr>
        qot SubStr <arg>,1,1
        q = 0
        IFIDNI qot,<!'>;'
            q = 1
        ELSEIFIDNI qot,<!">;"
            q = 1
        ELSE
            DW arg
        ENDIF
        IF q EQ 1
            unq SubStr <arg>,2,@SizeStr(<arg>)-2
        %   FORC c,<unq>
                DW "&c"
            ENDM
        ENDIF
    ENDM
    DW 0
ENDM

L MACRO qstr:VARARG
LOCAL sym,seg
    seg EQU <.code>
    %IFIDNI <@CurSeg>,<_DATA>
        seg EQU <.data>
    ENDIF
    .CONST
        ALIGN 4
        WSTR sym,qstr
    seg
    EXITM <OFFSET sym>
ENDM

extrn   LoadCursorW: PROC
extrn   MessageBoxW: PROC
extrn   ExitProcess: PROC
extrn   GetModuleHandleW: PROC
extrn   RegisterClassExW: PROC
extrn   CreateWindowExW: PROC
extrn   GetLastError: PROC
extrn   DefWindowProcW: PROC

.data
wstr windowClassName,"AsmTestClass",0,0
wstr windowTitle,"AsmTest",0,0

HWND_DESKTOP        textequ <0h>
MB_OK           textequ <0h>

WM_CREATE       textequ <0001h>
WM_DESTROY      textequ <0002h>
WM_SIZE         textequ <0005h>
WM_PAINT        textequ <000fh>

CS_VREDRAW      textequ <0001h>
CS_HREDRAW      textequ <0002h>

WS_OVERLAPPED   textequ <00000000h>
WS_CAPTION      textequ <00c00000h>
WS_SYSMENU      textequ <00080000h>
WS_MINIMIZEBOX  textequ <00020000h>

CW_USEDEFAULT   textequ <80000000h>

IDI_APPLICATION textequ <00007f00h>

WINDOW_WIDTH        DWORD   800
WINDOW_HEIGHT   DWORD   600

WNDCLASSEX STRUCT DWORD
  cbSize            DWORD   ?
  style             DWORD   ?
  lpfnWndProc       QWORD   ?
  cbClsExtra        DWORD   ?
  cbWndExtra        DWORD   ?
  hInstance         QWORD   ?
  hIcon             QWORD   ?
  hCursor           QWORD   ?
  hbrBackground     QWORD   ?
  lpszMenuName      QWORD   ?
  lpszClassName     QWORD   ?
  hIconSm           QWORD   ?
WNDCLASSEX ENDS

.code
main proc

LOCAL wc:WNDCLASSEX
LOCAL hWnd:QWORD
LOCAL hInstance:QWORD
LOCAL hCursor:QWORD
LOCAL ATOM:WORD

    ; hInstance = GetModuleHandle(NULL)
    mov     rcx, 0
    call        GetModuleHandleW
    mov     hInstance, rax

    ; hCursor = LoadCursor(NULL,IDI_APPLICATION)
    mov     edx, IDI_APPLICATION
    xor     ecx, ecx
    call        LoadCursorW
    mov     hCursor, rax
  
    ; Setup Window Class
    mov     wc.cbSize, SIZEOF WNDCLASSEX
    mov     wc.style, CS_VREDRAW or CS_HREDRAW
    lea     rax, OFFSET WndProc
    mov     wc.lpfnWndProc, rax
    mov     wc.cbClsExtra, 0
    mov     wc.cbWndExtra, 0
    lea     rax, hInstance
    mov     wc.hInstance, rax
    mov     wc.hbrBackground, 0
    mov     wc.lpszMenuName, 0
    lea     rax, hCursor
    mov     wc.hCursor, rax
    lea     rax, windowClassName
    mov     wc.lpszClassName, rax
    mov     wc.hIconSm, 0
    lea     rcx, wc
    call        RegisterClassExW
    mov     ATOM, ax

    ; CreateWindowExW
    mov     QWORD PTR [rsp+88], 0               ;   lpParam
    lea     rax, hInstance
    mov     QWORD PTR [rsp+80], rax             ;   hInstance
    mov     QWORD PTR [rsp+72], 0               ;   hMenu
    mov     QWORD PTR [rsp+64], 0               ;   hWndParent
    mov     edx, WINDOW_HEIGHT
    mov     DWORD PTR [rsp+56], edx             ;   nHeight
    mov     edx, WINDOW_WIDTH
    mov     DWORD PTR [rsp+48], edx             ;   nWidth
    mov     DWORD PTR [rsp+40], CW_USEDEFAULT   ;   Y
    mov     DWORD PTR [rsp+32], CW_USEDEFAULT   ;   X
    mov     r9d, WS_OVERLAPPED or WS_CAPTION or WS_SYSMENU or WS_MINIMIZEBOX        ; dwStyle
    lea     r8, windowTitle                     ;   lpWindowName
    lea     rdx, windowClassName                    ;   lpClassName
    xor     ecx,ecx                             ;   dwExStyle
    call        CreateWindowExW

    cmp     rax,  0
    je      WindowFailed
    jmp     WindowSuccess
WindowFailed:
    call        GetLastError
    ; to-do check error
WindowSuccess:
    mov     hWnd, rax

    mov     rcx, HWND_DESKTOP               ; hWnd
    lea     rdx, L("Hello x64 World!",0,0)  ; lpText
    lea     r8, L("Win64 Demo",0,0)         ; lpCaption
    mov     r9d, MB_OK                      ; uType 
    call        MessageBoxW
    mov     ecx, eax                            ; uExitCode
    call        ExitProcess

main endp

WndProc proc
    LOCAL hWnd:QWORD
    LOCAL uMsg:DWORD
    LOCAL wParam:QWORD
    LOCAL lParam:QWORD
    LOCAL result:QWORD

    mov     lParam, r9
    mov     wParam, r8
    mov     uMsg, edx
    mov     hWnd, rcx
    
    ; msg handler
    cmp     uMsg,WM_CREATE
    je      create
    cmp     uMsg,WM_PAINT
    je      paint
    cmp     uMsg,WM_DESTROY
    je      destroy
    cmp     uMsg,WM_SIZE
    je      resize  

    ; default
    call        DefWindowProcW
    mov     result,rax
    jmp     finish

create:
    mov     result, 0
    jmp     finish

paint:
    mov     result, 0
    jmp     finish

destroy:
    mov     result, 0
    jmp     finish

resize:
    mov     result, 0
    jmp     finish

finish: 
    mov     rax, result
    
    ret 

WndProc endp
End

Masm is automatically adding the following code:

main proc
push        rbp  
mov         rbp,rsp  
add         rsp,0FFFFFFFFFFFFFF90h  
..
main endp

WndProc proc
push        rbp  
mov         rbp,rsp  
add         rsp,0FFFFFFFFFFFFFFD8h  
...
leave
ret
WndProc endp

However when WndProc is routine us called an access violation is triggered on call to DefWindowProcW

I suspect this is because stack pointer is not correctly setup, but masm seems to be adding code for this. How to do this correctly?

The code added by masm

add         rsp,0FFFFFFFFFFFFFFD8h

It equivalent of sub rsp,40. Which is not divisible by 16, adding sub rsp,8 to my code fixes the crash.

WndProc proc
    LOCAL hWnd:QWORD
    LOCAL uMsg:DWORD
    LOCAL wParam:QWORD
    LOCAL lParam:QWORD
    LOCAL result:QWORD

    sub     rsp, 8

Is the best way to resolve this, or is there a way for masm to auto calculate correct value / or predict what needs to be corrected in advance?

Malcolm McCaffery
  • 2,468
  • 1
  • 22
  • 43

1 Answers1

1

You could use this to round rsp down to a 16 byte boundary:

        and     rsp,0fffffff0h

I tested using VS2015, and usage of LOCAL causes MASM to auto-generate that code. Using VS2015, with a wmain instead of WndProc, upon entry, the last nibble of rsp == 8, apparently anticipating that the entry code will use rbp as a frame pointer:

        push    rbp
        mov     rbp,rsp

putting rsp on a 16 byte boundary

If on entry, the last nibble of rsp is always 8, then you could use dummy LOCALS so that all the LOCALS take up 8 + some multiple of 16 bytes.

rcgldr
  • 27,407
  • 3
  • 36
  • 61
  • 1
    *apparently anticipating that the entry code will use rbp as a frame pointer:* - In the Windows x64 calling convention, it's guaranteed that `RSP % 16 == 8` on function entry. (So to maintain this invariant, you need RSP aligned by 16 *before* a `call` which pushes an 8-byte return address. This also means the shadow space, and stack args if any, are 16-byte aligned.) It's 100% normal for anything that's an actual function (which can `ret`) to have RSP % 16 on entry, and would be an ABI violation if it wasn't. Normal functions shouldn't waste instruction doing `and rsp` unless they need >=32 – Peter Cordes Jan 05 '22 at 14:04
  • 1
    The same `RSP & 0xF == 8` on function entry guarantee applies to the x86-64 System V ABI. [Why does the x86-64 / AMD64 System V ABI mandate a 16 byte stack alignment?](https://stackoverflow.com/q/49391001). Fun fact: on Linux, the actual process entry point, `_start`, is entered with RSP % 16 == 0. And it's not a function; RSP points at argc on the stack, not a return address. – Peter Cordes Jan 05 '22 at 14:08
  • @PeterCordes - For windows 64 bit calling convention, if frame pointers are omitted on a build (assuming Visual Studio), is RSP % 16 == 8 still true on function entry? – rcgldr Jan 06 '22 at 00:48
  • Yes. Using a frame pointer or not is something *purely* local to a single function, zero interaction with the ABI. Even stack-unwinding uses separate metadata, not a chain of RBP frame pointers. x86-64 SysV is the same in this respect, saying nothing about RBP or frame pointers, other than suggesting that `_start` zeros RBP on process entry so if other code does use frame pointers, the linked list has a 0 terminator. (In fact, modern i386 System V is the same; ESP % 16 == 12 on entry to functions, and EBP frame pointers are not part of the ABI even for stack unwinding on exceptions.) – Peter Cordes Jan 06 '22 at 02:11
  • (Not affecting the ABI is pretty clearly necessary if you think about separately-compiled code, e.g. different .obj files built with different options, or even calls to library functions in static or DLL libraries.) – Peter Cordes Jan 06 '22 at 02:28
  • @PeterCordes - I forgot that Windows X64 has a single ABI. For 32 bit mode, I think there are 3 ABIs (stdcall, fastcall, ???), which requires all files and libraries to be compiled in a compatible matter for each of the ABI (except for internal only stuff). So it would make sense that omitting frame pointers wouldn't change Windows X64 ABI. – rcgldr Jan 06 '22 at 04:24
  • Windows x64 has vectorcall as well as the default fastcall. They differ only in arg-passing for wider types like `__m128`. 32-bit mode Windows has multiple calling conventions, yes. There's also cdecl, vectorcall, and thiscall, possibly more. https://en.wikipedia.org/wiki/X86_calling_conventions#List_of_x86_calling_conventions. In any of these, using frame pointers or not within a function isn't part of the ABI, unless required for stack unwinding. (Although in Win32 IIRC ESP alignment is only guaranteed to be 4 bytes, so odd/even numbers of pushes are irrelevant.) – Peter Cordes Jan 06 '22 at 04:30
  • I wouldn't really call "stdcall" a whole separate ABI, though, since per-function attributes can declare what calling convention any given function should be called with. e.g. I think WinAPI functions are all stdcall, but variadic functions like printf must be cdecl (because stdcall is callee-pops, so it couldn't ignore extra args the format string doesn't reference like ISO C requires). So it's normal for 32-bit Windows programs to use a mix of calling conventions on a per-function basis. – Peter Cordes Jan 06 '22 at 04:32