0

I have started a week ago to "learn" Assembly using NASM on Windows10, so forgive me if this question is completely "wtf" for you.

I'm trying to use WinExec from kernel32.dll to run programs, like cmd.exe, calc.exe e etc...

Here I've found this piece of code that works like a charm, but only if the lpCmdLine is equal to 4 bytes, I think that the problem is something related to the stack alignament, but I don't understand how I can solve the problem. I've searched for 3 days a solution, read everything that I can found about assembly, but my brain is completely stuck.

In this sample code, I try to launch cmd.exe (7 bytes, 8 with the final 0x20 char [' '] to avoid the null byte), but doesen't works.

You can found the "problem" at line 62

    [BITS 64]
SECTION .text

%define B2W(b1,b2)                      (((b2) << 8) + (b1))
%define W2DW(w1,w2)                     (((w2) << 16) + (w1))
%define DW2QW(dw1,dw2)                  (((dw2) << 32) + (dw1))
%define B2DW(b1,b2,b3,b4)               ((B2W(b3, b4) << 16) + B2W(b1, b2))
%define B2QW(b1,b2,b3,b4,b5,b6,b7,b8)   ((B2DW(b5,b6,b7,b8) << 32) + B2DW(b1,b2,b3,b4))
%define W2QW(w1,w2,w3,w4)               ((W2DW(w3,w4) << 32) + W2DW(w1,w2))

; x64 WinExec *requires* 16 byte stack alignment and four QWORDS of stack space, which may be overwritten.
; http://msdn.microsoft.com/en-us/library/ms235286.aspx
%ifndef PLATFORM_INDEPENDENT
global shellcode
shellcode:
%ifdef FUNC                               ; assumes stack ends with 8 on entry, use STACK_ALIGN if it might not be.
%ifdef CLEAN                              ; 64-bit calling convention considers RAX, RCX, RDX, R8, R9, R10 and R11
    PUSH    RAX                           ; volatile. Use CLEAN if you want to preserve those as well.
    PUSH    RCX
    PUSH    RDX
%endif
    PUSH    RBX
    PUSH    RSI
    PUSH    RDI
    PUSH    RBP                           ; Stack now ends with 8 (!CLEAN) or is 16 byte (CLEAN) aligned
%endif
%ifdef STACK_ALIGN
%ifdef FUNC
    PUSH    RSP
    POP     RAX
%endif
    AND     SP, -16                       ; Align stack to 16 bytes
                                          ; (we can't force it to end with 8 without dummy push and then or)
    PUSH    RAX                           ; Force stack to end with 8 before next push, also saves RSP to restore stack
%elifdef CLEAN
    PUSH    RAX                           ; dummy push to make stack end with 8 before next push
%endif

; Note to SkyLined: instructions on 32-bit registers are automatically sign-extended to 64-bits.
; This means LODSD will set the high DWORD of RAX to 0 if top bit of EAX was 0, or 0xFFFFFFFF if it was 0x80000000.
    PUSH    BYTE 0x60                     ; Stack 
    POP     RDX                           ; RDX = 0x60
%else
%ifdef FUNC
%ifdef CLEAN
    PUSH    RAX                           ; exchanged RDX
    PUSH    RCX
%endif
    PUSH    RBX
    PUSH    RSI
    PUSH    RDI
    PUSH    RBP                           ; Stack now ends with 8 (!CLEAN) or is 16 byte (CLEAN) aligned
%endif
%ifdef CLEAN
%ifndef STACK_ALIGN
    PUSH    RAX                           ; dummy push to make stack end with 8 before next push
%endif
%endif
    MOV     DL, 0x60
%endif
%ifndef USE_COMMON
    ; > Here I don't understand why doesen't works <
    PUSH    0x20657865 ; (space)exe
    PUSH    0x2E646D63 ; .dmc
                       ; I have already tried to use the macro B2DW, the same, crash at startup.
    PUSH    RSP
    POP     RCX                           ; RCX = &("calc")
%endif
    SUB     RSP, RDX                      ; Stack was 16 byte aligned already and there are >4 QWORDS on the stack.
    MOV     RSI, [GS:RDX]                 ; RSI = [TEB + 0x60] = &PEB
    MOV     RSI, [RSI + 0x18]             ; RSI = [PEB + 0x18] = PEB_LDR_DATA
    MOV     RSI, [RSI + 0x10]             ; RSI = [PEB_LDR_DATA + 0x10] = LDR_MODULE InLoadOrder[0] (process)
    LODSQ                                 ; RAX = InLoadOrder[1] (ntdll)
    MOV     RSI, [RAX]                    ; RSI = InLoadOrder[2] (kernel32)
    MOV     RDI, [RSI + 0x30]             ; RDI = [InLoadOrder[2] + 0x30] = kernel32 DllBase
; Found kernel32 base address (RDI)
shellcode_common:
    ADD     EDX, DWORD [RDI + 0x3C]       ; RBX = 0x60 + [kernel32 + 0x3C] = offset(PE header) + 0x60
; PE header (RDI+RDX-0x60) = @0x00 0x04 byte signature
;                            @0x04 0x18 byte COFF header
;                            @0x18      PE32 optional header (= RDI + RDX - 0x60 + 0x18)
    MOV     EBX, DWORD [RDI + RDX - 0x60 + 0x18 + 0x70] ; RBX = [PE32+ optional header + offset(PE32+ export table offset)] = offset(export table)
; Export table (RDI+EBX) = @0x20 Name Pointer RVA
    MOV     ESI, DWORD [RDI + RBX + 0x20] ; RSI = [kernel32 + offset(export table) + 0x20] = offset(names table)
    ADD     RSI, RDI                      ; RSI = kernel32 + offset(names table) = &(names table)
; Found export names table (RSI)
    MOV     EDX, DWORD [RDI + RBX + 0x24] ; EDX = [kernel32 + offset(export table) + 0x24] = offset(ordinals table)
; Found export ordinals table (RDX)
find_winexec_x64:
; speculatively load ordinal (RBP)
    MOVZX   EBP, WORD [RDI + RDX]         ; RBP = [kernel32 + offset(ordinals table) + offset] = function ordinal
    LEA     EDX, [RDX + 2]                ; RDX = offset += 2 (will wrap if > 4Gb, but this should never happen)
    LODSD                                 ; RAX = &(names table[function number]) = offset(function name)
    CMP     DWORD [RDI + RAX], B2DW('W', 'i', 'n', 'E') ; *(DWORD*)(function name) == "WinE" ?
    JNE     find_winexec_x64              ;
    MOV     ESI, DWORD [RDI + RBX + 0x1C] ; RSI = [kernel32 + offset(export table) + 0x1C] = offset(address table)
    ADD     RSI, RDI                      ; RSI = kernel32 + offset(address table) = &(address table)
    MOV     ESI, [RSI + RBP * 4]          ; RSI = &(address table)[WinExec ordinal] = offset(WinExec)
    ADD     RDI, RSI                      ; RDI = kernel32 + offset(WinExec) = WinExec
; Found WinExec (RDI)
    CDQ                                   ; RDX = 0 (assuming EAX < 0x80000000, which should always be true)
    CALL    RDI                           ; WinExec(&("calc"), 0);
%ifdef FUNC
%ifdef CLEAN
%ifdef STACK_ALIGN
    ADD     RSP, 0x68                     ; reset stack to where it was after pushing registers
%else
    ADD     RSP, 0x70                     ; reset stack to where it was after pushing registers
%endif
%else
    ADD     RSP, 0x68                     ; reset stack to where it was after pushing registers
%endif
%ifndef PLATFORM_INDEPENDENT
%ifdef STACK_ALIGN
    POP     RSP
%endif
%endif
    POP     RBP                           ; POP registers
    POP     RDI
    POP     RSI
    POP     RBX
%ifndef PLATFORM_INDEPENDENT
%ifdef CLEAN
    POP     RDX                           ; POP additional registers
    POP     RCX
    POP     RAX
%endif
    RET                                   ; Return
%else
%ifdef CLEAN
    POP     RCX                           ; POP additional registers
    POP     RDX
%endif
%ifdef STACK_ALIGN
    POP     RSP
%endif
%ifdef CLEAN
    POP     RAX
%endif
    RET                                   ; Return
%endif
%endif

To test if everything works, I get the null byte shellcode and launch from C++ with

void *exec = VirtualAlloc(0, sizeof(shellcode), MEM_COMMIT, PAGE_EXECUTE_READWRITE);
memcpy(exec, shellcode, sizeof(shellcode));
((void(*)())exec)();

If I use push 0x636C6163 (calc) everything works.

Again, I'm sorry if this is a total noob question, but I can't figure out how to solve it.

Thank you for your time!

Mutu A.
  • 248
  • 3
  • 17
  • 1
    In 64-bit mode, 32-bit `push` isn't available, but the size of the immediate is limited to 32 bits. So you can't push a contiguous string; you'd have to `push 0x2E646D63` ; .dmc / `mov dword [rsp+4], 0x20657865` – Peter Cordes Jun 04 '18 at 08:29
  • @PeterCordes Thank you! Now works! I have searched for 3 entirely days and the solution was only to use mov, I'm feel really stupid ahahah, thank you again! – Mutu A. Jun 04 '18 at 08:41
  • Next time use a debugger to single-step and examine memory. You would have seen the 4 bytes of zeros. There are of course other possible solutions, like `mov r64, imm64` / `push` – Peter Cordes Jun 04 '18 at 08:50
  • @PeterCordes Sure! I'm sorry for the duplicate, but seems that I was searching the wrong answer. if you post your comment as answer I will mark as *accepted answer*. – Mutu A. Jun 04 '18 at 08:58
  • I closed it as a dup because this doesn't need a separate answer, just a comment to explain why it's a duplicate. (And I posted that comment before digging up the 2nd dup). You don't need to apologize; it's obviously a duplicate to an expert, but if you don't understand the problem then often you can't find the relevant duplicates. – Peter Cordes Jun 04 '18 at 09:02
  • @PeterCordes Oh, perfect! Have a nice day! :) – Mutu A. Jun 04 '18 at 09:08
  • @PeterCordes Hi. I have another small question relative to the main question, if I try to concatenate a bigger string as *C:\Windows\System32\cmd.exe* like that `PUSH 0x575C3A43 MOV DWORD [RSP+4], 0x6F646E69 MOV DWORD [RSP+8], 0x535C7377 MOV DWORD [RSP+12], 0x65747379 MOV DWORD [RSP+16], 0x5C32336D MOV DWORD [RSP+20], 0x2E646D63 MOV DWORD [RSP+24], 0x20657865` The final string is full of garbage bytes, I have already read the other 2 question that you have linked, but I really don't understand why the final string is full of garbage. I think assembly isn't for me... – Mutu A. Jun 04 '18 at 11:08
  • Did you reserve enough stack space? Looks like you overwrite your return address if you use `mov` stores above `rsp+8` after one push. Use your debugger to single-step, and/or set a watchpoint on the relevant memory to find out when it's being modified. And BTW, NASM allows `push 'calc'`, so you don't have to work out the hex codes if your bytes are all printable ASCII. – Peter Cordes Jun 04 '18 at 11:17
  • 1
    @PeterCordes Yes, the problem was that I wasn't reserving enough space for the stack, now I have solved, thank you again! – Mutu A. Jun 06 '18 at 09:39

0 Answers0