1

I know this question gets asked a lot but every answer I find doesn't work for me. I'm trying to load stage 2 of my OS, located at the second sector of my image file(0x200)

This is the code I tried to use:


bits 16                             ; Starting at 16 bits
org 0x0                               ; And starting at 0

jmp main                            ; Hop to main!


; TODO: copy comment from prev. loader
; args: SI
print:
    lodsb                           ; Load the next/first character to AL
    or al, al                       ; Is it 0?
    jz donePrint                    ; Yes - Done.
    mov ah, 0eh                     ; No - keep going.
    int 10h                         ; Print character.
    jmp print                       ; Repeat
donePrint:
    ret                             ; Return


; todo: args
readSector:
    mov ah, 02h
    mov al, 1
    mov dl, 0x80
    mov ch, 0
    mov dh, 0
    mov cl, 2

    mov bx, 0x500


    int 13h
    jnc good
    jmp fail

main:
    ; First, setup some registers.
    cli                             ; Clear interrupts
    mov ax, 0x07C0                  ; Point all registers to segment
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax

    ; Create the stack(0x0000-0xFFFF).
    mov ax, 0x0000
    mov ss, ax                      ; Point SS to 0x0000
    mov sp, 0xFFFF                  ; Stack pointer at 0xFFFF
    sti                             ; Restore interrupts

    mov si, LOADING
    call print

    call readSector

    

    
fail:
    mov si, FAILURE_MSG
    call print
    
good:
    mov si, LOADOK 
    call print
    jmp 0x500

LOADING        db 0x0D, 0x0A, "Booting loader...", 0x0D, 0x0A, 0x00
FAILURE_MSG    db 0x0D, 0x0A, "ERROR: Press any key to reboot.", 0x0A, 0x00
LOADOK    db 0x0D, 0x0A, "load ok", 0x0A, 0x00



TIMES 510 - ($-$$) DB 0
DW 0xAA55

But it just bootloops. I tried other solutions to no avail. What am I doing wrong? If I need to update the question please tell me.

Thank you!

EDIT #1: According to Sep Roland's answer, I updated my code, but it is still not working. I'm putting the updated code here if it's any help. Also, if asked for it, I can post my 2nd stage code. It should be using 0x500 as org. NEW CODE:

bits 16                             ; Starting at 16 bits
org 0x0                             ; And starting at 0

jmp main                            ; Hop to main!


; TODO: copy comment from prev. loader
; args: SI
print:
    lodsb                           ; Load the next/first character to AL
    or al, al                       ; Is it 0?
    jz donePrint                    ; Yes - Done.
    mov ah, 0eh                     ; No - keep going.
    int 10h                         ; Print character.
    jmp print                       ; Repeat
donePrint:
    ret                             ; Return


; todo: args
readSector:
    mov ah, 02h
    mov al, 1
    mov ch, 0
    mov dh, 0
    mov cl, 2

    mov bx, 0x500


    int 13h
    jnc good
    jmp fail

main:
    ; First, setup some registers.
    cli                             ; Clear interrupts
    mov ax, 0x07C0                  ; Point all registers to segment
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax

    ; Create the stack(0x0000-0xFFFF).
    mov ax, 0x0000
    mov ss, ax                      ; Point SS to 0x0000
    mov sp, 0xFFFE                  ; Stack pointer at 0xFFFE
    sti                             ; Restore interrupts

    mov si, LOADING
    call print

    call readSector

    

    
fail:
    mov si, FAILURE_MSG
    call print
end:
    cli
    hlt
    jmp end
    
good:
    mov si, LOADOK 
    call print
    jmp 0x07C0:0x0500

LOADING        db 0x0D, 0x0A, "Booting loader...", 0x0D, 0x0A, 0x00
FAILURE_MSG    db 0x0D, 0x0A, "ERROR: Press any key to reboot.", 0x0A, 0x00
LOADOK    db 0x0D, 0x0A, "load ok", 0x0A, 0x00



TIMES 510 - ($-$$) DB 0
DW 0xAA55

EDIT #2: Posting second stage code including gdt.inc because someone mentioned LGDT may have been causing a problem:

MAIN CODE(SOME PARTS HAVE BEEN CUT OUT BUT THEY ARE NOT REQUIRED, like strings)

bits 16                                 ; We start at 16 bits

org 0x500                               ; We are loaded in at 0x500

jmp main                                ; Jump to main code.


; ----------------------------------------
; Includes
; ----------------------------------------
%include "include/stdio.inc"
%include "include/gdt.inc"
%include "include/a20.inc"


; ---------------------------------------
; Data and strings
; ---------------------------------------

stringhidden db "Not showing string.", 0x0D, 0x0A, 0x00
stringhidden db "Not showing string.", 0x0D, 0x0A, 0x00

; ---------------------------------------------------------------------
; main - 16-bit entry point
; Installing GDT, storing BIOS info, and enabling protected mode
; ---------------------------------------------------------------------

main:
    ; Our goal is jump to main32 to become 32-bit

    ; Setup segments and stack
    cli                                 ; Clear interrupts
    xor ax, ax                          ; Null segments AX, DS, and ES
    mov ds, ax
    mov es, ax
    mov ax, 0x9000                      ; Stack begins at 0x9000-0xFFFF
    mov ss, ax
    mov sp, 0xFFFF                      ; Stack pointer is 0xFFFF
    sti                                 ; Enable interrupts

    

    ; Install the GDT
    call installGDT                     ; Install the GDT

    ; Enable A20
    call enableA20_KKbrd_Out            ; Enable A20 through output port

    ; Print loading messages
    mov si, msg1
    call print16                        ; Print the message

    mov si, msg2                        ; A message
    call print16                        ; Print the message

    ; Enter protected mode
    cli                                 ; Clear interrupts
    mov eax, cr0                        ; Set bit 0 in CR0--ENTER protected mode
    or eax, 1
    mov cr0, eax

    jmp CODE_DESC:main32                ; Far jump to fix CS
    
    ; We can't re-enable interrupts because that would triple-fault. This will be fixed in main32.


bits 32                                 ; We are now 32 bit!

%include "include/stdio32.inc"

main32:
    ; Set registers up
    mov ax, 0x10                        ; Setup data segments to 0x10(data selector)
    mov ds, ax
    mov ss, ax
    mov es, ax
    mov esp, 90000h                     ; Stack begins from 90000h
    
    call clear32                        ; Clear screen
    mov ebx, MSGHIDDEN                   ; Setup params for our message
    call puts32                         ; Call puts32 to print

    cli                                 ; Clear interrupts
    hlt                                 ; Halt the processor

LGDT CODE:

%ifndef __GDT_INC_67343546FDCC56AAB872_INCLUDED__
%define __GDT_INC_67343546FDCC56AAB872_INCLUDED__

bits 16                     ; We are in 16-bit mode


; -----------------------------------------
; installGDT - install the GDT
; -----------------------------------------
installGDT:
    cli                     ; Clear interrupts
    pusha                   ; Save the registers
    lgdt [toc]              ; Load GDT into GDTR
    sti                     ; Re-enable interrupts
    popa                    ; Restore registers
    ret                     ; Return!


; ----------------------------------------
; Global Descriptor Table data
; ----------------------------------------

gdt_data:
    dd 0                    ; Null descriptor
    dd 0
    
    ; GDT code starts here
    dw 0FFFFh               ; Limit low
    dw 0                    ; Base low
    db 0                    ; Base middle
    db 10011010b            ; Access
    db 11001111b            ; Granularity
    db 0                    ; Base high

    ; GDT data starts here(mostly same as code, only difference is access)
    dw 0FFFFh               ; Limit low, again.
    dw 0                    ; Base low
    db 0                    ; Base middle
    db 10010010b            ; Access - different
    db 11001111b            ; Granularity
    db 0

gdt_end:
toc:
    dw gdt_end - gdt_data - 1
    dd gdt_data             ; Base of GDT

; Descriptor offsets names

%define NULL_DESC 0
%define CODE_DESC 0x8
%define DATA_DESC 0x10

; End of GDT code.
%endif ;__GDT_INC_67343546FDCC56AAB872_INCLUDED__

EDIT #3: Possible problems with stdio and stdio32 so putting those here

stdio.inc:

; ==============================================
; stdio.inc - IO routines
; Thanks to BrokenThorn Entertainment
; ==============================================

; First, show that we are defining stdio.inc
%ifndef __STDIO_INC_67343546FDCC56AAB872_INCLUDED__
%define __STDIO_INC_67343546FDCC56AAB872_INCLUDED__

; ------------------------------------------------
; Print16 - printing a null terminated string
; SI - 0 terminated string
; ------------------------------------------------

print16:
    pusha                                       ; Save registers for later
.loop1:
    lodsb                                       ; Load the next byte from the string into AL
    or al, al                                   ; Is AL 0?
    jz print16done                              ; Yes - we are done.
    mov ah, 0eh                                 ; No - print next character
    int 10h                                     ; Call BIOS
    jmp .loop1                                  ; Repeat!
print16done:
    popa                                        ; Restore registers
    ret                                         ; Return



%endif ;__STDIO_INC_67343546FDCC56AAB872_INCLUDED__

stdio32.inc:

; ==================================================
; stdio32.inc - Handles 32-bit graphics
; ==================================================

%ifndef __GFX_INC_67343546FDCC56AAB872_INCLUDED__
%define __GFX_INC_67343546FDCC56AAB872_INCLUDED__

bits 32                                         ; 32-bits

%define VIDEO_MEMORY 0xB8000                    ; Video memory address
%define COLS 80                                 ; Width of the screen
%define LINES 25                                ; Height of the string
%define CHARACTER_ATTRIBURE 63                  ; White text on Cyan background

_CurrentXPos db 0
_CurrentYPos db 0

; ---------------------------------------------------------
; char32 - Print a character to the screen(32-bit)
;   BL - Character to print
; ---------------------------------------------------------

char32:
    pusha                                       ; Save registers
    mov edi, VIDEO_MEMORY                       ; Get the pointer to the video memory

    ; Get current position
    xor eax, eax                                ; Zero-out EAX

    mov ecx, COLS*2                             ; Mode 7 has 2 bytes per character - and so COLS*2 bytes per line.
    mov al, byte [_CurrentYPos]                 ; Get Y position
    mul ecx                                     ; Multiply COLS * Y
    push eax                                    ; Save EAX--the multiplication

    mov al, byte [_CurrentXPos]                 ; Multiply _CurrentXPos by 2 because 2 bytes per char(Mode 7)
    mov cl, 2
    mul cl
    pop ecx                                     ; Pop Y*COLS result
    add eax, ecx
    
    xor ecx, ecx
    add edi, eax                                ; Add to base address

    ; Watch for a new line!
    cmp bl, 0x0A                                ; 0x0A - newline character.
    je .row                                     ; Jump to .row if newline char

    ; Print the character
    mov dl, bl                                  ; Get character
    mov dh, CHARACTER_ATTRIBURE                 ; Change DH to Character Attribute
    mov word [edi], dx                          ; Write to video memory
    
    ; Update next pos
    inc byte [_CurrentXPos]                     ; Go to next character
    ;cmp byte [_CurrentXPos], COLS               ; EOL?
    ;je .row                                     ; Yep - move to next row
    jmp .done                                   ; Nope - BAIL!
.row:
    ; Goto next row.
    mov byte [_CurrentXPos], 0                  ; Return to col 0
    inc byte [_CurrentYPos]                     ; Go to next row.

.done:
    ; Return
    popa
    ret


; ---------------------------------------------------------
; puts32 - print a null terminated string
;   EBX - String to print
; ---------------------------------------------------------

puts32:

    ; Store registers(EBX and EDI)
    pusha                                       ; Save registers
    push ebx                                    ; Copy string
    pop edi

.loop:
    
    mov bl, byte [edi]                          ; Get next character
    cmp bl, 0                                   ; Check if it's null
    je .done                                    ; It is - done printing.

    call char32                                 ; It isn't - print the character

    inc edi                                     ; Increment EDI for next character
    jmp .loop                                   ; Restart loop

.done:
    ; Update the hardware cursor

    mov bh, byte [_CurrentXPos]                 ; BH and BL are the params for movecursor
    mov bl, byte [_CurrentYPos]                 
    call movecursor                             ; Update cursor position

    popa                                        ; Restore registers
    ret                                         ; Return!

bits 32

; ---------------------------------------------------------
; movecursor - Move the cursor to an X and Y position
;   BH - X position
;   BL - Y position
; ---------------------------------------------------------

movecursor:
    
    pusha                                       ; Save registers

    ; Get current position(BH and BL are relative to the current position on screen, not memory)

    xor eax, eax                                ; Clear EAX
    mov ecx, COLS                               ; Store COLS in ECX for multiplication
    mov al, bh                                  ; Get Y position
    mul ecx                                     ; Multiply Y by cols
    add al, bl                                  ; Add X
    mov ebx, eax

    ; Set low byte index to VGA register

    mov al, 0x0f
    mov dx, 0x03D4
    out dx, al

    mov al, bl
    mov dx, 0x03D5
    out dx, al

    ; Do the same but for high byte

    xor eax, eax
    
    mov al, 0x0e
    mov dx, 0x03D4
    out dx, al

    mov al, bl
    mov dx, 0x03D5
    out dx, al

    popa                                        ; Restore registers
    ret                                         ; Return



; ---------------------------------------------------------
; clear32 - clearing the screen
; ---------------------------------------------------------

clear32:
    pusha                                       ; Save registers

    cld
    mov edi, VIDEO_MEMORY                       ; Set EDI to video memory
    mov cx, 2000                                
    mov ah, CHARACTER_ATTRIBURE                 ; Clear screen with character attribute
    mov al, ' '                                 ; Replace all chars with space
    rep stosw

    mov byte [_CurrentXPos], 0                  ; Reset X and Y position
    mov byte [_CurrentYPos], 0
    popa                                        ; Restore registers
    ret

    

; ---------------------------------------------------------
; gotoxy - Set X and Y position
;   AL - X position
;   AH - Y position
; ---------------------------------------------------------

gotoxy:
    pusha
    mov [_CurrentXPos], al                      ; Set X and Y position
    mov [_CurrentYPos], ah                      
    popa
    ret

%endif ;__STDIO_INC_67343546FDCC56AAB872_INCLUDED__
Tech
  • 37
  • 1
  • 8
  • 1
    Can you provide your second stage. Any chance that second stage does an `lgdt` . If it does I can think of why this is failing. If you are using `lgdt` as part of getting into protected mode (I'm guessing) you may be encountering an issue cause by DS != 0 and the GDTR needing a linear address which would require a fixup of adding 0x7c00 to the base address of the GDT in the GDTR. Usually people who use 0x500 intend to put the second stage at 0x0000:0x0500. It is a bit unusual to see 0x07c0:0x0500 which is physical address 0x8100. – Michael Petch Oct 26 '22 at 03:39
  • If the LGDT is a problem the far jmp in stage 2 to get into protected mode may have to be adjusted as well. If you can show us stage 2 we can likely solve your problem. I'm not convinced the problem you are seeing is related to boot sector. – Michael Petch Oct 26 '22 at 04:20
  • 1
    I will update the code with stage 2 later in the day. Yes, that likely is the problem, LGDT and other methods are being called. – Tech Oct 26 '22 at 12:58
  • I'd personally set `org 0x7c00` in the bootloader, `org 0x500` in the second stage and then set all the segments (ES, DS to 0 instead of 0x07c0). This would also load your second stage to 0x0000:0x0500 rather than 0x07c0:0x0500. Using non zero segments comes with pitfalls that can be avoided (including issues you'd encounter with GDT etc) by using 0 instead. – Michael Petch Oct 26 '22 at 13:32
  • @MichaelPetch I updated the code to include Stage 2 and gdt.inc – Tech Oct 26 '22 at 22:20
  • Why do you use 0x07c0 for a segment in the bootloader and then use 0 in stage2? Your stage 2 is set up to assume it is loaded at 0x0000:0x0500 but your bootloader loaded the sectors to 0x07c0:0x0500. I recommend keeping your stage 2 as is. In the bootloader set ES and DS to 0x0000 rather than 0x07c0 and use `org 0x7c00` in the bootloader. Then change `jmp 0x07C0:0x0500` to `jmp 0x0000:0x0500` – Michael Petch Oct 26 '22 at 22:27
  • 1
    @MichaelPetch It worked! Thank you! Can you post this as an answer so I can accept it? – Tech Oct 26 '22 at 23:15
  • I'd recommend asking @SepRoland to update his answer with those changes. – Michael Petch Oct 26 '22 at 23:16
  • @MichaelPetch Actually, I'm experiencing one issue, but I don't know if it's your fault... The code should print a string and set the background to cyan, but the only thing I'm seeing is the cyan background. I tried it with a previous loader that I wasn't experiencing issues with(BrokenThorn's FAT bootloader) and no issues... Perhaps this is a fault of my own code, but is it something else that's wrong? **update:** Almost confirmed this is a problem with a kernel/loader interaction. QEMU loads the old loader just fine on same settings – Tech Oct 30 '22 at 15:00
  • After making a few small changes to the code, it prints the string, partially... The string is "! < ****** ** *** ***** > ! (newline) TEST 01." It prints "<! ****** **" and stops. – Tech Oct 30 '22 at 15:52
  • @Tech "Do the same but for **high** byte" in the HardwareCursor code is `out`ing BL twice instead of using BH the second time. And *clear32* moves 2000 in CX and forgets that the garbage in the high word can do a lot of harm. Use `mov ecx, 2000`. – Sep Roland Nov 02 '22 at 22:14
  • @Tech *movecursor* receives X in BH and Y in BL (which is opposite of what I would have expected), and you are multiplying X*80. This must be Y*80. – Sep Roland Nov 02 '22 at 22:23
  • @Tech I never remember, but for NASM is `pusha` the same as `pushad` when `bits 32` is active? It could be good to always use `pushad` to be absolutely sure the 32-bit registers are preserved. Now OoO... – Sep Roland Nov 02 '22 at 22:50

1 Answers1

8

EDIT0 reviews the 1st stage
EDIT1 reviews the 2nd stage
EDIT2 reviews the included stdio32.inc

[EDIT0]

The sector that you load yourself, was loaded at offset 0x500 in the extra segment at 0x7C0.
The jmp 0x500 instruction jumps to offset 0x500 in the code segment.
There's no guarantee that CS==0x7C0. Use a far jump instead:

jmp 0x07C0:0x0500

mov dl, 0x80

Are you sure about this drive number? It's always better to use the value that BIOS provided in the DL register when the bootloader is given control.


mov sp, 0xFFFF

A word-aligned stack pointer will be so much nicer!

Use mov sp, 0xFFFE or even xor sp, sp (trusting wraparound).


fail:
  mov si, FAILURE_MSG
  call print

good:
  mov si, LOADOK 
  call print
  jmp 0x500

In case loading the sector failed, you jump to fail, but after displaying the message you happily continue (fall through) with the code at good. You need to halt:

fail:
    mov  si, FAILURE_MSG
    call print
theEnd:
    cli
    hlt
    jmp  theEnd
good:
    mov  si, LOADOK
    call print
    jmp  0x07C0:0x0500

[EDIT1]

xor ax, ax   ; Null segments AX, DS, and ES
mov ds, ax
mov es, ax

You have applied corrections to the first stage bootloader. Control has successfully passed to 0x07C0:0x0500. Because the second stage uses an org 0x500 it is vital that the segment registers (at least DS) remain at 0x07C0. But the first thing I see is that you reload DS and ES with 0. This is not going to work since it creates a mismatch between the offsets (in accordance with the org) generated by the assembler and the offsets (relative DS) where the actual data resides.

The first manifestation of this mismatch is with the lgdt [toc] instruction.
All the data from the second stage resides in memory above the 0x8100 mark (0x7C00 + 0x0500).
The toc label on the other hand will have been translated by the assembler to an offset address of little over 0x0500. With DS=0, this addresses memory much lower than that 0x8100 mark. There's simply no valid data to act upon, hence the crash (or similar)!

The problem is not only with the lgdt [toc] instruction. mov si, msg1 and mov si, msg2 will fail in the same way, and mov ebx, MSGHIDDEN and dd gdt_data will fail on not being linear addresses at all. dw gdt_end - gdt_data - 1 is not impaired since the difference does not change when both magnitudes are wrong (in the same manner).

What you should do is either keep the segment registers at 0x07C0 in accordance with the org 0x0500, or much better zero the segment registers already in the first stage bootloader and use an org 0x7C00 for the first stage and an org 0x8100 for the second stage. These settings will leave everything at the same place in memory, with a 768-bytes gap between the first and second stages which is something we don't see everyday.
The much preferred way however is to zero the segment registers already in the first stage bootloader and use an org 0x7C00 for the first stage and an org 0x0600 for the second stage. This puts the second stage right behind the low memory BIOS variables. MS-DOS being our great example.

A further review

installGDT:
    cli                     ; Clear interrupts
    pusha                   ; Save the registers
    lgdt [toc]              ; Load GDT into GDTR
    sti                     ; Re-enable interrupts
    popa                    ; Restore registers
    ret                     ; Return!

It is redundant to preserve the general purpose registers in this code. The lgdt instruction doesn't change any of them.


; Create the stack(0x0000-0xFFFF).
mov ax, 0x0000
mov ss, ax        ; Point SS to 0x0000
mov sp, 0xFFFE    ; Stack pointer at 0xFFFE
mov ax, 0x9000    ; Stack begins at 0x9000-0xFFFF
mov ss, ax
mov sp, 0xFFFF    ; Stack pointer is 0xFFFF
mov ax, 0x10      ; Setup data segments to 0x10(data selector)
mov ds, ax
mov ss, ax
mov es, ax
mov esp, 90000h   ; Stack begins from 90000h

(1) Don't use an odd value for SP.
(2) Don't put anything between loading SS and ESP.
(3) Don't use the word "begin" both for the low end and the high end of the stack.

You are setting up the stack 3 times and each time in a different place and size! The first time it runs from 0x00000000 to 0x0000FFFD, the second time it runs from 0x00090000 to 0x0009FFFE, and the third time it runs from 0x00000000 to 0x0008FFFF.
I would advice to setup the stack such that the real mode addresses correspond to the protected mode addresses. At least for the top 64KB.
In the first stage use:

mov  ax, 0x8000
mov  ss, ax
xor  sp, sp           ; 0x00080000 - 0x0008FFFF (64KB)

In the second stage use:

mov  ax, DATA_DESC
mov  ss, ax
mov  esp, 0x00090000  ; 0x00000000 - 0x0008FFFF (576KB)

The first stage

bits 16
org  0x7C00

jmp  main

; args: SI
print:
    lodsb
    or   al, al
    jz   donePrint
    mov  bx, 0007h
    mov  ah, 0Eh
    int  10h
    jmp  print
donePrint:
    ret

main:
    cli
    xor  ax, ax
    mov  ds, ax
    mov  es, ax
    mov  fs, ax
    mov  gs, ax
    mov  ax, 0x8000    ; Stack between 0x8000:0x0000
    mov  ss, ax        ;           and 0x8000:0xFFFF (64KB)
    xor  sp, sp
    sti

    mov  si, LOADING
    call print

readSector:
    mov  dh, 0
    mov  cx, 0002h
    mov  bx, 0x0600     ; Sector buffer at 0x0000:0x0600
    mov  ax, 0201h
    int  13h
    jnc  good
    
fail:
    mov  si, FAILURE_MSG
    call print
end:
    cli
    hlt
    jmp  end
    
good:
    mov  si, LOADOK 
    call print
    jmp  0x0000:0x0600  ; Start second stage

LOADING     db 13, 10, "Booting loader...", 13, 10, 0
FAILURE_MSG db 13, 10, "ERROR: Press any key to reboot.", 10, 0
LOADOK      db 13, 10, "load ok", 10, 0

TIMES 510 - ($-$$) DB 0
DW 0xAA55

The second stage

bits 16
org  0x0600

jmp  main
; ---------------------------------------
; Includes
; ---------------------------------------
%include "include/stdio.inc"
%include "include/gdt.inc"
%include "include/a20.inc"
; ---------------------------------------
; Data and strings
; ---------------------------------------
stringhidden db "Not showing string.", 0x0D, 0x0A, 0x00
stringhidden db "Not showing string.", 0x0D, 0x0A, 0x00
; ---------------------------------------
; main - 16-bit entry point
; Installing GDT, storing BIOS info, and enabling protected mode
; ---------------------------------------

main:
    call installGDT
    call enableA20_KKbrd_Out
    mov  si, msg1
    call print16
    mov  si, msg2
    call print16

    ; Enter protected mode
    cli
    mov  eax, cr0
    or   eax, 1
    mov  cr0, eax

    jmp  CODE_DESC:main32 ; Far jump to fix CS
    
    ; We can't re-enable interrupts because that would triple-fault. This will be fixed in main32.


bits 32                   ; We are now 32 bit!

%include "include/stdio32.inc"

main32:
    ; Set registers up
    mov  ax, DATA_DESC
    mov  ds, ax
    mov  es, ax
    mov  fs, ax
    mov  gs, ax
    mov  ss, ax           ; Stack between 0x00000000
    mov  esp, 0x00090000  ;           and 0x0008FFFF (576KB)

    call clear32          ; Clear screen
    mov  ebx, MSGHIDDEN
    call puts32           ; Call puts32 to print

    cli
    hlt

    ...

[EDIT2]

@Sep Roland Your changes mostly work, but unfortunately there might be something wrong with my video code as well.. The system appears to stop typing after a little bit(24 bytes) and just gives up. I posted video code, but if this seems a like too much to ask I can stop here. Thank you!

I reviewed your stdio32.inc and have found a number of errors in it!

  • The char32 code mentions in a comment "; Mode 7 has 2 bytes per character..." If indeed you are working on the monochrome video mode 7 then VIDEO_MEMORY should be set to 0xB0000 instead of 0xB8000.
  • The clear32 code moves 2000 in CX but rep stosw will be using ECX. The garbage in the high word of ECX can do a lot of harm. Use mov ecx, 2000.
  • The movecursor code receives X in BH and Y in BL, which is opposite of what I would have expected, you are calculating X * 80 + Y which needs to be Y * 80 + X, the forementioned addition uses the wrong size, and you out twice the low byte instead of doing low byte then high byte.

The improved stdio32.inc

; ==================================================
; stdio32.inc - Handles 32-bit graphics
; ==================================================

%ifndef __GFX_INC_67343546FDCC56AAB872_INCLUDED__
%define __GFX_INC_67343546FDCC56AAB872_INCLUDED__

bits 32

%define VIDEO_MEMORY 0xB8000  ; Video memory address
%define COLS 80               ; Width of the screen
%define LINES 25              ; Height of the screen
%define ATTRIB 0x3F           ; WhiteOnCyan
_CurrentXPos db 0
_CurrentYPos db 0

; ---------------------------------------------------------
; char32 - Print a character to the screen(32-bit)
;   BL - Character to print
; ---------------------------------------------------------

char32:
    cmp   bl, 10
    je    .row
    push  eax
    push  edi
    movzx edi, byte [_CurrentYPos]
    imul  edi, COLS*2
    movzx eax, byte [_CurrentXPos]
    lea   edi, [VIDEO_MEMORY + edi + eax * 2]
    mov   al, bl
    mov   ah, ATTRIB
    mov   [edi], ax
    pop   edi
    pop   eax
    inc   byte [_CurrentXPos]
    cmp   byte [_CurrentXPos], COLS               ; EOL?
    je    .row
    ret
.row:
    mov   byte [_CurrentXPos], 0
    inc   byte [_CurrentYPos]
    ret

; ---------------------------------------------------------
; puts32 - print a null terminated string
;   EBX - String to print
; ---------------------------------------------------------

puts32:

    push  ebx
    push  edi
    mov   edi, ebx
    jmp   .start
.loop:
    call  char32
    inc   edi
.start:
    mov   bl, [edi]
    test  bl, bl
    jnz   .loop
    movzx ebx, word [_CurrentXPos] ; Load XPos and YPos together!
    call  movecursor               ; Update hardware cursor
    pop   edi
    pop   ebx
    ret

; ---------------------------------------------------------
; movecursor - Move the cursor to an X and Y position
;   BL - X position
;   BH - Y position
;   BH and BL are relative to the current position on screen
; ---------------------------------------------------------

movecursor:
    pushad
    movzx eax, bh             ; BH * COLS + BL
    imul  eax, COLS
    movzx ebx, bl
    add   ebx, eax
    ; Set low byte index to VGA register
    mov   al, 0x0F
    mov   dx, 0x03D4
    out   dx, al
    mov   al, bl
    inc   dx
    out   dx, al
    ; Do the same but for high byte
    mov   al, 0x0E
    dec   dx
    out   dx, al
    mov   al, bh
    inc   dx
    out   dx, al
    popad
    ret

; ---------------------------------------------------------
; clear32 - clearing the screen
; ---------------------------------------------------------

clear32:
    pushad
    mov   edi, VIDEO_MEMORY
    mov   ecx, 1000            ; 2000 words
    mov   eax, ((ATTRIB * 256 + 32) * 256 + ATTRIB) * 256 + 32
    rep stosd
    mov   [_CurrentXPos], cx   ; Reset XPos and YPos together!
    popad
    ret

; ---------------------------------------------------------
; gotoxy - Set X and Y position
;   AL - X position
;   AH - Y position
; ---------------------------------------------------------

gotoxy:
    mov   [_CurrentXPos], ax   ; Set XPos and YPos together!
    ret

%endif ;__STDIO_INC_67343546FDCC56AAB872_INCLUDED__

While optimizing the code, it has become much shorter.
What I am thinking now is that your code had gotten longer than 512 bytes and that the single-sector load from the first stage did not bring all of it into memory. That could certainly explain some partial string output that you are experiencing.

I never remember (and my manual doesn't mention it), but for NASM is pusha the same as pushad when bits 32 is active?
I deal with it this way:

  • Always writing pushad in 32-bit code.
  • Avoiding pushad and preferring to push individual registers as this is faster.
Sep Roland
  • 33,889
  • 7
  • 43
  • 76
  • 2
    As a matter of practice, I find it best to start the boot block with a jump to `07c0:xxxx` or `0000:7cxx` and set all the other segment registers accordingly. Different BIOSes always do different weird things, and you can never assume what you've been given. (for that matter, there have been BIOSes in the past that didn't even set `dl` correctly) – sj95126 Oct 23 '22 at 15:48
  • 2
    @sj95126 : not setting CS will only affect a small subset of instructions that use indirect near CALL/JMP to absolute memory offsets. It's not often something you see in a bootloader. If you know you aren't using any such instructions it is safe to not set CS, especially if you are running low of space in your bootloader. If space isn't a problem then setting CS explicitly isn't really any issue at all. I had taken a variety of questions from other sites and consolidated them in a self Q&A here: https://stackoverflow.com/questions/34548325/near-call-jump-tables-dont-always-work-in-a-bootloader – Michael Petch Oct 23 '22 at 17:01
  • This is an incredible answer, I'm attempting these changes now – Tech Oct 25 '22 at 23:32
  • Hi! Thank you for your answer, but it's unfortunately not working. I updated the code(check original question), but it's still bootlooping. What should I do? The 2nd stage code is located at 0x200 and uses `org 0x500` as an instruction if that helps or makes a difference. – Tech Oct 25 '22 at 23:41
  • 1
    Sep you might want to check out the addition of his Stage2 code which was recently added. His Stage2 expects to be loaded at 0x0000:0x0500. I recommended in the comment under the question that he change to using `org 0x7c00`, use 0 for DS and ES segments and modify his JMP 0x07c0:0x0500 to 0x0000:0x0500. He says those changes worked. You might consider updating your answer. – Michael Petch Oct 26 '22 at 23:21
  • Thank you, I will try this when I get the chance! – Tech Nov 01 '22 at 20:33
  • @Sep Roland Your changes mostly work, but unfortunately there might be something wrong with my video code as well.. The system appears to stop typing after a little bit(24 bytes) and just gives up. I posted video code, but if this seems a like too much to ask I can stop here. Thank you! – Tech Nov 02 '22 at 21:57
  • @Tech Then perhaps post the code for *print16*, *clear32*, and *puts32*. I'll take a quick look. – Sep Roland Nov 02 '22 at 22:00
  • 1
    NASM does treat `pusha`, much like `pushf`, as defaulting to the word size of the segment. Ie `pusha` in a `bits 32` part is `pushad`, `pusha` in a `bits 16` part is `pushaw`. – ecm Nov 03 '22 at 17:43
  • @ecm That is the same as FASM then. Thanks for your quick reply. – Sep Roland Nov 03 '22 at 17:51
  • Wow, it works! Thank you so much to everyone who helped me on this question! Everything looks good and it's now printing more than 20 chars. Accepting this answer now – Tech Nov 04 '22 at 14:19
  • Oh, and I'm quite sure that the single sector load was the problem, as it definitely still doesn't have a lot of space. – Tech Nov 04 '22 at 21:02