1

I'm developing my own bootloader + kernel. I create a project and placed it on github: https://github.com/rprata/ubootlua (branch tmp-libc-implemenation)

I tried to run my boot.bin using QEMU:

qemu-system-i386 -fda boot.bin -nographic -serial stdio -monitor none

However a crash happens:

> qemu-system-i386 -fda ./deploy/boot.bin -nographic -serial stdio -monitor none
> WARNING: Image format was not specified for './deploy/boot.bin' and probing guessed raw.
>         Automatically detecting the format is dangerous for raw images, write operations on block 0 will be restricted.
>         Specify the 'raw' format explicitly to remove the restrictions.
> qemu: fatal: Trying to execute code outside RAM or ROM at 0x000a0000
> 
> EAX=00000055 EBX=00018eb4 ECX=00018eb3 EDX=00000000
ESI=00000001 EDI=00000000 EBP=00016058 ESP=00015f94
EIP=0009ffae EFL=00000896 [-OS-AP-] CPL=0 II=0 A20=1 SMM=0 HLT=0
ES =0010 00000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
CS =0008 00000000 ffffffff 00cf9a00 DPL=0 CS32 [-R-]
SS =0010 00000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
DS =0010 00000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
FS =0010 00000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
GS =0010 00000000 ffffffff 00cf9300 DPL=0 DS   [-WA]
LDT=0000 00000000 0000ffff 00008200 DPL=0 LDT
TR =0000 00000000 0000ffff 00008b00 DPL=0 TSS32-busy
GDT=     00007c36 00000018
IDT=     00000000 000003ff
CR0=00000011 CR2=00000000 CR3=00000000 CR4=00000000
DR0=00000000 DR1=00000000 DR2=00000000 DR3=00000000 
DR6=ffff0ff0 DR7=00000400
CCS=00000055 CCD=000000d1 CCO=ADDB    
EFER=0000000000000000
FCW=037f FSW=0000 [ST=0] FTW=00 MXCSR=00001f80
FPR0=0000000000000000 0000 FPR1=0000000000000000 0000
FPR2=0000000000000000 0000 FPR3=0000000000000000 0000
FPR4=0000000000000000 0000 FPR5=0000000000000000 0000
FPR6=0000000000000000 0000 FPR7=0000000000000000 0000
XMM00=00000000000000000000000000000000 XMM01=00000000000000000000000000000000
XMM02=00000000000000000000000000000000 XMM03=00000000000000000000000000000000
XMM04=00000000000000000000000000000000 XMM05=00000000000000000000000000000000
XMM06=00000000000000000000000000000000 XMM07=00000000000000000000000000000000
> makefile:26: recipe for target 'run' failed
> make: *** [run] Aborted (core dumped)

My boot.asm and linker.ld:

section .boot
bits 16                     ; We're working at 16-bit mode here
global boot

boot:
    mov ax, 0x2401          
    int 0x15                ; Enable A20 bit 

    mov ax, 0x3             ; Set VGA text mode 3
    int 0x10                ; Otherwise, call interrupt for printing the char   

    mov [disk],dl

    mov ah, 0x2             ;read sectors
    mov al, 60              ;sectors to read
    mov ch, 0               ;cylinder idx
    mov dh, 0               ;head idx
    mov cl, 2               ;sector idx
    mov dl, [disk]          ;disk idx
    mov bx, copy_target     ;target pointer
    int 0x13

    cli                     ; Disable the interrupts
    lgdt [gdt_pointer]      ; Load the gdt table
    mov eax, cr0            ; Init swap cr0...
    or eax,0x1              ; Set the protected mode bit on special CPU reg cr0
    mov cr0, eax
    jmp CODE_SEG:boot32     ; Long jump to the code segment


; base a 32 bit value describing where the segment begins
; limit a 20 bit value describing where the segment ends, can be multiplied by 4096 if granularity = 1
; present must be 1 for the entry to be valid
; ring level an int between 0-3 indicating the kernel Ring Level
; direction:
;  > 0 = segment grows up from base, 1 = segment grows down for a data segment
;  > 0 = can only execute from ring level, 1 = prevent jumping to higher ring levels
; read/write if you can read/write to this segment
; accessed if the CPU has accessed this segment
; granularity 0 = limit is in 1 byte blocks, 1 = limit is multiples of 4KB blocks
; size 0 = 16 bit mode, 1 = 32 bit protected mode
gdt_start:
    dq 0x0
gdt_code:
    dw 0xFFFF
    dw 0x0
    db 0x0
    db 10011010b
    db 11001111b
    db 0x0
gdt_data:
    dw 0xFFFF
    dw 0x0
    db 0x0
    db 10010010b
    db 11001111b
    db 0x0
gdt_end:
gdt_pointer:
    dw gdt_end - gdt_start
    dd gdt_start
disk:
    db 0x0

CODE_SEG equ gdt_code - gdt_start
DATA_SEG equ gdt_data - gdt_start

;; Magic numbers
times 510 - ($ - $$) db 0

dw 0xaa55
copy_target:
bits 32
    msg:    db "Hello, World more than 512 bytes!", 0

boot32:
    mov ax, DATA_SEG
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    mov ss, ax  
    ;mov esi, msg            ; SI now points to our message
    ;mov ebx, 0xb8000       ; vga memory position (0) 

.loop   lodsb               ; Loads SI into AL and increments SI [next char]
    or al, al               ; Checks if the end of the string
    jz halt                 ; Jump to halt if the end
    or eax,0x0200           ; The top byte defines the character colour in the buffer as an int value from 0-15 with 0 = black, 1 = blue and 15 = white. 
                            ; The bottom byte defines an ASCII code point
    mov word [ebx], ax      
    add ebx, 2              
    jmp .loop               ; Next iteration of the loop

halt:   
    mov esp, kernel_stack_top
    extern __start
    call __start
    cli
    hlt                     ; CPU command to halt the execution

section .bss
align 4
kernel_stack_bottom: equ $
    resb 16384 ; 16 KB
kernel_stack_top:

    ENTRY(boot)
    OUTPUT_FORMAT("binary")
    SECTIONS {
        . = 0x7c00;
        .text :
        {
            *(.boot)
            *(.text)
        }

        .rodata :
        {
            *(.rodata)
        }

        .data :
        {
            *(.data)
        }

        .bss :
        {
            *(.bss)
        }
    }

The relevant part of my makefile is:

NASM:=nasm
CC:=gcc
SRC_NASM:=./src/init/boot.asm
SRC_C:=./src/init/boot.c ./src/init/init.c ./src/init/version.c
LINKER:=./src/init/linker.ld
DEPLOY=./deploy
BUILD:=./build
BIN:=$(DEPLOY)/boot.bin
OBJ_NASM:=$(BUILD)/boot.o
CFLAGS:=-Wall -Werror -m32 -fno-pie -ffreestanding -mno-red-zone -fno-exceptions -nostdlib -I./src/include
LDFLAGS:=

export ARCH:=i386
export ZLIB_SUPPORT:=false

DEPENDENCIES:=libc
ifeq ($(ZLIB_SUPPORT),true)
DEPENDENCIES:=$(DEPENDENCIES) zlib
endif

all: $(DEPENDENCIES)
    mkdir -p $(DEPLOY)
    mkdir -p $(BUILD)
    $(NASM) $(SRC_NASM) -f elf32 -o $(OBJ_NASM)
    $(CC) $(SRC_C) $(OBJ_NASM) -o $(BIN) $(CFLAGS) -T $(LINKER) $(LDFLAGS)

run:
    qemu-system-i386 -fda $(BIN) -nographic -serial stdio -monitor none

Why is it failing this way and how can I fix it?

Michael Petch
  • 46,082
  • 8
  • 107
  • 198
Renan Prata
  • 49
  • 1
  • 6
  • 2
    Your loop copies whatever data ESI points to wherever EBX points to. ESI is probably initialized to 0 under qemu and you set BX to `copy_target` so the loop would overwrite itself with the IVT. btw. you're assuming that segment registers are set to 0 at entry to your bootsector, but this is only true for emulators like qemu. It won't work on real hardware. – Ross Ridge Mar 26 '19 at 16:58

2 Answers

5

The primary issue is that you aren't reading your entire kernel into memory. Your code eventually ends up executing uninitialised memory (most likely filled with zeroes), reaches the Extended BIOS Data Area (just below video memory at 0xa0000), and then eventually starts executing video memory at 0xa0000. QEMU doesn't permit executing video memory, thus the source of the error you get.

Fixing this isn't as easy as it may first seem. Your code on my system was about 47300 bytes: 1 sector for the MBR and 92 for the kernel. The first problem is that not all hardware (and emulators) can read 92 sectors at once. QEMU and BOCHS max out at 72 for floppy drives and 128 for hard drives. This number can be smaller for some hardware (as low as the number of sectors per track).

Some hardware will not read sectors:

  • That extend beyond a 64KiB segment limit.
  • That span more than one track. Not all BIOSes support multi-track reads and writes. QEMU and BOCHS do support them.
  • If the BIOS uses Direct Memory Access (DMA) transfers for disk access you may not be able to write a number of sectors that traverses a 64KiB boundary (in physical memory). This means you can't guarantee a write is successful if it starts before physical address 0x10000 and ends after. Same for 0x20000, 0x30000, 0x40000 ... 0x90000. QEMU and BOCHS do not allow disk transfers across such boundaries.

A simple hack to load a kernel up to 64KiB with BOCHS and QEMU is to read 64 sectors (32KiB) to physical address 0x0000:0x8000 and then do a second read of 64 sectors to 0x1000:0x0000. You could read a bigger kernel by reading additional 32KiB chunks. The 512 bytes between 0x0000:0x7e00 and 0x0000:0x8000 would be unused. The only real catch is determining the Cylinder Head Sector (CHS) values (see footnote 1) to use for the Int 13h/AH=02 disk reads.

Other issues:

  • When reading disk sectors into memory you should set the stack (SS:SP) to a location that you won't inadvertently overwrite. If you load the kernel after the bootloader, a good location is SS:SP = 0x0000:0x7c00, so that the stack grows down from just below the bootloader. To avoid interrupts occurring while setting SS:SP, set SP in the instruction immediately following the instruction that loads SS.
  • Never rely on the value of any general purpose register or segment register containing the value you expect. DL is an exception since in almost all cases on modern hardware it will contain the boot drive number. See my bootloader tips for more information.
  • QEMU and other emulators may not read sectors that don't exist in the file. If you read more sectors than the disk image contains, the sector read may fail. To get around this, create a disk image (a 1.44MiB floppy image is convenient) and copy the contents of the kernel and bootloader to the beginning of the file without truncating the disk image. DD can be used for this purpose.
  • To aid debugging rather than have your linker script output as binary, have it default to outputting in ELF. Use OBJCOPY to copy the ELF file to a binary file. The ELF file can be used to store debug information. This is useful if using QEMU and GDB as a remote debugger.
  • You can't rely on memory containing zeroes. GCC requires that the .bss section be zero-filled. Use a linker script to determine the extents of the .bss section and zero out the memory prior to calling your C entry point.
  • Before calling the C entry point, GCC requires the Direction Flag (DF) be cleared so that string instructions default to forward movement.
  • In your makefile you use GCC to do linking. If not using a cross compiler GCC may generate a special section called .note.gnu.build-id that can interfere with your linker script. To fix this you can tell GCC to suppress this special section with LDFLAGS:=-Wl,--build-id=none. If you linked with LD directly this section wouldn't be created.

Taking all these changes into account:

linker.ld:

ENTRY(boot)
SECTIONS {
    . = 0x7c00;
    .boot :
    {
        *(.boot)
    }
    /* Place kernel right after boot sector on disk but set the
     * VMA (ORiGin point) to 0x8000 */
    . = 0x8000;
    __kernel_start = .;
    __kernel_start_seg = __kernel_start >> 4;
    .text : AT(0x7e00)
    {
        *(.text.start)
        *(.text*)
    }
    .rodata :
    {
        *(.rodata*)
    }
    .data :
    {
        *(.data)
    }
    /* Compute number of sectors that the kernel uses */
    __kernel_end = .;
    __kernel_size_sectors = (__kernel_end - __kernel_start + 511) / 512;

    .bss :
    {
        __bss_start = .;
        *(COMMON)
        *(.bss)
        . = ALIGN(4);
        __bss_end = .;
        /* Compute number of DWORDS that BSS section uses */
        __bss_sizel = (__bss_end - __bss_start) / 4;
    }
}

boot.asm:

section .boot
bits 16                     ; We're working at 16-bit mode here
global boot

; First-stage entry point, executed in 16-bit real mode.
; Sets DS=SS=0 with the stack at 0x0000:0x7c00, requests A20 and video mode 3
; from the BIOS, reads two 64-sector chunks from the boot drive to 0x0800:0
; and 0x1000:0, then sets CR0.PE and far-jumps to boot32.
; NOTE(review): the carry flag returned by the int 0x15 / int 0x13 calls is
; never tested here, so a failed BIOS call goes unnoticed.
boot:
    xor ax, ax
    mov ds, ax
    mov ss, ax
    mov sp, 0x7c00          ; Set SS:SP just below bootloader

    cld                     ; DF=0 : string instruction forward movement
    mov ax, 0x2401
    int 0x15                ; Enable A20 bit

    mov ax, 0x3             ; Set VGA text mode 3
    int 0x10                ; BIOS video service: AH=0 (set mode), AL=3

    mov [disk],dl           ; Save DL (boot drive) for the disk reads below

    ; Read 64 sectors from LBA 1, CHS=0,0,2 to address 0x0800:0
    mov ax, 0x0800
    mov es, ax              ;ES = 0x800

    mov ah, 0x2             ;read sectors
    mov al, 64              ;sectors to read
    mov ch, 0               ;cylinder idx
    mov dh, 0               ;head idx
    mov cl, 2               ;sector idx
    mov dl, [disk]          ;disk idx
    mov bx, 0               ;target pointer, ES:BX=0x0800:0x0000
    int 0x13                ; BIOS disk service (result in CF is not checked)

    ; Read 64 sectors from LBA 65, CHS=1,1,12 to address 0x1000:0
    mov ax, 0x1000
    mov es, ax              ;ES=0x1000

    mov ah, 0x2             ;read sectors
    mov al, 64              ;sectors to read
    mov ch, 1               ;cylinder idx
    mov dh, 1               ;head idx
    mov cl, 12              ;sector idx
    mov dl, [disk]          ;disk idx
    mov bx, 0x0000          ;target pointer, ES:BX=0x1000:0x0000
    int 0x13                ; BIOS disk service (result in CF is not checked)

    cli                     ; Disable the interrupts
    lgdt [gdt_pointer]      ; Load the gdt table
    mov eax, cr0            ; Read CR0 ...
    or eax,0x1              ; ... set bit 0, the protected mode enable bit ...
    mov cr0, eax            ; ... and write it back
    jmp CODE_SEG:boot32     ; Far jump: loads CS with the CODE_SEG selector


; base a 32 bit value describing where the segment begins
; limit a 20 bit value describing where the segment ends, can be multiplied by 4096
; if granularity = 1
; present must be 1 for the entry to be valid
; ring level an int between 0-3 indicating the kernel Ring Level
; direction:
;  > 0 = segment grows up from base, 1 = segment grows down for a data segment
;  > 0 = can only execute from ring level, 1 = prevent jumping to higher ring levels
; read/write if you can read/write to this segment
; accessed if the CPU has accessed this segment
; granularity 0 = limit is in 1 byte blocks, 1 = limit is multiples of 4KB blocks
; size 0 = 16 bit mode, 1 = 32 bit protected mode
gdt_start:
    dq 0x0
gdt_code:
    dw 0xFFFF
    dw 0x0
    db 0x0
    db 10011010b
    db 11001111b
    db 0x0
gdt_data:
    dw 0xFFFF
    dw 0x0
    db 0x0
    db 10010010b
    db 11001111b
    db 0x0
gdt_end:
gdt_pointer:
    dw gdt_end - gdt_start
    dd gdt_start
disk:
    db 0x0

CODE_SEG equ gdt_code - gdt_start
DATA_SEG equ gdt_data - gdt_start

;; Magic numbers
times 510 - ($ - $$) db 0
dw 0xaa55

section .data
msg: db "Hello, World more than 512 bytes!", 0

bits 32
section .text.start
; 32-bit protected mode entry point, reached by the far jump from boot.
; Loads every data segment register with DATA_SEG, copies the
; zero-terminated string at msg to the text buffer at 0xb8000, then falls
; into halt, which sets up the stack, zeroes .bss and calls __start.
boot32:
    mov ax, DATA_SEG
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    mov ss, ax
    mov esi, msg        ; ESI now points to our message
    mov ebx, 0xb8000    ; vga memory position (0)

.loop:
    lodsb               ; AL = byte at [ESI], then ESI advances [next char]
    or al, al           ; Sets ZF when AL is the terminating zero
    jz halt             ; Jump to halt if the end
    or eax,0x0200       ; The top byte defines the character colour in the buffer as
                        ; an int value from 0-15 with 0 = black, 1 = blue and 15 = white.
                        ; The bottom byte defines an ASCII code point
                        ; (this ORs attribute value 0x02 into AH)
    mov word [ebx], ax  ; Store character + attribute cell
    add ebx, 2          ; Each cell is two bytes wide
    jmp .loop           ; Next iteration of the loop

; Prepare the C environment and hand over to __start.
halt:
    mov esp, kernel_stack_top   ; Stack top is the end of the 16 KB area in .bss
    extern __start
    extern __bss_start
    extern __bss_sizel

    ; Zero the BSS section
    mov ecx, __bss_sizel        ; Count of dwords, computed by the linker script
    mov edi, __bss_start
    xor eax, eax
    rep stosd                   ; Stores ECX zero dwords at ES:EDI

    ; Call C entry point
    call __start
    cli
    hlt                 ; CPU command to halt the execution

section .bss
align 4
kernel_stack_bottom:
    resb 16384          ; 16 KB stack
kernel_stack_top:

Modify makefile by adding these make variables:

OC:=objcopy
DD:=dd
ELF:=$(DEPLOY)/boot.elf

Modify makefile by changing LDFLAGS to:

LDFLAGS:=-Wl,--build-id=none

Modify makefile by changing the all rule to:

all: $(DEPENDENCIES)
        mkdir -p $(DEPLOY)
        mkdir -p $(BUILD)
        $(NASM) $(SRC_NASM) -f elf32 -o $(OBJ_NASM)
        $(CC) $(SRC_C) $(OBJ_NASM) -o $(ELF) $(CFLAGS) -T $(LINKER) $(LDFLAGS)
        $(OC) -O binary $(ELF) $(BIN)
        $(DD) if=/dev/zero of=$(BIN).tmp count=1440 bs=1024
        $(DD) if=$(BIN) of=$(BIN).tmp conv=notrunc
        mv $(BIN).tmp $(BIN)

Alternative Solution

Given there are many ways that reading with Int 13/AH=2 can fail, one can avoid most of the issues by reading one sector at a time and always reading to a memory location evenly divisible by 512.

When using a linker script to build a bootloader along side the kernel you can use the linker to determine the size of the kernel and compute the number of sectors needed to be read.

A revision of the previous code above that could do the needed job could be as follows.

linker.ld

ENTRY(boot)
SECTIONS {
    . = 0x7c00;
    .boot :
    {
        *(.boot)
    }
    __kernel_start = .;
    __kernel_start_seg = __kernel_start >> 4;
    .text :
    {
        *(.text.start)
        *(.text*)
    }
    .rodata :
    {
        *(.rodata*)
    }
    .data :
    {
        *(.data)
    }
    /* Compute number of sectors that the kernel uses */
    __kernel_end = .;
    __kernel_size_sectors = (__kernel_end - __kernel_start + 511) / 512;

    .bss :
    {
        __bss_start = .;
        *(COMMON)
        *(.bss)
        . = ALIGN(4);
        __bss_end = .;
        /* Compute number of DWORDS that BSS section uses */
        __bss_sizel = (__bss_end - __bss_start) / 4;
    }
}

The main difference is that this linker script starts loading the kernel into physical memory at 0x07e00 instead of 0x08000. A more refined boot.asm can use the values generated by the linker to loop through the needed sectors reading them one at a time until complete:

extern __kernel_size_sectors    ; Size of kernel in 512 byte sectors
extern __kernel_start_seg       ; Segment the start of the kernel will be loaded at

global boot

STAGE2_LBA_START equ 1          ; Logical Block Address(LBA) Stage2 starts on
                                ;     LBA 1 = sector after boot sector
                                ; Logical Block Address(LBA) Stage2 ends at
STAGE2_LBA_END   equ STAGE2_LBA_START + __kernel_size_sectors
DISK_RETRIES     equ 3          ; Number of times to retry on disk error

bits 16
section .boot

boot:
; Include a BPB (1.44MB floppy with FAT12) to be more compatible with USB floppy media
;%include "src/init/bpb.inc"

; Real-mode start of the boot sector: establish DS=SS=0 with the stack at
; 0x0000:0x7c00, then fall through into load_stage2.
boot_start:
    xor ax, ax                  ; DS=SS=0 for stage2 loading (ES is set per read)
    mov ds, ax
    mov ss, ax                  ; Stack at 0x0000:0x7c00
    mov sp, 0x7c00
    cld                         ; Set string instructions to use forward movement

    ; Read Stage2 1 sector at a time until stage2 is completely loaded
    ;
    ; Inputs:   DL = boot drive number
    ; Register roles inside the loop:
    ;   SI = LBA of the sector to read next
    ;   DI = segment the next sector is read into (offset is always 0)
    ;   BP = remaining retry budget for the current sector
    ; On success control transfers to stage2; when a sector still fails after
    ; the retries are used up, execution falls out of the bottom of this block.
load_stage2:
    mov [bootDevice], dl        ; Save boot drive
    mov di, __kernel_start_seg  ; DI = Current segment to read into
    mov si, STAGE2_LBA_START    ; SI = LBA that stage2 starts at
    jmp .chk_for_last_lba       ; Check to see if we are last sector in stage2

.read_sector_loop:
    mov bp, DISK_RETRIES        ; Set disk retry count

    call lba_to_chs             ; Convert current LBA to CHS
    mov es, di                  ; Set ES to current segment number to read into
    xor bx, bx                  ; Offset zero in segment

.retry:
    mov ax, 0x0201              ; Call function 0x02 of int 13h (read sectors)
                                ;     AL = 1 = Sectors to read
    int 0x13                    ; BIOS Disk interrupt call
    jc .disk_error              ; If CF set then disk error

.success:
    add di, 512>>4              ; Advance to next 512 byte segment (0x20*16=512)
    inc si                      ; Next LBA

.chk_for_last_lba:
    cmp si, STAGE2_LBA_END      ; Have we reached the last stage2 sector?
    jb .read_sector_loop        ;     If we haven't then read next sector
                                ;     (unsigned compare: an LBA is never negative)

.stage2_loaded:
    jmp stage2                  ; Jump to second stage

.disk_error:
    xor ah, ah                  ; Int13h/AH=0 is drive reset
    int 0x13
    dec bp                      ; Decrease retry count
    jge .retry                  ; If retry count not exceeded then try again
                                ;     (signed on purpose: stops once BP reaches -1)

error_end:
    ; Unrecoverable error; print drive error; enter infinite loop
    mov si, diskErrorMsg        ; Display disk error message
    call print_string
    cli
.error_loop:
    hlt
    jmp .error_loop

; Function: print_string
;           Write a zero-terminated string to the screen through the BIOS
;           teletype service (Int 10h/AH=0Eh), using display page 0.
;
; Inputs:   SI = Offset of the first character of the string
; Clobbers: AX, BX, SI

print_string:
    xor bx, bx                  ; BH = display page 0 (BL = 0 as well)
    mov ah, 0x0e                ; AH = BIOS teletype output function
.next_char:
    lodsb                       ; AL = next character, SI advances
    test al, al                 ; Terminating zero reached?
    jz .done                    ;     yes: nothing more to print
    int 0x10                    ; Print the character in AL
    jmp .next_char
.done:
    ret

;    Function: lba_to_chs
; Description: Translate Logical block address to CHS (Cylinder, Head, Sector).
;              Works for all valid FAT12 compatible disk geometries.
;
;   Resources: http://www.ctyme.com/intr/rb-0607.htm
;              https://en.wikipedia.org/wiki/Logical_block_addressing#CHS_conversion
;              https://stackoverflow.com/q/45434899/3857942
;              Sector    = (LBA mod SPT) + 1
;              Head      = (LBA / SPT) mod HEADS
;              Cylinder  = (LBA / SPT) / HEADS
;
;      Inputs: SI = LBA
;     Outputs: DL = Boot Drive Number
;              DH = Head
;              CH = Cylinder (lower 8 bits of 10-bit cylinder)
;              CL = Sector/Cylinder
;                   Upper 2 bits of 10-bit Cylinders in upper 2 bits of CL
;                   Sector in lower 6 bits of CL
;    Clobbers: Flags. AX is saved and restored; SI is not modified.
;
;       Notes: Output registers match expectation of Int 13h/AH=2 inputs
;              Reads the words sectorsPerTrack and numHeads and the byte
;              bootDevice through DS.
;
lba_to_chs:
    push ax                     ; Preserve AX
    mov ax, si                  ; Copy LBA to AX
    xor dx, dx                  ; Upper 16-bit of 32-bit value set to 0 for DIV
    div word [sectorsPerTrack]  ; 32-bit by 16-bit DIV : LBA / SPT
                                ;     AX = quotient, DX = remainder
    mov cl, dl                  ; CL = S = LBA mod SPT
    inc cl                      ; CL = S = (LBA mod SPT) + 1
    xor dx, dx                  ; Upper 16-bit of 32-bit value set to 0 for DIV
    div word [numHeads]         ; 32-bit by 16-bit DIV : (LBA / SPT) / HEADS
                                ;     AX = cylinder, DX = head
    mov dh, dl                  ; DH = H = (LBA / SPT) mod HEADS
    mov dl, [bootDevice]        ; boot device, not necessary to set but convenient
    mov ch, al                  ; CH = C(lower 8 bits) = (LBA / SPT) / HEADS
    shl ah, 6                   ; Store upper 2 bits of 10-bit Cylinder into
    or  cl, ah                  ;     upper 2 bits of Sector (CL)
    pop ax                      ; Restore the caller's AX
    ret

; Uncomment these lines if not using a BPB (via bpb.inc)
%ifndef WITH_BPB
numHeads:        dw 2           ; 1.44MB Floppy has 2 heads & 18 sector per track
sectorsPerTrack: dw 18
%endif

bootDevice:      db 0x00
diskErrorMsg:    db "Unrecoverable disk error!", 0

; Pad boot sector to 510 bytes and add 2 byte boot signature for 512 total bytes
TIMES 510-($-$$) db  0
dw 0xaa55

section .data
msg: db "Hello, World more than 512 bytes!", 0

; base a 32 bit value describing where the segment begins
; limit a 20 bit value describing where the segment ends, can be multiplied by 4096
; if granularity = 1
; present must be 1 for the entry to be valid
; ring level an int between 0-3 indicating the kernel Ring Level
; direction:
;  > 0 = segment grows up from base, 1 = segment grows down for a data segment
;  > 0 = can only execute from ring level, 1 = prevent jumping to higher ring levels
; read/write if you can read/write to this segment
; accessed if the CPU has accessed this segment
; granularity 0 = limit is in 1 byte blocks, 1 = limit is multiples of 4KB blocks
; size 0 = 16 bit mode, 1 = 32 bit protected mode
gdt_start:
    dq 0x0
gdt_code:
    dw 0xFFFF
    dw 0x0
    db 0x0
    db 10011010b
    db 11001111b
    db 0x0
gdt_data:
    dw 0xFFFF
    dw 0x0
    db 0x0
    db 10010010b
    db 11001111b
    db 0x0
gdt_end:
gdt_pointer:
    dw gdt_end - gdt_start
    dd gdt_start
disk:
    db 0x0

CODE_SEG equ gdt_code - gdt_start
DATA_SEG equ gdt_data - gdt_start

bits 16
section .text.start
; Second-stage entry point, still in 16-bit real mode; reached by the jump at
; the end of load_stage2. Requests A20 from the BIOS, loads the GDT, sets
; CR0.PE and far-jumps to the 32-bit code at startpm.
; NOTE(review): the status returned by int 0x15 is not checked.
stage2:
    cli                         ; Disable the interrupts
    mov ax, 0x2401
    int 0x15                    ; Enable A20 bit

    lgdt [gdt_pointer]          ; Load the gdt table
    mov eax, cr0                ; Read CR0 ...
    or eax,0x1                  ; ... set bit 0, the protected mode enable bit ...
    mov cr0, eax                ; ... and write it back
    jmp CODE_SEG:startpm        ; FAR JMP to the code segment

bits  32
; 32-bit protected mode entry point, reached by the far jump from stage2.
; Loads every data segment register with DATA_SEG, copies the
; zero-terminated string at msg to the text buffer at 0xb8000, then falls
; into halt, which sets up the stack, zeroes .bss and calls __start.
startpm:
    mov ax, DATA_SEG
    mov ds, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    mov ss, ax
    mov esi, msg                ; ESI now points to our message
    mov ebx, 0xb8000            ; vga memory position (0)

.loop:
    lodsb                       ; AL = byte at [ESI], then ESI advances [next char]
    or al, al                   ; Sets ZF when AL is the terminating zero
    jz halt                     ; Jump to halt if the end
    or eax,0x0200               ; The top byte defines the character colour in the
                                ; buffer as an int value from 0-15 with 0 = black,
                                ; 1 = blue and 15 = white.
                                ; The bottom byte defines an ASCII code point
                                ; (this ORs attribute value 0x02 into AH)
    mov word [ebx], ax          ; Store character + attribute cell
    add ebx, 2                  ; Each cell is two bytes wide
    jmp .loop                   ; Next iteration of the loop

; Prepare the C environment and hand over to __start.
halt:
    mov esp, kernel_stack_top   ; Stack top is the end of the 16 KB area in .bss
    extern __start
    extern __bss_start
    extern __bss_sizel

    ; Zero the BSS section
    mov ecx, __bss_sizel        ; Count of dwords, computed by the linker script
    mov edi, __bss_start
    xor eax, eax
    rep stosd                   ; Stores ECX zero dwords at ES:EDI

    ; Call C entry point
    call __start
    cli
    hlt                         ; CPU command to halt the execution

section .bss
align 4
kernel_stack_bottom:
    resb 16384                  ; 16 KB stack
kernel_stack_top:

This boot.asm is loosely based on the bootloader I proposed in another Stackoverflow question and answer. The main difference is that the linker computes much of the needed information through a linker script rather than being coded/included directly in the assembly file. This code also moves the enabling of the A20 line and entering protected mode to the second stage. This frees up space if you need to expand on the capabilities in the bootloader in the future.

If you are building your bootloader to be used on real hardware as unpartitioned media - a copy of a 1.44MiB BIOS Parameter Block (BPB) can be found in the file bpb.inc. This can be useful for booting on USB media using Floppy Disk Emulation (FDD). To enable it just remove the ; from this line:

; %include "src/init/bpb.inc"

Footnotes

  • 1. There is a formula to convert a zero-based Logical Block Address (LBA) to a set of CHS values:

    C = LBA ÷ (HPC × SPT)
    H = (LBA ÷ SPT) mod HPC
    S = (LBA mod SPT) + 1
    

    LBA 0 is the bootsector. If the kernel is in the contiguous sectors after the bootloader then the start of the kernel is at LBA 1. The second 32KiB chunk of the kernel would be at LBA 65 (64+1). For a 1.44MiB floppy HPC=2 and SPT=18. From the calculation, LBA 1 = CHS(0,0,2) and LBA 65 = CHS(1,1,12). Those are the values used by the 64-sector disk reads in the first version of boot.asm.

Michael Petch
  • 46,082
  • 8
  • 107
  • 198
  • Hi, now I'm understanding my problem. Thank you. However, I tried to compile with your suggests and this error happens: /usr/bin/ld: section .text loaded at [0000000000007e00,000000000000fff0] overlaps section .boot loaded at [0000000000007c24,0000000000007e23]. Is there something wrong yet? Sorry for the inconvenience, but I want to deep understand this. – Renan Prata Mar 27 '19 at 14:26
  • I updated my code. So, I believe that my code is using all data in second stage (I read about that in other documents). – Renan Prata Mar 27 '19 at 15:37
  • I get his error with new script now: /usr/bin/ld: section .text loaded at [0000000000007e00,0000000000010050] overlaps section .boot loaded at [0000000000007c24,0000000000007e23] . I'm using Linux Ubuntu 16.04. – Renan Prata Mar 27 '19 at 17:10
  • And also, I have other question. What means that line: .text : AT(0x7e00)? – Renan Prata Mar 27 '19 at 17:12
  • 1
    @RenanPrata In the comments I mention that the kernel's VMA (origin point) is 0x8000 but the physical location on disk is at offset 0x7e00. The `AT(0x7e00)` tells the linker that although the VMA may be one thing, the physical location is another. Without the `AT(0x7e00)` there would be 512 bytes of zeroes in the binary file between the bootloader and the kernel. That is fine, but you'd have to modify the code to read from CHS=0,0,3 rather than 0,0,2. I did that to save some space. – Michael Petch Mar 27 '19 at 17:18
  • 1
    @RenanPrata You can get rid of the AT(0x7e00) if you modify the first disk read to read from CHS 0,0,3 (instead of 0,0,2) and the second read from 1,1,13 (instead of 1,1,12). – Michael Petch Mar 27 '19 at 17:20
  • I removed 'AT(0x7e00)' from my linker.ld and change CHS (now I'm using 0,0,2 and 1,1,13). It compiled. However, My simple C code, does not called (I run qemu and nothing happens). Is there way to debug what is happen? If you prefere, I can send you an email or create a new question about this. – Renan Prata Mar 27 '19 at 17:32
  • @RenanPrata I have discovered what is wrong. It's not the linker script, it is the way you link (and also because you are on Ubuntu and because you aren't using a cross compiler). Try going back to the linker script as it is in my answer. In your `makefile` change `LDFLAGS:=` to `LDFLAGS:=-Wl,--build-id=none` . When linking with GCC, it creates a special section called `.note.gnu.build-id` . This causes troubles for linker scripts like this. If you linked with LD directly this section wouldn't be created. An i686 elf cross compiler wouldn't generate it either – Michael Petch Mar 27 '19 at 17:37
  • It works now (and I'm using AT(0x7e00) for data area. Thank you! So, I'll study more about this. It's my first step. – Renan Prata Mar 27 '19 at 17:45
  • @RenanPrata Now that you know it works, you can make the adjustment of removing `AT(0x7e00)` and changing the disk reads as mentioned earlier. I wanted you to try the script in my answer to make sure it worked. Both ways should work if done correctly – Michael Petch Mar 27 '19 at 17:47
  • 1
    @RenanPrata : I've update my answer with a second solution (including a modified linker script) and revamped boot.asm. The assembly code uses the info generated by the linker to compute the number of sectors to read to load the kernel. It also does it one sector at a time avoid all the pitfalls I mentioned at the top of my answer. It also moved the A20 and protected mode code into the second stage. – Michael Petch Mar 27 '19 at 19:43
  • 1
    Note that newer versions of QEMU can cope with executing from devices and other non-RAM bits of the address space, so the specific error message here has gone away. Instead the guest will just continue to execute garbage out of video RAM as it would on real hardware... – Peter Maydell Mar 28 '19 at 08:14
4

This error ("Trying to execute code outside RAM or ROM at 0x000a0000") typically indicates control flow problems - e.g. the CPU jumped or called or returned to a dodgy address, then started executing zeros in uninitialized RAM (which are interpreted by the CPU as add instructions) until the CPU reached the legacy VGA area (at 0x000A0000).

For the cause of the error, I didn't look too hard.

The reason I didn't really look is that it doesn't really matter. Eventually your boot loader must do things like getting a memory map from the BIOS (e.g. "int 0x15, eax=0xE820"), will want to auto-detect the size of the kernel (rather than assuming the kernel will always be exactly 30 KiB), will either want to handle kernels that are larger than 1 MiB (e.g. Linux is often larger than 5 MiB) or will want to also load some kind of "initial RAM disk" (for micro-kernels, which is the only likely case where you can assume kernel will be smaller than the ~640 KiB of RAM you can access in real mode), may want to decompress the kernel and/or "initial RAM disk", will want to check if the kernel is sane (e.g. possibly by checking headers and a CRC), and may want to be able to set up a nice graphical video mode (e.g. 1920*1600 with millions of colors). It will also either need a "BIOS Parameter Block" (for unpartitioned devices - e.g. floppy disk) or will have to handle a partitioning scheme (and not assume that the partition begins at the start of the disk).

All of these things (and more, like checking if A20 actually was enabled or not) will be too large to fit in 512 bytes (and all these things imply that switching to protected mode in the first 512 bytes is always a mistake).

This means that you'll need to redesign and then rewrite your code, and the existing code will get discarded regardless of whether you find/fix the current bug/s, so there's no reason to spend time finding/fixing the current bug/s.

Brendan
  • 35,656
  • 2
  • 39
  • 66