1

I'm making an OS that runs Snake. I'm in 32-bit protected mode, but I can't draw a pixel to the screen. I switch to mode 0x13 (320x200x256) in real mode and the screen blanks. After entering protected mode the kernel runs, but the pixel I am plotting doesn't appear.

I am drawing inspiration from this OSDev Article that switches to protected mode and plots pixels to the video display.

Here's kernel.c

#include "display.h"    // Include Display Drivers
#include "constants.h" // Include constants that C doesn't have included
#include "keyboard.h" // Include the custom keyboard driver

// Empty stub whose only purpose is to give ld a `_start` symbol so that it
// stops warning about a missing entry point. It does nothing and returns.
// NOTE(review): the kernel is linked as a flat binary at 0x1000 and the boot
// sector calls that address, so whichever code is placed first in the image
// is what actually runs first - confirm this stub is not what ends up there.
void _start(){} // Remove LD Warning

// Display parameters, filled in by main() from display_init().
DisplayDetails globalDisplayDetails; // The display details.

/*
 * Kernel main routine, called from kernel_entry.asm.
 *
 * Stores the display parameters in globalDisplayDetails and then spins
 * forever, plotting one pixel at (100, 100) on every iteration.
 * Never returns.
 */
int main(){
    // Record the display parameters for the rest of the kernel
    globalDisplayDetails = display_init();

    // OS loop: there is no exit condition
    for(;;){
        putpixel(100, 100, 0x00FFFFFF);
    }

    // return 0;
}

And Here's display.h (The actual display driver)

// Base address of video memory (the same value putpixel() writes to).
#define VRAM 0xA0000

// Describes a display mode. An instance is produced by display_init().
typedef struct DisplayModeDetails {
    int width;            // Width of one line of pixels
    int height;          //  Height of display in pixels
    int colors;         //   How many colors are supported
    int pitch;         //    How many bytes of VRAM to skip when going 1 pixel down
    int pixelWidth;   //     How many bytes of VRAM to skip when going 1 pixel right
} DisplayDetails;

/*
 * Describe the video mode that the boot sector selects before entering
 * protected mode: BIOS mode 0x13, i.e. 320x200 pixels with one byte
 * (a palette index, 256 colors) per pixel.
 *
 * The previous values (640x480, 16 colors, pitch 1) did not match that mode;
 * in particular the pitch must be the number of bytes in one scanline.
 *
 * Returns the mode description by value.
 */
struct DisplayModeDetails display_init(){
    struct DisplayModeDetails details; // Setup display mode details
    details.width      = 320;  // pixels per scanline
    details.height     = 200;  // number of scanlines
    details.colors     = 256;  // one byte per pixel -> 256 palette entries
    details.pitch      = 320;  // bytes from one scanline to the next
    details.pixelWidth = 1;    // bytes from one pixel to the next
    return details;
}

/*
 * Plot one pixel in VGA mode 0x13 (320x200, one byte per pixel).
 *
 * pos_x      column, 0..319
 * pos_y      row,    0..199
 * VGA_COLOR  palette index; only the low 8 bits are used, because each
 *            pixel in this mode is a single byte (it is not an RGB value).
 *
 * Coordinates outside the screen are ignored, so a bad caller cannot
 * scribble over memory outside the framebuffer.
 */
void putpixel(int pos_x, int pos_y, unsigned long VGA_COLOR)
{
    enum { SCREEN_WIDTH = 320, SCREEN_HEIGHT = 200 };

    if (pos_x < 0 || pos_x >= SCREEN_WIDTH || pos_y < 0 || pos_y >= SCREEN_HEIGHT)
        return;

    /* volatile: this is memory-mapped video RAM, the store must not be elided */
    volatile unsigned char* location =
        (volatile unsigned char*)0xA0000 + SCREEN_WIDTH * pos_y + pos_x;
    *location = (unsigned char)VGA_COLOR;
}

boot_sect.asm:

; Boot sector: assembled as 16-bit code with origin 0x7C00.
[bits 16]
[org 0x7C00]

; Save the drive number passed in DL so it can be reloaded before the disk read.
; NOTE(review): this store goes through DS, which is not initialised anywhere
; in this file - it presumably relies on the BIOS leaving DS = 0; confirm.
mov [BOOTDRIVE], dl

; call clear_screen ; Clear screen
; load_kernel ends in a far jump (or an error loop), so this call never returns.
call load_kernel

; load_kernel: read the kernel from disk, switch the display to mode 0x13,
; then enable protected mode and far-jump to start_protected_mode.
; Does not return.
; NOTE(review): INT 13h reads into ES:BX and ES is not set in this file -
; confirm it is 0 so the kernel really lands at KERNELOFFSET.
load_kernel:
    mov bx, KERNELOFFSET  ; offset the sectors are read to
    mov dh, 0x05          ; number of sectors disk_load should read
    mov dl, [BOOTDRIVE]   ; drive number saved at boot
    ; mov dl, 0x80
    call clear_screen     ; Clear Screen
    call disk_load        ; Load From Disk

    mov ah, 0x00          ; Start setting video mode
    mov al, 0x13          ; 320x200 256 color graphics
    int 0x10

    cli                   ; Disable Interrupts
    lgdt [gdt_descriptor] ; GDT start address
    mov eax, cr0
    or eax, 1             ; set bit 0 of CR0
    mov cr0, eax          ; Jump to Protected 32 bit mode
    jmp CODESEG:start_protected_mode ; far jump: loads CS with the code selector

    jmp $                 ; not reached - the far jump above does not come back

; clear_screen: blank the 80x25 text screen and home the cursor.
; In:    nothing
; Out:   nothing
; Clobb: none (all general registers preserved by pusha/popa)
clear_screen:
    pusha

    ; INT 10h AH=07h scrolls a window; AL is the line count and AL = 0 means
    ; "clear the whole window". AL was previously left at whatever the caller
    ; had in it, so it is now set explicitly.
    mov ax, 0x0700   ; AH = 07h scroll, AL = 0 -> clear entire window
    mov bh, 0x0f     ; white on black
    mov cx, 0x00     ; row=0, col=0
    mov dx, 0x184f   ; row = 24, col = 79
    int 0x10         ; Call interrupt

    ; INT 10h AH=02h: move the cursor to row 0, column 0 on page 0
    mov ah, 0x02
    mov dh, 0x00
    mov dl, 0x00
    mov bh, 0x00
    int 0x10

    popa
    ret

; disk_load: read DH sectors from drive DL, starting at cylinder 0, head 0,
; sector 2, using BIOS INT 13h function 02h.
; In:    dh = number of sectors, dl = drive, bx = buffer offset
; Out:   returns only on success; jumps to disk_error if the BIOS reports
;        failure (carry set) or to sectors_error if fewer sectors were read
; Clobb: none on the success path (pusha/popa)
disk_load:
    pusha
    push dx          ; remember the requested count (DH) for the check below

    mov al, dh       ; AL = how many sectors to read
    mov ah, 0x02     ; BIOS "read sectors" function
    mov cx, 0x0002   ; CH = cylinder 0, CL = sector 2 (sector 1 is the boot sector)
    xor dh, dh       ; head 0
    int 0x13
    jc disk_error    ; carry set -> BIOS reported an error

    pop dx           ; DH = requested count again
    cmp al, dh       ; AL = sectors actually read
    jne sectors_error

    popa
    ret

; Fatal disk-error handling. Both entry points put an ASCII error code in AH
; and end up in err_loop, which prints "Err <code>" and hangs forever.
;   '1' = the BIOS reported a read error
;   '2' = fewer sectors were read than requested
disk_error:
    mov ah, '1' ; Error Code
    jmp err_loop
sectors_error:
    mov ah, '2' ; Error Code
    ; falls through into err_loop
err_loop:
    call clear_screen ; preserves all registers, so AH still holds the code
    mov dh, ah   ; keep the error code while AH is reused for the BIOS call
    mov bh, 0x00 ; teletype output goes to page 0 (BH still held the high
                 ; byte of KERNELOFFSET from the caller otherwise)
    mov ah, 0x0e ; INT 10h teletype output
    mov al, 'E'
    int 0x10
    mov al, 'r'
    int 0x10
    int 0x10     ; second 'r' of "Err"
    mov al, ' '
    int 0x10
    mov al, dh ; Print Error Code
    int 0x10
    
    jmp $ ; create infinite loop

; Constants
; KERNELOFFSET: address the kernel is read to and later called at.
; CODESEG / DATASEG: segment selectors, computed as the byte offset of the
; corresponding descriptor from the start of the GDT.
KERNELOFFSET equ 0x1000
CODESEG equ gdt_code - gdt_start
DATASEG equ gdt_data - gdt_start

; Global Descriptor Table for a flat 32-bit address space, plus the
; 6-byte descriptor that lgdt loads.
;
; Layout (8 bytes per entry):
;   0x00  null descriptor (the dq)
;   0x08  gdt_null - a second all-zero entry; unused, but removing it would
;         change the selector values, so it is kept
;   0x10  gdt_code -> CODESEG
;   0x18  gdt_data -> DATASEG
gdt_start:
    dq 0x0

    gdt_null:
        dd 0x0
        dd 0x0

    gdt_code:
        dw 0xffff       ; limit bits 0-15
        dw 0x0          ; base bits 0-15
        db 0x0          ; base bits 16-23
        db 0b10011010   ; access: present, ring 0, code segment, readable
        db 0b11001111   ; flags: 4 KiB granularity, 32-bit; limit bits 16-19 = 0xF
        db 0x0          ; base bits 24-31
    
    gdt_data:
        dw 0xffff       ; limit bits 0-15
        dw 0x0          ; base bits 0-15
        db 0x0          ; base bits 16-23
        db 0b10010010   ; access: present, ring 0, data segment, writable
        db 0b11001111   ; flags: 4 KiB granularity, 32-bit; limit bits 16-19 = 0xF
        db 0x0          ; base bits 24-31
    gdt_end:

    gdt_descriptor:
        dw gdt_end - gdt_start - 1  ; limit = table size in bytes minus one
        dd gdt_start                ; linear address of the table

; start_protected_mode: first code executed after the far jump into 32-bit
; protected mode. Loads SS, ES, FS and GS with the data selector, puts the
; stack at 0x9000 and calls the kernel at KERNELOFFSET.
[bits 32]
start_protected_mode:
    ; Load the kernel
    mov ax, DATASEG
    mov dx, ax      ; NOTE(review): this writes DX, a general-purpose register. DS is never
                    ; loaded with DATASEG anywhere in this file - likely meant `mov ds, ax`.
    mov ss, ax
    mov es, ax
    mov fs, ax
    mov gs, ax
    
    mov ebp, 0x9000
    mov esp, ebp    ; stack starts at 0x9000

    call KERNELOFFSET ; enter the kernel image
    jmp $             ; hang if the kernel ever returns

; Drive number saved from DL at the top of the boot sector.
BOOTDRIVE db 0

; Marking as bootable.
; Zero-pad up to byte 510 so the two-byte signature ends the 512-byte sector.
times 510-($-$$) db 0
dw 0xaa55

kernel_entry.asm:

; Kernel entry stub: 32-bit code that hands control to the C function
; `main` and spins forever if it ever returns.
bits 32
extern main

    call main
    jmp $               ; never fall off the end of the stub

I build and test with this script:

# Compile the C kernel as freestanding, non-PIE 32-bit code
gcc -m32 -fno-pie -ffreestanding -g -c kernel.c  -o obj/main.o

# Assemble the kernel entry stub (ELF object) and the boot sector (raw binary)
nasm -f elf kernel_entry.asm -o obj/entry.o
nasm -f bin boot_sect.asm -o obj/boot.bin

# Link the kernel as a flat binary with its code starting at 0x1000
# NOTE(review): objects are laid out in command-line order, so main.o's code
# (not entry.o's) is placed at 0x1000, the address the boot sector calls -
# confirm this is intended.
ld -m elf_i386 -o obj/kernel.bin -Ttext 0x1000 obj/main.o obj/entry.o  --oformat binary

# Disk image = boot sector followed by the kernel
cat obj/boot.bin obj/kernel.bin > bin/os.bin

# Boot the image in QEMU
qemu-system-x86_64 -drive format=raw,file=bin/os.bin -display sdl
Michael Petch
  • 46,082
  • 8
  • 107
  • 198
SKZI
  • 43
  • 1
  • 7
  • Using the awesome power of 32-bit hardware to run a snake game? Despite the dubiousness of the value, I'm interested ... :-) – paxdiablo Aug 03 '23 at 00:42
  • `void _start(){} // Remove LD Warning` - what? You want `ld` to find that `_start: ret` "function" as the ELF entry point? That `_start` doesn't call your `main`, it runs a `ret` with the stack pointer pointing at garbage. Single-step your code in a debugger. – Peter Cordes Aug 03 '23 at 04:06
  • @PeterCordes It doesn't run. My code specifically in my kernel_entry runs "main", not _start. I just added that to get rid of the warning. I wish I could get rid of the warning message without it, but that's the easiest way. – SKZI Aug 03 '23 at 05:06
  • @PeterCordes, I suspect it's not meant to run as an ELF, `ld` is being used to create a binary BLOB to be loaded into memory and jumped to. It *may* be that `ld` requires `_start` regardless of that which is why it's there. There's probably an `ld` flag that says to use zero as the entry point or something like that, bypassing `_start`. – paxdiablo Aug 03 '23 at 05:08
  • 1
    @paxdiablo: If https://github.com/skzidev/snake-os/blob/main/kernel_entry.asm is what gets assembled into `entry.o`, they could silence the warning by putting `global _start` / `_start:` in that file. Or they could have used `ld -e main` to misleadingly set the ELF entry point to `main` (although there isn't actually an ELF entry point since they use `--oformat binary`, good point.) Putting a useless function in a `.c` seems like the most confusing and wrong-looking way to silence the warning. (It's only a warning, not an error; `ld` defaults to the top of `.text` in that case.) – Peter Cordes Aug 03 '23 at 05:43
  • 1
    @paxdiablo: It's especially misleading for an [mcve] for an SO question that omits the asm and build scripts, since if it did get used, it would be the problem. And the question doesn't explain what symptoms they have or show any debugging details. – Peter Cordes Aug 03 '23 at 05:45
  • 1
    NASM, gcc and ld all reported no errors or warnings, it just didn’t show anything besides the boot screen for a slight second before the video mode switched. The only symptom i had was not drawing the pixel right. I didn’t suspect the boot loader because it seemed to be entirely unrelated, the build script was the same story. I also should probably add that I run ./nasm because at one point when creating the project i extracted the nasm binary to the same folder. I’ll probably fix that in a bit. As for the debugging details, gdb wasn’t connecting and i thought warnings would help me fix it. – SKZI Aug 03 '23 at 14:27
  • In assembly language, the assembler doesn't understand your program. It can only warn about individual instructions that might not be what you meant, like an immediate that gets truncated. Most asm bugs aren't something that will cause a warning, and can break things that C code assumes. Hopefully you now realize that a debugger will save you huge amounts of time. – Peter Cordes Aug 03 '23 at 19:56
  • Also, yes I realize that `ld` won't produce a warning with your current code, because you wrote `_start(){}` as a function in your C. But that function doesn't make sense in your actual program and isn't something you want in it. Other ways of avoiding that warning would be clearer and more correct. – Peter Cordes Aug 03 '23 at 19:59
  • 1
    Just another minor observation. It is usually bad form to place non-static (or non-inlined) functions that have global scope in header files. Since it appears you want to inline `putpixel`) I would declare `putpixel` with `static inline` like: `static inline void putpixel(int pos_x, int pos_y, unsigned long VGA_COLOR)` – Michael Petch Aug 03 '23 at 20:07

1 Answer

8

First up, mode 13 is one byte per pixel, so I'm not sure why you're using 0x00ffffff unless you think it's an RGB/RGBA value, which is not the case for this mode.

When pushing this value into memory as a single byte, it will become 0xff and, according to the Mode 13 Wikipedia page, this is the color palette used:

enter image description here

It looks to me that 0xff (bottom right) is as black as 0x00 (top left). Hence I suspect you should be using 0x0f (top right) if you want white. That would be the first thing I'd try.

Beyond that, you probably need to ensure you're running in 32-bit flat mode (or equivalent), where the current selector used is based at physical address zero and large enough to reach video memory, the normal 32-bit flat mode 4G should do the trick :-)

That's because you're using a starting location of A0000. It's been a while since I did this level of graphics programming, long enough that I was using segment registers rather than selectors, hence had the register set to A000 and based the offsets at zero. But I do remember you needed to properly account for how logical addresses became physical ones.


Investigating the underlying OS code a little more, assuming you're using the code here as per your link, one thing I notice is the way the GDT is set up. The data segment entry is:

; Data-segment descriptor as quoted from the boot sector. Comments use `;`
; because NASM does not treat `#` as a comment character.
gdt_data:
    dw 0xffff        ; Segment limit b0-15  = ffff.
    dw 0x0           ; Segment base b0-15   = 0000.
    db 0x0           ; Segment base b16-23  = 00.
    db 0b10010010    ; Access byte.
    db 0b11001111    ; Segment limit b16-19 = f, granularity 4K.
    db 0x0           ; Segment base b24-31  = 00

That means your data selector is based at 0x00000000 with a limit of 0xfffff (since granularity is 4K rather than a single byte, this is the entire 4G space).

So, in order to get to physical address A0000, you would use (as you have) 0xA0000 - there appears to be no problem there. However, I do notice one strange line elsewhere in that code:

start_protected_mode:
    ; Load the kernel
    mov ax, DATASEG
    mov dx, ax          ;; <<-- this one.
    mov ss, ax
    mov es, ax
    mov fs, ax
    mov gs, ax

This code segment appears to be setting up all the non-CS selectors so that they use your data segment, which is as expected. However, the second mov above moves the selector into dx rather than ds, and there's no other code that appears to modify ds.

As per the OSDev x86 system initialisation page,

There are very few things that are standardized about the state of the system, when the BIOS transfers control to the bootsector. The only things that are (nearly) certain are that the bootsector code is loaded and running at physical address 0x7c00, the CPU is in 16-bit Real Mode, the CPU register called DL contains the "drive number", and that only 512 bytes of the bootsector have been loaded.

No mention is made there of the ds register content and most boot code I've seen makes no assumptions, explicitly setting everything it needs.

If ds is not referencing the correct selector, the logical-to-physical mapping may not work, and writing to A0000 will go somewhere other than intended (or fault because the selector is invalid). So the line should probably set ds rather than dx.


And, just for completeness, incorporating other issues raised by Michael Petch in the comments:

There is another serious problem with your build. Make sure when linking that obj/entry.o is listed first so that the code properly starts your kernel (see second line below):

# Line continuations added so the three lines run as a single ld command.
ld -m elf_i386 -o obj/kernel.bin -Ttext 0x1000 \
    obj/entry.o obj/main.o \
    --oformat binary

Additionally, it's possible that not setting ds correctly didn't get caught in qemu as x86 and x86-64 software emulation may skip certain checks when running in user mode, such as exceeding segment limits.

However, running it with the --enable-kvm option, or in full system mode (or, of course, on actual hardware), may have seen a fault raised.

paxdiablo
  • 854,327
  • 234
  • 1,573
  • 1,953
  • I just put that there because I read something along those lines on a question, however it did nothing. I previously used 0x0F and I still had the same problem. – SKZI Aug 03 '23 at 01:09
  • @SKZI: then we probably need to look at how the selectors are set up so that we can confirm logical-to-physical address mapping is correct (see the second part of my answer). – paxdiablo Aug 03 '23 at 01:14
  • 3
    @SKZI : Although your Github repository isn't the latest code, there is a typo in boot_sect.asm. `mov ax, DATASEG` `mov dx, ax` and should have been `mov ax, DATASEG` `mov ds, ax`. There is another serious problem with your build. Make sure when linking that `obj/entry.o` is listed FIRST so that code properly starts your kernel. So `ld -m elf_i386 -o obj/kernel.bin -Ttext 0x1000 obj/main.o obj/entry.o --oformat binary` should be `ld -m elf_i386 -o obj/kernel.bin -Ttext 0x1000 obj/entry.o obj/main.o --oformat binary` . That is on top of Pax's comments about the color. – Michael Petch Aug 03 '23 at 01:26
  • It works! I probably should have caught that on my own. Thanks to both of you for pointing this stuff out. Happy Coding! – SKZI Aug 03 '23 at 01:42
  • Looks like @MichaelPetch solved it in parallel while I was investigating the source code. I had added the `dx/ds` issue independently but I'll also add the link order as well ... – paxdiablo Aug 03 '23 at 01:44
  • @paxdiablo : Regarding his `gdt_data`. While the limit value is 0xfffff the granularity bit is set (bit 7 of `db 0b11001111`). With the granularity bit set 0xfffff represents the number of 4KiB pages that are addressable. His GDT is correct and would make a flat 4GiB address space. Not setting DS correctly didn't get caught in QEMU since software emulation skips many checks including exceeding segment limits. Running QEMU with the `--enable-kvm` option would likely have seen a triple fault. – Michael Petch Aug 03 '23 at 01:58
  • 2
    Thanks, @MichaelPetch, have added that to the answer as well, *and* attributed your additions correctly (had mistakenly attributed to OP as I cut'n'pasted the wrong link). – paxdiablo Aug 03 '23 at 02:25
  • 1
    An equally serious problem with not setting DS is that the base might be wrong; it'll still be whatever base was set by the BIOS before switching back into real mode to load a legacy 16-bit MBR. (Or if you did set DS to match ORG at the top of the 16-bit bootloader, then it's still set from that, and actually will be consistent across machines.) But yeah, real hardware will raise a fault from the segment limit if you try to use a flat memory model with DS.base=0 and DS.limit=64K to access VGA framebuffer, so interesting point that QEMU skips that check. – Peter Cordes Aug 03 '23 at 04:47
  • For extra info, I know earlier versions of qemu did not check some things in user mode, segment limit and permission checking on every memory access would have been horrifically expensive. I can see PRs that added this for jumps (so you wouldn't attempt to run code that wasn't there) but there appears to be no general checking in any of the PRs my search-fu found. A proposal was made in 2019 to add this checking but it didn't seem to go anywhere. The dynamic generation of target platform code makes it hard to figure out if it's being done as a side-effect of something else. – paxdiablo Aug 03 '23 at 05:04
  • I have an SO answer that contains bootloader tips and assumptions one shouldn't make etc as well as what might be needed to work on real hardware. https://stackoverflow.com/a/32705076/3857942 – Michael Petch Aug 03 '23 at 21:03