1

I am writing a small 64-bit bootloader to explore assembly language and its interaction with C code. I am compiling the assembly part with NASM and the C part with GCC, then linking everything together with ld, and extracting the pure code with objcopy. The code is meant to run without Grub or any other bootloader: it loads itself from floppy disk into memory. Currently, I am looking into how C functions can use symbols defined in NASM, and I am struggling with something I thought was "easy":

I have defined a global variable in NASM that is placed in a custom section. The reason for this is that I want this variable to have a virtual address in the range > 0xffff800000000000 (the kernel space). I am taking care of the addressing in my linker script, see below. The variable is defined in the assembly file like this:

    section .kdata    
    global xyz_foo_bar
    xyz_foo_bar:
        dq 0

In the C code, I have declared a function that just increments that global variable:

    extern unsigned long xyz_foo_bar;
    void test_xyz_inc() {
        xyz_foo_bar++;
    }

This gets compiled and linked successfully - apparently. However, when I look at the disassembled function, I don't understand what I see.

    objdump.exe -M intel -d boot1.elf
    ...
    ffff800000008f73 <test_xyz_inc>:
    ffff800000008f73:       55                      push   rbp
    ffff800000008f74:       48 89 e5                mov    rbp,rsp
    ffff800000008f77:       48 8b 05 00 00 00 00    mov    rax,QWORD PTR [rip+0x0]        # ffff800000008f7e <test_xyz_inc+0xb>
    ffff800000008f7e:       48 8b 00                mov    rax,QWORD PTR [rax]
    ffff800000008f81:       48 8d 50 01             lea    rdx,[rax+0x1]
    ffff800000008f85:       48 8b 05 00 00 00 00    mov    rax,QWORD PTR [rip+0x0]        # ffff800000008f8c <test_xyz_inc+0x19>
    ffff800000008f8c:       48 89 10                mov    QWORD PTR [rax],rdx
    ffff800000008f8f:       90                      nop
    ffff800000008f90:       5d                      pop    rbp
    ffff800000008f91:       c3                      ret

Address 0xffff800000008f77: Am I right when I interpret that it is trying to dereference RIP with no displacement and use the resulting qword as an input for RAX? How does it make sense? My guess is that the displacement has not been calculated correctly by the compiler / linker.

Here is how I compile the code:

nasm -o boot1.o -l boot1.lst -f elf64 boot1.asm
gcc -ffreestanding -static-pie -c -mabi=sysv -Wall -o c_functions.o c_functions.c
ld -melf_x86_64 --build-id=none -static --unresolved-symbols=report-all -T boot1.ld boot1.o c_functions.o -o boot1.elf
objcopy -O binary boot1.elf boot1.bin

And just for the sake of completeness, here is the linker script:

OUTPUT_FORMAT("elf64-x86-64");
/* We define an entry point to keep the linker quiet. This entry point
 * has no meaning with a bootloader in the binary image we will eventually
 * generate. Bootloader will start executing at whatever is at 0x07c00 */
ENTRY(main);
INCLUDE boot1-vars.ldinc;

SECTIONS
{
    . = load_offset;
    .text : {
        /* Place the code in boot1.o before all other code */
        boot1.o(.text);     
    }
    
    _text_end = .;
    
    . += code_virtaddr;
    .ktext : AT(_ktext_physStart) {
        _ktext_physStart = . - code_virtaddr;
        boot1.o(.ktext);
        c_*.o(.text);
    }
    .kdata : {
        boot1.o(.kdata);
    }
    . -= code_virtaddr;

    /* Place the data after the code */
    .data : AT(_data_physStart) {
        _data_physStart = .;
        *(.data);
        *(.rodata*);
    }

    /* Place the uninitialised data in the area after our bootloader
     * The BIOS only reads the 512 bytes before this into memory */
    .bss : SUBALIGN(4) {
        __bss_start = .;
        *(COMMON);
        *(.bss)
        . = ALIGN(4);
        __bss_end = .;
    }
    __bss_sizeb = SIZEOF(.bss);

    /* Remove sections that won't be relevant to us */
    /DISCARD/ : {
        c_*.o(.*);
    }
    
    _end = .;
}

Is there anything basic I am missing?

PE: The contents of boot1-vars.ldinc, as requested:

load_offset = 0x7C00;
load_page = load_offset >> 12;
load_page_expand = load_page << 12;
pages_to_load = ((_end - load_page) >> 12) + 1;
sectors_to_load = ((_end - load_offset) >> 9) + 1;
mmap_special_page = load_page - 1;
mmap_special_page_virtaddr = mmap_special_page << 12;
mmap_special_page_pagetable = load_page - 2;
mmap_special_page_pagetable_virtaddr = mmap_special_page_pagetable << 12;
pmmalloc_special_page = load_page - 3;
pmmalloc_special_page_virtaddr = pmmalloc_special_page << 12;
pmmalloc_special_page_pagetable = load_page - 4;
pmmalloc_special_page_pagetable_virtaddr = pmmalloc_special_page_pagetable << 12;

mm_pml4_rm_segment = (load_page + pages_to_load) << 8;
mm_pml4_offset = 0;
mm_pml4_offset_0 = (mm_pml4_rm_segment << 4) + mm_pml4_offset;
mm_pml4_offset_1003 = mm_pml4_offset_0 + 0x1003;
mm_pml4_offset_2003 = mm_pml4_offset_0 + 0x2003;
mm_pml4_offset_3003 = mm_pml4_offset_0 + 0x3003;
mm_pml4_offset_4007 = mm_pml4_offset_0 + 0x4007;
mm_pml4_offset_5007 = mm_pml4_offset_0 + 0x5007;
mm_pml4_offset_6003 = mm_pml4_offset_0 + 0x6003;

/* kernel_stack_size = 0x2000; */

trap_div0_virtual = trap_div0;
trap_div0_virtual_16 = trap_div0_virtual & 0xffff;
trap_div0_virtual_shr16 = (trap_div0_virtual >> 16) & 0xffff;
trap_div0_virtual_shr32 = trap_div0_virtual >> 32;

trap_doubleFault_virtual = trap_doubleFault;
trap_doubleFault_virtual_16 = trap_doubleFault_virtual & 0xffff;
trap_doubleFault_virtual_shr16 = (trap_doubleFault_virtual >> 16) & 0xffff;
trap_doubleFault_virtual_shr32 = trap_doubleFault_virtual >> 32;

trap_invalidTSS_virtual = trap_invalidTSS;
trap_invalidTSS_virtual_16 = trap_invalidTSS_virtual & 0xffff;
trap_invalidTSS_virtual_shr16 = (trap_invalidTSS_virtual >> 16) & 0xffff;
trap_invalidTSS_virtual_shr32 = trap_invalidTSS_virtual >> 32;

trap_generalProtectionFault_virtual = trap_generalProtectionFault;
trap_generalProtectionFault_virtual_16 = trap_generalProtectionFault_virtual & 0xffff;
trap_generalProtectionFault_virtual_shr16 = (trap_generalProtectionFault_virtual >> 16) & 0xffff;
trap_generalProtectionFault_virtual_shr32 = trap_generalProtectionFault_virtual >> 32;

trap_pageFault_virtual = trap_pageFault;
trap_pageFault_virtual_16 = trap_pageFault_virtual & 0xffff;
trap_pageFault_virtual_shr16 = (trap_pageFault_virtual >> 16) & 0xffff;
trap_pageFault_virtual_shr32 = trap_pageFault_virtual >> 32;

trap_invalidSyscall_virtual = trap_invalidSyscall;
trap_invalidSyscall_virtual_16 = trap_invalidSyscall_virtual & 0xffff;
trap_invalidSyscall_virtual_shr16 = (trap_invalidSyscall_virtual >> 16) & 0xffff;
trap_invalidSyscall_virtual_shr32 = trap_invalidSyscall_virtual >> 32;

isr_spurious_virtual = isr_spurious;
isr_spurious_virtual_16 = isr_spurious_virtual & 0xffff;
isr_spurious_virtual_shr16 = (isr_spurious_virtual >> 16) & 0xffff;
isr_spurious_virtual_shr32 = isr_spurious_virtual >> 32;

isr_dummytmr_virtual = isr_dummytmr;
isr_dummytmr_virtual_16 = isr_dummytmr_virtual & 0xffff;
isr_dummytmr_virtual_shr16 = (isr_dummytmr_virtual >> 16) & 0xffff;
isr_dummytmr_virtual_shr32 = isr_dummytmr_virtual >> 32;

isr_userDummy_virtual = isr_userDummy;
isr_userDummy_virtual_16 = isr_userDummy_virtual & 0xffff;
isr_userDummy_virtual_shr16 = (isr_userDummy_virtual >> 16) & 0xffff;
isr_userDummy_virtual_shr32 = isr_userDummy_virtual >> 32;

tss_virtual = code_virtaddr + TSS;
tss_virtual_16 = tss_virtual & 0xffff;
tss_virtual_shr16_8 = (tss_virtual >> 16) & 0xff;
tss_virtual_shr24_8 = (tss_virtual >> 24) & 0xff;
tss_virtual_shr32 = tss_virtual >> 32;

dvdvdl
  • 41
  • 6

2 Answers2

4

You are compiling your C code with -static-pie. The code generated will require a dynamic loader to fill in the relocation entries. From the GCC documentation:

-static-pie

Produce a static position independent executable on targets that support it. A static position independent executable is similar to a static executable, but can be loaded at any address without a dynamic linker. For predictable results, you must also specify the same set of options used for compilation (-fpie, -fPIE, or model suboptions) when you specify this linker option.

Since you are ultimately generating a binary file, all that relocation information is gone. From that I can conclude that your bootloader can't be a dynamic loader; it likely just reads the binary directly into memory from the disk.

If you use objdump -rd and take a look at test_xyz_inc, you will discover that there are relocation entries for each access to the xyz_foo_bar variable. These values would normally be fixed up by a dynamic loader when the code is loaded into memory.

What you really want to do is generate non-PIC static code. Replace -static-pie with -fno-pic when compiling your C files. I also recommend removing --unresolved-symbols=report-all when linking because I believe you were masking a problem by including this. I also believe you should make sure you aren't compiling your kernel code with the red-zone so I'd suggest the extra GCC option -mno-red-zone as well.

As an example:

gcc -ffreestanding -static-pie -c -mabi=sysv -Wall -o c_functions.o c_functions.c

Should be:

gcc -ffreestanding -fno-pic -mno-red-zone -c -mabi=sysv -Wall -o \
    c_functions.o c_functions.c

When linking I suggest changing:

ld -melf_x86_64 --build-id=none -static \
    --unresolved-symbols=report-all -T boot1.ld boot1.o c_functions.o -o boot1.elf

to:

ld -melf_x86_64 --build-id=none -static -T boot1.ld boot1.o c_functions.o -o boot1.elf

Cygwin Observations

After the OP mentioned they used Cygwin with GCC 10.2, I happened to update my Cygwin system, and I can verify that even after replacing -static-pie with -fno-pic, the generated code is static but still has all the displacements from RIP set to 0, and the linker doesn't report any truncation. Trying -mcmodel=large didn't resolve the issue. I haven't had time to investigate why this is occurring, but this is a very good reason why using an x86-64 or i386/i686 ELF cross compiler for OS development is less problematic. I'd recommend building an x86-64 ELF cross compiler in Cygwin. There are general guidelines for building a cross compiler on the OSDev Wiki. I haven't attempted to use Cygwin to do such a build, so I'm unsure if there are any roadblocks that make it more difficult than a build on Linux.

Michael Petch
  • 46,082
  • 8
  • 107
  • 198
  • As well as `-mno-red-zone` and probably `-mno-sse`, I think you'd also want `-mcmodel=kernel` for position-dependent code that gets loaded in the high 2GiB of virtual address space. Putting static addresses into registers needs to be done with `mov r/m64, sign_extended_imm32`, not the normal (for user-space non-PIE) `mov r32, imm32` that zero-extends. Or with RIP-relative LEA. IDK if `-ffreestanding` implies any of those options. – Peter Cordes Oct 25 '20 at 04:08
  • I have changed the compiling and linking options as you suggested, thanks for that. However, the generated code is still the same. And you are right: the code is loading itself from the (floppy) disk into memory, I am not using grub or any other helper bootloader. – dvdvdl Oct 25 '20 at 06:41
  • @dvdvdl : If the generated code is still the same, you have either done something wrong related to the options OR there is something unusual about your compiler. What does `gcc --version` give for output? I would verify that the binary file you generated is the same one you're loading from the disk image. I can't see the changes you made or if you made them correctly. If you put your project in GitHub I could take a closer look. – Michael Petch Oct 25 '20 at 06:46
  • @dvdvdl The other thing you can do is put the output of this command `objdump -Dxr boot1.elf` into a https://pastebin and post a link in these comments. – Michael Petch Oct 25 '20 at 06:49
  • Well, it turns out that the problem is related to the GCC version I am using. I am compiling the whole thing using Cygwin GCC 10.2. Downgrading to 9.3 hasn't helped. On a Linux box with GCC 9.x, the displacement is set correctly, this is why I presume that it is a problem either with some default options set for Windows compilers, or with the compiler itself. gcc --version says: $ LANG=C gcc --version gcc (GCC) 9.3.0 Copyright (C) 2019 Free Software Foundation, Inc. – dvdvdl Oct 25 '20 at 09:23
  • Actually, it could be the linker as well, I would say. I'll analyze it further to see where exactly it is coming from. Thanks for your suggestions! – dvdvdl Oct 25 '20 at 10:09
  • @dvdvdl : By any chance are you getting `relocation truncated to fit` message when linking? Can you also add to your question the contents of `boot1-vars.ldinc` – Michael Petch Oct 25 '20 at 13:51
  • @MichaelPetch - I have added the code of the .ldinc file as requested. Thanks again! Regarding your question - that's the interesting part: I don't get any error from the linker, even though I used a symbol declared as external in the C code that never gets resolved. It definitely looks like a bug in the linker provided in the Cygwin binutils package - currently at version 2.34-2. – dvdvdl Oct 25 '20 at 14:41
  • @dvdvdl : is part of the ldinc file missing? I don't see `code_virtaddr` for example. I assume it is `0xffff800000000000` – Michael Petch Oct 25 '20 at 14:51
  • @MichaelPetch - your assumption is right. Code_virtaddr is defined as a NASM constant, however. I sense that I really should post the entire code. I will when I got some time later today. Thanks! – dvdvdl Oct 25 '20 at 15:02
3

End of story

After having taken some advice from @MichaelPetch, I built a cross compiler and binutils for the x86_64-elf target in Cygwin. I followed these OSDev Wiki pages:

The combination seems to work fine, since the missing RIP-relative displacements are set up correctly, and the calls to C functions from within assembly code do not result in a general protection fault anymore, like they used to.

Note: In order to get binutils working, I had to patch the source code as described here, otherwise gdb would fail to link:

Failed to build AVR and ARM GDB 9.1 under CygWin (..relocation truncated to fit: R_X86_64_PC32 against undefined symbol..)

Thank you very much!

dvdvdl
  • 41
  • 6