-1

I copied the bootasm.S from https://github.com/jeffallen/xv6/blob/master/bootasm.S,

#include "asm.h"

# Start the first CPU: switch to 32-bit protected mode, jump into C.
# The BIOS loads this code from the first sector of the hard disk into
# memory at physical address 0x7c00 and starts executing in real mode
# with %cs=0 %ip=7c00.

#define SEG_KCODE 1 // kernel code
#define SEG_KDATA 2 // kernel data+stack

#define CR0_PE 1 // protected mode enable bit

.code16 # Assemble for 16-bit mode
.globl start
# Real-mode entry point. The code below disables interrupts, zeroes
# DS/ES/SS, enables address line A20 through I/O ports 0x64 and 0x60,
# loads the bootstrap GDT, sets CR0_PE in %cr0 and far-jumps to start32.
# Statement order matters throughout; control never comes back here.
start:
        cli # BIOS enabled interrupts; disable

        # Set up the important data segment registers (DS, ES, SS).
        xorw %ax,%ax # Segment number zero
        movw %ax,%ds # -> Data Segment
        movw %ax,%es # -> Extra Segment
        movw %ax,%ss # -> Stack Segment

        # Physical address line A20 is tied to zero so that the first PCs
        # with 2 MB would run software that assumed 1 MB. Undo that.
        # NOTE(review): ports 0x64 and 0x60 are presumably the keyboard
        # controller command/data ports (the usual route to the A20 gate);
        # confirm for the target platform.
        seta20.1:
        inb $0x64,%al # Wait for not busy
        testb $0x2,%al # Keep polling while bit 1 of the status byte is set
        jnz seta20.1

        movb $0xd1,%al # 0xd1 -> port 0x64
        outb %al,$0x64

        seta20.2:
        inb $0x64,%al # Wait for not busy
        testb $0x2,%al # Same busy test as in seta20.1
        jnz seta20.2

        movb $0xdf,%al # 0xdf -> port 0x60
        outb %al,$0x60


        # Switch from real to protected mode. Use a bootstrap GDT that makes
        # virtual addresses map directly to physical addresses so that the
        # effective memory map doesn't change during the transition.
        lgdt gdtdesc
        movl %cr0, %eax # Read-modify-write so only CR0_PE changes
        orl $CR0_PE, %eax
        movl %eax, %cr0

        # Complete transition to 32-bit protected mode by using long jmp
        # to reload %cs and %eip. The segment registers are set up with no
        # translation, so that the mapping is still the identity mapping.
        # The selector SEG_KCODE<<3 picks GDT entry number SEG_KCODE.
        ljmp $(SEG_KCODE<<3), $start32

.code32 # Tell assembler to generate 32-bit code now.
# Protected-mode continuation, reached only through the ljmp above.
# Reloads the data segment registers, sets up a stack and calls bootmain.
start32:
        # Set up the protected-mode data segment registers
        movw $(SEG_KDATA<<3), %ax # Our data segment selector
        movw %ax, %ds # -> DS: Data Segment
        movw %ax, %es # -> ES: Extra Segment
        movw %ax, %ss # -> SS: Stack Segment
        xor  %eax, %eax  # Zero segments not ready for use
        movw %ax, %fs # -> FS
        movw %ax, %gs # -> GS

        ## sti TaoWang: It should NOT call STI here, since NO IDT is ready.
        # Set up the stack pointer and call into C.
        # The stack top is the address of start, so pushes land below the
        # boot block itself.
        movl $start, %esp
        call bootmain

        # Reached only when bootmain returns: loop forever.
    spin:
        jmp spin

# Bootstrap GDT
# Three descriptors: null, code, data. The code and data entries sit at
# indexes SEG_KCODE and SEG_KDATA, matching the selectors loaded above.
# SEG_NULLASM and SEG_ASM are not defined in this file; presumably they
# come from asm.h.
.p2align 2 # force 4 byte alignment
gdt:
SEG_NULLASM # null seg
SEG_ASM(STA_X|STA_R, 0x0, 0xffffffff) # code seg
SEG_ASM(STA_W, 0x0, 0xffffffff) # data seg

# Operand of the lgdt instruction above: a 16-bit limit, then a 32-bit base.
gdtdesc:
.word (gdtdesc - gdt - 1) # sizeof(gdt) - 1
.long gdt # address gdt

# Pad up to offset 510 from start, then emit the word 0xaa55, so that the
# bytes from start through this word span 512 bytes.
# NOTE(review): 0xaa55 is the conventional PC boot-sector signature.
.fill 510-(.-start)
.word 0xaa55

and changed bootmain.c as follows:

#include "types.h"

char    serial_buffer[256];

/*
 * my_memcpy - copy `length` bytes from `src` to `dst`, then signal the
 * outcome to whatever is hosting this code.
 *
 * dst:    destination buffer, at least `length` bytes
 * src:    source buffer, at least `length` bytes
 * length: number of bytes to copy
 *
 * Bytes are copied front to back, so a destination that overlaps the
 * source at a higher address is not supported.
 *
 * After the copy the function inspects serial_buffer[0]: it executes
 * `cli; hlt` when that byte is 'A' and `vmcall` otherwise. Neither path is
 * expected to continue normally, so callers should not rely on a return.
 */
static void my_memcpy(void *dst, void *src, u32 length)
{
    char *to = dst;
    char *from = src;
    u32 i;

    /*
     * Index both buffers. The earlier version dereferenced dst and src
     * without an offset, so it rewrote byte 0 `length` times and never
     * copied the remaining bytes.
     */
    for (i = 0; i < length; i++) {
        to[i] = from[i];
    }

    if (serial_buffer[0] == 'A') {
        asm ("cli\nhlt\n");
    } else {
        asm ("vmcall");
    }
}

/*
 * bootmain - C entry point called from start32 in bootasm.S.
 *
 * Copies the first 8 bytes of a fixed string into serial_buffer through
 * my_memcpy and returns 0. The assembly caller ignores the return value
 * and spins forever if this function returns.
 */
int bootmain(void)
{
    char *greeting = "Abcedife";

    my_memcpy(serial_buffer, greeting, 8);

    return 0;
}

/*
 * handle_page_fault - placeholder; it currently does nothing and returns
 * immediately. Nothing in this file calls it.
 */
void handle_page_fault(void)
{
}

After the code is built through the Makefile (I listed below), the code to load the output binary is here,

unsigned char tempbuf[0x400];

/*
 * file_load - copy the whole file `vmfname` into memory at CODE_START.
 *
 * vmfname: path of the image to load.
 *
 * The file is read through the global `tempbuf` in chunks of
 * sizeof(tempbuf) bytes; each chunk is copied to CODE_START plus the number
 * of bytes loaded so far. The total is printed when the end of file is
 * reached.
 *
 * Exits the process with status 2 if the file cannot be opened or a read
 * fails.
 *
 * NOTE(review): nothing here bounds the copy. The region starting at
 * CODE_START must be large enough for the file; confirm with the code that
 * sets that region up.
 */
void file_load(char *vmfname)
{
    int     vmfd;
    ssize_t cnt;            /* signed: read() reports errors as -1 */
    size_t  offset = 0;

    /* The file is only read, so do not demand write permission. */
    vmfd = open(vmfname, O_RDONLY);
    if (vmfd < 0) {
        perror(vmfname);
        exit(2);
    }

    for (;;) {
        cnt = read(vmfd, tempbuf, sizeof(tempbuf));
        if (cnt < 0) {
            /*
             * Previously -1 was stored in a size_t and handed to memcpy
             * as an enormous length.
             */
            perror("read");
            close(vmfd);
            exit(2);
        }
        if (cnt == 0) {
            break;          /* end of file */
        }

        memcpy((void *)(CODE_START + offset), tempbuf, (size_t)cnt);
        offset += (size_t)cnt;
    }
    close(vmfd);

    /* %zu is the matching conversion for size_t. */
    printf("Loading %zu bytes of VM to run\n", offset);
}

To my surprise, the while loop does NOT execute at all.
Here is my linker.ld, and I run them in Linux 4.4.0.

/*
 * Linker script for the boot block image.
 * The entry symbol is start, and the location counter begins at 0x7C00,
 * the address the boot code says it is loaded at.
 */
ENTRY(start);
SECTIONS
{
    . = 0x7C00;
    /*
     * All .text input sections. AT(0x7C00) gives the output section a load
     * address equal to the value the location counter was just set to.
     * _text and _text_end bracket the section.
     */
    .text : AT(0x7C00)
    {
        _text = .;
        *(.text);
        _text_end = .;
    }
    /*
     * Everything that is not code is gathered into one .data output
     * section: .bss*, .data, .rodata* and COMMON symbols. _data and
     * _data_end bracket it.
     */
    .data :
    {
        _data = .;
        *(.bss);
        *(.bss*);
        *(.data);
        *(.rodata*);
        *(COMMON)
        _data_end = .;
    }
        /* data, edata and end are defined only if nothing else defines them. */
        PROVIDE(data = .);

        /* The data segment */
        /*
         * NOTE(review): this repeats the .data statement above, and the .bss
         * statement below names inputs already listed there. GNU ld gives an
         * input section to the first statement that matches it, so these two
         * presumably receive nothing; confirm with a link map (-M).
         */
        .data : {
                *(.data)
        }

        PROVIDE(edata = .);

        .bss : {
                *(.bss)
        }

        PROVIDE(end = .);

        /* Drop unwinding tables and the stack note from the output. */
        /DISCARD/ : {
                *(.eh_frame .note.GNU-stack)
        }
}

The Makefile,

# Build rules for the host program and the boot block image.
# NOTE(review): the default goal depends on `test`, but the only build rule
# shown here is `guest`; confirm which name is intended.
all: test
OBJDUMP=objdump
OBJCOPY=objcopy

# Freestanding 32-bit code: no PIC, no builtins, frame pointers kept,
# warnings are errors. -fno-stack-protector is added only when the compiler
# accepts it.
CFLAGS = -fno-pic -static -fno-builtin -fno-strict-aliasing -Wall -MD -ggdb -m32 -Werror -fno-omit-frame-pointer
CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector)
ASFLAGS = -m32 -gdwarf-2 -Wa,-divide
# Pick the elf_i386 emulation name out of the emulations reported by $(LD) -V.
LDFLAGS += -m $(shell $(LD) -V | grep elf_i386 2>/dev/null)

# guest: builds the host program from test_app.c, then the boot block:
# compile bootasm.S and bootmain.c, link them with linker.ld into
# bootblock.o, write a disassembly to bootblock.asm, and extract a flat
# binary, bootblock.bin.
# NOTE(review): the -fno-pic on the two compile lines repeats what CFLAGS
# already contains.
# NOTE(review): objcopy is given only `-j .text`, so only that section is
# copied into bootblock.bin. linker.ld places .rodata, .data and .bss
# contents in the .data output section, which is therefore left out.
guest: test_app.c
        $(CC) -g2 -Wall -Wextra -Werror $^ -o $@
        $(CC) $(CFLAGS) -fno-pic -nostdinc -I. -c bootasm.S
        $(CC) $(CFLAGS) -fno-pic -I. -c bootmain.c
        $(LD) $(LDFLAGS) -N -e start -Tlinker.ld -o bootblock.o bootasm.o bootmain.o
        $(OBJDUMP) -S bootblock.o > bootblock.asm
        $(OBJCOPY) -S -O binary -j .text bootblock.o bootblock.bin

# clean: remove objects, dependency files, binaries and the disassembly.
# NOTE(review): this removes a file named `test`, not the `guest` executable
# produced above.
clean:
        rm -f *.o
        rm -f *.d
        rm -f test
        rm -f *.bin
        rm -f bootblock.asm

I don't know why the constant string is not passed correctly as the parameter, or why its content is all zeros.
If I instead use a char array and pass the array name as the parameter, as in myfputs(chararray), it works well.

wangt13
  • 959
  • 7
  • 17
  • surprised it compiles. `str` is not defined. – Tibrogargan May 24 '18 at 02:02
  • Neither is `port`. How do you know it doesn't work? What port are you writing to? – David Wohlferd May 24 '18 at 02:51
  • Fixed the code typo. In my testing, I put other codes in while (*str != 0) {}, which will crash the code, but the code did NOT crash at all. If I changed to use an array of char, and put some non-zero in the array, and called myfputs() with it, the code in while (*str != 0) will crash. That is why I need your help. – wangt13 May 24 '18 at 03:37
  • 2
    `asm volatile("cpuid");` clobbers EAX/EBX/ECX/EDX without telling the compiler. Why are you using that anyway? `outb` is already very strongly ordered. Not exactly a serializing instruction, but I don't know what difference it would make. – Peter Cordes May 24 '18 at 03:38
  • Look at the compiler's asm output ([How to remove "noise" from GCC/clang assembly output?](https://stackoverflow.com/q/38552116)) or single-step with a debugger (e.g. inside BOCHS) and the problem should be obvious. Any time you have trouble with GNU C inline asm, it makes sense to check the compiler's asm output to see what effect your constraints / clobbers had on the compiler's code-gen. – Peter Cordes May 24 '18 at 03:43
  • thanks peter. I am using these codes as a part of my own virtual machine. And i want to support something like printf in the very beginning of the system bootup. readelf -a bootblock.o shows the "XXX..X" is in data section, is it correct? And I don't know how to use BOCHS to source-debug the bootblock.bin. – wangt13 May 24 '18 at 04:09
  • Are you sure that all of your code and data was read from disk into memory? If you had a github project of your code I could take a look. As it stands though Peter's observations about CPUID stand out. – Michael Petch May 24 '18 at 17:13
  • Hi Michael, I have not created github for this test. So I just updated the code of bootmain.c in the question part, to simplify debugging and discussion. With the code, I expected to get 'hlt' be called, but instead, 'cpuid' is being called. That is the problem. I just used the same Makefile to build the binary. – wangt13 May 24 '18 at 18:36
  • 1
    re: your update: `asm ("cpuid");` will still break your code. Use `asm volatile("cpuid" ::: "eax", "ebx", "ecx", "edx");` to tell your compiler it clobbers registers. Or better, don't use it at all because it makes no sense to run it there. Use a debugger to single-step your code. – Peter Cordes May 24 '18 at 18:53
  • Geez, okay looking more closely it is rather peculiar you link to a file called `bootblock.o` - that caught me off guard. If you are generating elf executables maybe calling `bootblock.o` `bootblock.elf` would be better. – Michael Petch May 24 '18 at 19:18
  • With that being said this looks very suspicious. You do this: `$(OBJCOPY) -S -O binary -j .text bootblock.o bootblock.bin` You appear to be taking bootblock.o (which is actually not an object file) and outputting a binary file called `bootblock.bin`. The problem though is that `-j .text` will make objcopy only output the `.text` section. That's a problem if your variables are in the `.data` section. – Michael Petch May 24 '18 at 19:30
  • 2
    My guess is if you do `hexdump -C bootblock.bin` you will find that the string `Abcedife` doesn't appear. Either drop `-j .text` from your command line or also add `-j .data` so you have both (`-j` can be used multiple times) – Michael Petch May 24 '18 at 19:39
  • Hi Peter, thanks for your comments. OK, I just changed the 'cpuid' to 'vmcall', which needs no other registers. And I re-run the code, this time, i get vmcall, the same as before. So I doubt it is from the assembly code. Let me put all codes here for your reference. – wangt13 May 24 '18 at 22:02
  • Of course if you aren't using an i386 or i686 cross compiler and you are on 64-bit Linux you'll want to pass `-m32` to GCC to generate 32-bit code rather than 64-bit. And another concern would be that if you are booting this up as a bootloader (floppy disk or hard disk image) then only the first 512 bytes of your bootloader will be loaded in unless you load more sectors in your bootup assembly code before entering protected mode (if you do it before you can use BIOS interrupt Int 13h/ah=02h) – Michael Petch May 24 '18 at 22:11
  • After copying the first byte in the `my_memcpy` called from `bootmain`, `asm ("cli\nhlt\n");` runs. Nothing will happen after that. And why would you think that `vmcall` would return without having modified any registers or memory? http://felixcloutier.com/x86/VMCALL.html says it causes a VM exit, but you haven't defined any register values so how is the hypervisor supposed to know what operation it's supposed to do. Sounds like a good way to get unpredictable behaviour, like executing `syscall` with random garbage in `rax` in user-space under Linux. Try `nop` as a placeholder. – Peter Cordes May 24 '18 at 22:18
  • Firstly, thank you all for comments. Michael, could you please post a normal answer about adding -j .data to Makefile, so that I can mark it as the right answer? And for hlt, cpuid, vmcall instructions, I am running them as a VM, so that my VMM will exit on them, so that I can check what happened. – wangt13 May 24 '18 at 22:39

1 Answers1

1

I answered this question by referring to Michael's answer about adding -j .data to Makefile, so that data section will be added to the final binary, which can solve the problem.
With the change in the Makefile, now the code can work as expected.
Here is the command line for building the final binary.

 # Same build rule as in the question, with two changes: the repeated
 # -fno-pic is gone from the compile lines, and objcopy is told to copy
 # .data and .bss as well as .text (-j is given once per section).
 guest: test_app.c 
    $(CC) -g2 -Wall -Wextra -Werror $^ -o $@
    $(CC) $(CFLAGS) -nostdinc -I. -c bootasm.S
    $(CC) $(CFLAGS) -I. -c bootmain.c
    $(LD) $(LDFLAGS) -N -e start -Tlinker.ld -o bootblock.o bootasm.o bootmain.o
    $(OBJDUMP) -S bootblock.o > bootblock.asm
    $(OBJCOPY) -S -O binary -j .text -j .data -j .bss bootblock.o bootblock.bin

wangt13
  • 959
  • 7
  • 17