I was writing a function which initializes an array of two structs. This function works perfectly with O0 and O1 optimization levels, but it breaks with O2, causing an Invalid opcode exception on the line I indicated with an arrow.
This code runs inside QEMU in a 32-bit bare-metal setup. I can't understand why it crashes when executing the movdqa instruction.
What could be causing this error? Could it be a compiler bug? Or are we maybe missing something (e.g. configuring the FPU/SSE unit or something like that)?
C code
Note: This is not the real code, only a minimal reproducible example that compiles to the same assembly.
/**
* gcc version 12.2
* CFLAGS: -ffreestanding -m32 -O2 -std=gnu99 -Wall
* -Wextra -Werror -fno-pie -fno-stack-protector -g
*/
#include <stdint.h>
#include <string.h>
/*
 * Disk layout constants. The *_START / *_SIZE pairs are used as the
 * .start / .size members of struct fs_block entries.
 */

/* Region occupied by BUTOS: starting sector and length. */
#define BUTOS_START       0
#define BUTOS_SIZE        0x25

/* Presumably the sector that holds the fstab; unused in the visible code. */
#define FSTAB_SECTOR      0x24

/* Region occupied by the filesystem: starting sector and length. */
#define FILESYSTEM_START  0x25
#define FILESYSTEM_SIZE   0x10

/* Expands to a struct fs_block compound literal with the given start/size. */
#define FS_BLOCK(START, SIZE) \
    (struct fs_block){ .start = (START), .size = (SIZE) }
/**
 * One fstab entry: a contiguous region on disk, given by where it
 * begins and how long it is.
 */
struct fs_block {
    uint32_t start; /* first sector of the region, as an LBA */
    uint32_t size;  /* length of the region, in sectors */
};
/**
 * Writes the two-entry fstab to the beginning of a caller-supplied buffer.
 *
 * Entry 0 is built from BUTOS_START/BUTOS_SIZE and entry 1 from
 * FILESYSTEM_START/FILESYSTEM_SIZE. The entries are copied with memcpy,
 * i.e. in the in-memory layout of struct fs_block.
 *
 * NOTE(review): at -O2 GCC may build this table with SSE moves
 * (movdqa/movaps). On bare metal that presumably requires SSE to be
 * enabled beforehand, or the file to be built with -mno-sse /
 * -mgeneral-regs-only -- confirm against the boot code and build flags.
 *
 * @param buff_sector Destination buffer; must have room for at least
 *                    two struct fs_block entries.
 */
void fstab_write(uint8_t *buff_sector)
{
    struct fs_block table[2];

    table[0] = FS_BLOCK(BUTOS_START, BUTOS_SIZE);
    table[1] = FS_BLOCK(FILESYSTEM_START, FILESYSTEM_SIZE);

    memcpy(buff_sector, table, sizeof table);
}
Assembly output
O1:
/* fstab_write as compiled at -O1: the table is built on the stack with
   four scalar 32-bit stores, then handed to memcpy. */
fstab_write:
sub esp, 32 /* reserve 32 bytes of stack */
mov DWORD PTR [esp+4], 0 /* fstab[0].start = 0 */
mov DWORD PTR [esp+8], 37 /* fstab[0].size = 0x25 */
mov DWORD PTR [esp+12], 37 /* fstab[1].start = 0x25 */
mov DWORD PTR [esp+16], 16 /* fstab[1].size = 0x10 */
push 16 /* memcpy arg 3: sizeof(fstab) */
lea eax, [esp+8] /* address of fstab (esp moved down by 4 after the push) */
push eax /* memcpy arg 2: source */
push DWORD PTR [esp+44] /* memcpy arg 1: the incoming buff_sector argument */
call memcpy
add esp, 44 /* release the 12 bytes of arguments plus the 32 reserved bytes */
ret
O2:
/* fstab_write as compiled at -O2: the four 32-bit initializers are merged
   into one 16-byte constant (.LC0) that is copied to the stack through an
   SSE register. */
fstab_write:
sub esp, 32 /* reserve 32 bytes of stack */
/* Load all 16 bytes of the table from .LC0 into xmm0.
   NOTE(review): an invalid-opcode fault here presumably means SSE has not
   been enabled on this CPU (CR0.EM / CR4.OSFXSR) -- confirm against the
   boot code, or build with -mno-sse / -mgeneral-regs-only. */
==> movdqa xmm0, XMMWORD PTR .LC0 /*point of crash */
movaps XMMWORD PTR [esp+4], xmm0 /* store the 16 bytes into the stack copy of fstab */
push 16 /* memcpy arg 3: sizeof(fstab) */
lea eax, [esp+8] /* address of fstab (esp moved down by 4 after the push) */
push eax /* memcpy arg 2: source */
push DWORD PTR [esp+44] /* memcpy arg 1: the incoming buff_sector argument */
call memcpy
add esp, 44 /* release the 12 bytes of arguments plus the 32 reserved bytes */
ret
/* Constant image of the table: {0, 0x25} followed by {0x25, 0x10}. */
.LC0:
.long 0
.long 37
.long 37
.long 16