There have been some efforts to deal with this problem. It's really annoying when you are not interested in the irq/exception handlers.
QEMU
Actually, QEMU has already considered this situation. The QEMU gdbstub internally has two flags: NOIRQ and NOTIMER. These two flags prevent IRQs from being injected into the guest and pause timer clock emulation in single-step mode.
You can query the capability of your QEMU with:
(gdb) maintenance packet qqemu.sstepbits
sending: "qqemu.sstepbits"
received: "ENABLE=1,NOIRQ=2,NOTIMER=4"
For KVM, you probably need Linux kernel v5.12+ on your host to support NOIRQ, which implements ioctl KVM_CAP_SET_GUEST_DEBUG2.
But please note: NOIRQ only prevents IRQs; exceptions/traps are still injected.
GDB
The NOIRQ and NOTIMER flags in QEMU still cannot prevent exception/trap handlers from being executed. So you will probably end up in 'unexpected' code all of a sudden. For example, a store instruction can lead to a page-fault exception.
So it's better to solve this in the gdb client, that is, by using breakpoints to do soft stepping instead of hardware stepping.
GDB has different stepping implementation for various architectures:
- x86: always uses hardware stepping.
- arm: prefer hardware stepping if supported by target. Otherwise, soft stepping is used.
- aarch64: always uses hardware stepping, except for atomic sequences.
- loongarch: always uses soft stepping.
- riscv: always uses soft stepping.
So you will never hit this issue on loongarch and riscv. Here is a gdb extension I wrote, 'gdb-os-helper.py', which provides basic soft-stepping support for the x86/arm/aarch64 architectures.
# -*- coding: utf-8 -*-
"""gdb command extensions for better stepping with qemu guest os.
The main purpose is to get rid of the influence of cpu exceptions.
Provided commands:
- bni/bsi: stepping over/into next instruction.
- bn/bs: stepping over/into next source line.
Copyright (C) 2022 Author Changbin Du <changbin.du@gmail.com>
"""
try:
from capstone import *
from capstone.arm import *
from capstone.arm64 import *
from capstone.x86 import *
except ModuleNotFoundError:
print("python module 'capstone' is not installed")
class BniBreakpoint(gdb.Breakpoint):
    """
    Internal, non-temporary breakpoint placed on one candidate "next pc".

    A hardware breakpoint is used when this gdb exposes
    BP_HARDWARE_BREAKPOINT; otherwise an ordinary breakpoint is used.
    """

    def __init__(self, addr):
        """Create the breakpoint at address `addr` (location spec '*addr')."""
        # Old gdb versions do not define BP_HARDWARE_BREAKPOINT, so fall
        # back to the plain breakpoint kind when the attribute is missing.
        bp_kind = getattr(gdb, 'BP_HARDWARE_BREAKPOINT', gdb.BP_BREAKPOINT)
        super().__init__(f'*{addr}', type=bp_kind, internal=True,
                         temporary=False)
class BreakpointBasedNextInstruction(gdb.Command):
    """
    Stepping with breakpoints. Useful for debugging OS in QEMU.
    """

    def __init__(self, name, step_into):
        """Register the command with gdb.

        name: command name as typed at the gdb prompt.
        step_into: True to follow call instructions into the callee,
            False to step over them.
        """
        super().__init__(name, gdb.COMMAND_BREAKPOINTS, gdb.COMPLETE_NONE, False)
        self.step_into = step_into

    def invoke(self, arg, from_tty):
        """Execute one instruction by breaking on every possible next pc.

        The instruction at the selected frame's pc is decoded to collect
        all addresses execution may reach next; a breakpoint is planted on
        each, the inferior is continued, and the breakpoints are removed
        again once it stops.
        """
        frame = gdb.selected_frame()
        arch_name = frame.architecture().name()
        pc = frame.pc()

        if arch_name == 'aarch64':
            pcs = self.do_aarch64(frame, pc)
        elif arch_name == 'armv7':
            pcs = self.do_arm(frame, pc)
        elif arch_name == 'i386:x86-64':
            pcs = self.do_x86(frame, pc, CS_MODE_64)
        elif arch_name == 'i386':
            pcs = self.do_x86(frame, pc, CS_MODE_32)
        else:
            print(f'not supported arch {arch_name}')
            return

        bps = []
        try:
            # setup breakpoints on all possible pc
            for addr in pcs:
                bps.append(BniBreakpoint(addr))
            # go
            gdb.execute('continue')
        finally:
            # Always clean up, even if 'continue' was interrupted or a
            # breakpoint could not be created, so that no internal
            # breakpoints are left behind.
            for bp in bps:
                if bp.is_valid():
                    bp.delete()

    @staticmethod
    def _decode_one(md, code, pc):
        """Decode the single instruction in `code` located at address `pc`.

        Raises gdb.GdbError when capstone cannot decode the bytes, so the
        user sees a plain error message instead of a bare StopIteration.
        """
        md.detail = True
        insn = next(md.disasm(code, pc), None)
        if insn is None:
            raise gdb.GdbError(f'cannot disassemble instruction at {pc:#x}')
        return insn

    def do_x86(self, frame, pc, mode):
        """Return the candidate next pcs for the x86 instruction at `pc`.

        mode: capstone mode constant (CS_MODE_64 or CS_MODE_32, as passed
            by invoke); also selects the size of the return address read
            from the stack for 'ret'.
        """
        # x86 instructions are variable length; ask gdb how long this one is.
        insn_len = frame.architecture().disassemble(pc)[0]['length']
        code = gdb.selected_inferior().read_memory(pc, insn_len)
        insn = self._decode_one(Cs(CS_ARCH_X86, mode), code.tobytes(), pc)

        # Fall-through address is always a candidate.
        pcs = [pc + insn_len,]
        if insn.group(X86_GRP_JUMP) or (self.step_into and insn.group(X86_GRP_CALL)):
            target = insn.operands[0]
            if target.type == X86_OP_REG:
                pcs.append(frame.read_register(insn.reg_name(target.reg)))
            elif target.type == X86_OP_IMM:
                pcs.append(target.imm)
            else:
                # e.g. memory-indirect jumps/calls are not decoded
                print(f'unsupported insn {insn}')
        elif insn.group(X86_GRP_RET):
            # get return address from stack
            raw = gdb.selected_inferior().read_memory(
                frame.read_register('sp'), 8 if mode == CS_MODE_64 else 4)
            pcs.append(int.from_bytes(raw.tobytes(), "little"))
        return pcs

    def do_arm(self, frame, pc):
        """Return the candidate next pcs for the ARM-mode instruction at `pc`."""
        code = gdb.selected_inferior().read_memory(pc, 4)
        insn = self._decode_one(Cs(CS_ARCH_ARM, CS_MODE_ARM), code.tobytes(), pc)

        def _loaded_pc(base_reg, reglist, lowest_offset):
            """Return the value a load-multiple puts into PC, or None.

            base_reg: name of the base register.
            reglist: capstone operands forming the register list.
            lowest_offset: offset from the base register of the lowest
                address transferred.
            """
            base = int(frame.read_register(base_reg)) & 0xFFFFFFFF
            for idx, opd in enumerate(reglist):
                if opd.type == ARM_OP_REG and opd.reg == ARM_REG_PC:
                    # Registers are transferred lowest-numbered register at
                    # the lowest address, whatever the addressing mode.
                    slot = (base + lowest_offset + 4 * idx) & 0xFFFFFFFF
                    raw = gdb.selected_inferior().read_memory(slot, 4)
                    return int.from_bytes(raw.tobytes(), "little")
            return None

        # Fall-through address is always a candidate.
        pcs = [pc + 4,]
        if insn.id == ARM_INS_B or (self.step_into and insn.id == ARM_INS_BL):
            pcs.append(insn.operands[0].imm)
        elif insn.id == ARM_INS_BX or (self.step_into and insn.id == ARM_INS_BLX):
            pcs.append(frame.read_register(insn.reg_name(insn.operands[0].reg)))
        elif insn.id in (ARM_INS_CBZ, ARM_INS_CBNZ):
            pcs.append(insn.operands[1].imm)
        elif insn.id == ARM_INS_POP:
            target = _loaded_pc('sp', insn.operands, 0)
            # A pop that does not load PC simply falls through.
            if target is not None:
                pcs.append(target)
        elif insn.id in (ARM_INS_LDM, ARM_INS_LDMIB, ARM_INS_LDMDA, ARM_INS_LDMDB):
            reglist = insn.operands[1:]
            span = 4 * len(reglist)
            # Lowest transferred address relative to the base register:
            #   IA: Rn          IB: Rn + 4
            #   DA: Rn - span + 4   DB: Rn - span
            lowest_offset = {
                ARM_INS_LDM: 0,
                ARM_INS_LDMIB: 4,
                ARM_INS_LDMDA: 4 - span,
                ARM_INS_LDMDB: -span,
            }[insn.id]
            target = _loaded_pc(insn.reg_name(insn.operands[0].reg),
                                reglist, lowest_offset)
            # Only a register list containing PC changes control flow.
            if target is not None:
                pcs.append(target)
        elif insn.group(ARM_GRP_JUMP):
            print(f'unsupported insn {insn}')
        return pcs

    def do_aarch64(self, frame, pc):
        """Return the candidate next pcs for the AArch64 instruction at `pc`."""
        code = gdb.selected_inferior().read_memory(pc, 4)
        insn = self._decode_one(Cs(CS_ARCH_ARM64, CS_MODE_ARM), code.tobytes(), pc)

        # Fall-through address is always a candidate.
        pcs = [pc + 4,]
        if insn.id == ARM64_INS_B or (self.step_into and insn.id == ARM64_INS_BL):
            pcs.append(insn.operands[0].imm)
        elif insn.id == ARM64_INS_BR or (self.step_into and insn.id == ARM64_INS_BLR):
            pcs.append(frame.read_register(insn.reg_name(insn.operands[0].reg)))
        elif insn.id in (ARM64_INS_CBZ, ARM64_INS_CBNZ):
            pcs.append(insn.operands[1].imm)
        elif insn.id in (ARM64_INS_TBZ, ARM64_INS_TBNZ):
            pcs.append(insn.operands[2].imm)
        elif insn.id == ARM64_INS_RET:
            # 'ret' without an explicit operand returns through lr.
            reg = insn.reg_name(insn.operands[0].reg) if len(insn.operands) > 0 else 'lr'
            pcs.append(frame.read_register(reg))
        elif insn.group(ARM64_GRP_JUMP):
            print(f'unsupported insn {insn}')
        return pcs
class BreakpointBasedNextLine(gdb.Command):
    """
    Run until next line. Source level stepping with breakpoints.
    """

    def __init__(self, name, step_into):
        """Register the command with gdb.

        name: command name as typed at the gdb prompt.
        step_into: True to use 'bsi' (step into calls) for each
            instruction step, False to use 'bni' (step over calls).
        """
        super().__init__(name, gdb.COMMAND_BREAKPOINTS, gdb.COMPLETE_NONE, False)
        self.step_into = step_into

    def do_step(self):
        """Perform one instruction step, capturing the command's output."""
        gdb.execute('bsi' if self.step_into else 'bni', to_string=True)

    def invoke(self, arg, from_tty):
        """Step instruction by instruction until the source line changes.

        When the current pc has no source information, a single
        instruction step is performed instead.
        """
        progspace = gdb.current_progspace()
        start = progspace.find_pc_line(gdb.selected_frame().pc())
        if start.symtab is None:
            # no source info, stepping by instruction
            self.do_step()
            return

        start_file = start.symtab.filename
        # okay, stepping until leaving current line
        while True:
            self.do_step()
            here = gdb.current_progspace().find_pc_line(gdb.selected_frame().pc())
            if here.symtab is None:
                break
            # The same line number in a different file is a different
            # source line, so compare the file as well as the number.
            if here.line != start.line or here.symtab.filename != start_file:
                break
# Instruction-level stepping commands.
BreakpointBasedNextInstruction(name='bni', step_into=False)
BreakpointBasedNextInstruction(name='bsi', step_into=True)

# Source-line stepping commands.
BreakpointBasedNextLine(name='bn', step_into=False)
BreakpointBasedNextLine(name='bs', step_into=True)

# Short usage banner shown when the script is sourced.
print("usage:\n"
      "- bni/bsi: stepping over/into next instruction.\n"
      "- bn/bs: stepping over/into next source line.")
You can use it as below:
(gdb) target remote :1234
Remote debugging using :1234
0xffffffff81eb4234 in default_idle () at arch/x86/kernel/process.c:731
731 }
=> 0xffffffff81eb4234 <default_idle+20>: c3 ret
0xffffffff81eb4235: 66 66 2e 0f 1f 84 00 00 00 00 00 data16 cs nopw 0x0(%rax,%rax,1)
(gdb) source ~/work/gdb-os-helper.py
usage:
- bni/bsi: stepping over/into next instruction.
- bn/bs: stepping over/into next source line.
(gdb) bni
[Switching to Thread 1.5]
Thread 5 hit Breakpoint -2, default_idle_call () at kernel/sched/idle.c:117
117 raw_local_irq_disable();
(gdb) bn
Thread 1 hit Breakpoint -3, default_idle_call () at kernel/sched/idle.c:119
119 ct_idle_exit();
=> 0xffffffff81eb4562 <default_idle_call+114>: e8 e9 d0 fe ff call 0xffffffff81ea1650 <ct_idle_exit>
(gdb)
Thread 1 hit Breakpoint -4, default_idle_call () at kernel/sched/idle.c:121
121 raw_local_irq_enable();
Enjoy!