I am interested in in-process isolation on x86/Linux via memory protection keys (MPK) and the protection-key rights register PKRU.
My setup includes administrator code that allocates memory with specific access privileges via MPK, moves the user stack pointer to that memory, and subsequently invokes user code. If the user code causes an exception, I want the signal handler to pass execution back to the administrator.
I got this to work; a minimal implementation demonstrating the administrator/user switch and signal handling is attached. It performs the following steps:
- (1) allocate pkey 1 and associated memory, setup user stack in that memory
- (2) switch RSP to user stack
- (3a) user causes exception triggering signal handler
- (4) signal handler returns to administrator
Let's assume the user code does not need access to any system resources. To limit the damage the user code can do, I would like to disable writes to all memory that does not belong to that user, in particular to pages with pkey 0 (setting WD0=true, i.e. PKRU=0x55555552). That is, step 3a is replaced by either 3b or 3c:
- (3b) WD0=true // user causes no exception // WD0=false => works just fine
- (3c) WD0=true // user causes FPE or SEGV // WD0=false => system calls related to signal handling crash, see details below/or in code
Is there a solution, or am I making a mistake? Or is it generally not possible to handle exceptions successfully if WD0=true -- which would defeat the purpose of MPK?
Thank you for any help!!
Minimal implementation: whether pkey 0 is write-disabled and which exception the user code raises are controlled by two switches, PROTECT_WD0 and INVOKE_SEGV, respectively. Switching between administrator and user happens in main().
- INVOKE_SEGV=0/PROTECT_WD0=0: user process causes FPE; PKRU is 0x55555550
- INVOKE_SEGV=1/PROTECT_WD0=0: user process causes SEGV
- INVOKE_SEGV=0/PROTECT_WD0=1: user process causes FPE, PKRU is 0x55555552 => crashes upon entering signal handler handler_asm()
- INVOKE_SEGV=1/PROTECT_WD0=1: user process causes SEGV => crashes during unblock_signal => __GI___sigprocmask => __GI___pthread_sigmask => syscall 14
#include <stdexcept>

#include <signal.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#include <sys/mman.h>
#include <sys/syscall.h>
// gcc -O0 -fexceptions -fnon-call-exceptions -g main2.cpp
// gdb ./a.out
// after exception continue to handler via "signal SIGSEGV" or "signal SIGFPE"
// Build-time switches for the demo.
// PROTECT_WD0=1: pkey 0 is write-disabled while the user code runs
#define PROTECT_WD0 1
// INVOKE_SEGV=1: user code raises SIGSEGV; INVOKE_SEGV=0: user code raises SIGFPE
#define INVOKE_SEGV 1
// Stack pointer value used for the user code (points into the pkey-tagged mapping; set in main)
uint8_t *ustack;
// Administrator stack pointer, saved in main while the user stack is active
uint8_t *ostack;
// Code address that handler() stores into RIP so execution resumes in main
void *ripret;
/////////////////////////////////////////////////////////////////////////////////////
// init_handler installs signal handler "handler_asm"
// handler_asm resets pkru before calling handler
// handler modifies RIP so that execution resumes after the faulting code
// unblock_signal ensures the signal can be delivered again
//
// modified from https://github.com/Plaristote/segvcatch
// except handler_asm, modified from https://github.com/IAIK/Donky
// Argument block handed to the raw rt_sigaction syscall by init_handler().
// The k_ prefix distinguishes it from the C library's struct sigaction.
struct kernel_sigaction {
    // three-argument (SA_SIGINFO style) handler entry point
    void (*k_sa_sigaction)(int, siginfo_t *, void *);
    // SA_* flag bits
    unsigned long k_sa_flags;
    // stub the handler returns into
    void (*k_sa_restorer)(void);
    // signals to block while the handler runs
    sigset_t k_sa_mask;
};
// RESTORE(name, syscall) emits a top-level asm stub labelled "__<name>" in .text
// that loads the given syscall number into rax and executes `syscall`.
// The extra RESTORE -> RESTORE2 hop lets the preprocessor expand the `syscall`
// argument (e.g. __NR_rt_sigreturn) to its numeric value before `#syscall`
// turns it into a string.
# define RESTORE(name, syscall) RESTORE2 (name, syscall)
# define RESTORE2(name, syscall) \
asm ( \
".text\n" \
".byte 0 # Yes, this really is necessary\n" \
".align 16\n" \
"__" #name ":\n" \
" movq $" #syscall ", %rax\n" \
" syscall\n" \
);
/* The return code for realtime-signals: defines the stub __restore_rt, which
   issues the rt_sigreturn syscall. */
RESTORE (restore_rt, __NR_rt_sigreturn)
/* C-level declaration bound to the assembler symbol __restore_rt, with hidden
   visibility; init_handler() installs it as the signal restorer. */
void restore_rt (void) asm ("__restore_rt")
__attribute__ ((visibility ("hidden")));
// Removes `signum` from the calling thread's blocked-signal mask so that the
// same signal can be delivered again.
static void unblock_signal(int signum __attribute__((__unused__))) {
    sigset_t to_unblock;

    // build a set containing only signum
    sigemptyset(&to_unblock);
    sigaddset(&to_unblock, signum);

    // Author's note: in the SIGSEGV + PROTECT_WD0 configuration the crash is observed here:
    // sigprocmask => __GI___sigprocmask => __GI___pthread_sigmask => syscall 14
    sigprocmask(SIG_UNBLOCK, &to_unblock, NULL);
}
// Exception handler
// Exception handler (reached via handler_asm).
//   s  - number of the delivered signal
//   _p - pointer to the ucontext_t describing the interrupted context
// Unblocks the signal and overwrites the saved instruction pointer with ripret.
void handler(int s, siginfo_t *, void *_p __attribute__ ((__unused__))) {
    unblock_signal(s);

    // redirect the interrupted context: the saved RIP now points at ripret
    ucontext_t *context = static_cast<ucontext_t *>(_p);
    context->uc_mcontext.gregs[REG_RIP] = reinterpret_cast<greg_t>(ripret);
}
// kernel resets pkru to 0x55555554: give full access before handling
// Signal entry stub installed by init_handler(). It is declared naked and its asm
// performs no stack access before PKRU has been rewritten.
// Per the author's note, the kernel resets PKRU to 0x55555554 on handler entry; this
// stub writes PKRU = 0 (no access-disable/write-disable bit set for any key) and then
// jumps to handler() with the three signal-handler arguments intact.
// NOTE(review): GCC documents only basic asm as supported inside naked functions; this
// body is extended asm with an "i" operand -- confirm it is reliable with the compiler used.
void __attribute__((naked)) handler_asm(int, siginfo_t*, void *) {
// SIGFPE crashes here (author's observation for the PROTECT_WD0=1 configuration)
__asm__ volatile(
"mov %%rdx, %%r14\n" // save ucontext (3rd argument): edx is zeroed below for wrpkru
"xorl %%eax, %%eax; xorl %%ecx, %%ecx; xorl %%edx, %%edx; wrpkru;" // full access: PKRU = eax = 0, with ecx = edx = 0
"mov %%r14, %%rdx\n" // restore ucontext
"jmp %P0\n" :: "i"(handler)); // jump (not call) to handler(); r14 is left overwritten
}
// install signal handlers
// Installs handler_asm as the handler for `signal` through the raw rt_sigaction
// syscall, with restore_rt as the restorer stub and an empty handler mask.
//   signal - signal number to hook (main passes SIGSEGV and SIGFPE)
// A failing syscall is reported on stderr via perror(); the function still returns
// normally so callers keep their existing control flow.
void init_handler(int signal) {
    // Flag value the original code passed as a bare 0x4000000; on Linux/x86 this is
    // the kernel's SA_RESTORER bit ("k_sa_restorer is valid").
    enum { KERNEL_SA_RESTORER = 0x4000000 };

    struct kernel_sigaction act;
    act.k_sa_sigaction = handler_asm;
    sigemptyset(&act.k_sa_mask);
    act.k_sa_flags = SA_SIGINFO | KERNEL_SA_RESTORER;
    act.k_sa_restorer = restore_rt;

    // last argument: size of the signal mask in bytes
    if (syscall(SYS_rt_sigaction, signal, &act, NULL, _NSIG / 8) != 0) {
        perror("rt_sigaction");
    }
}
/////////////////////////////////////////////////////////////////////////////////////
// Demo driver for the administrator/user switch:
//   1. allocate a protection key and a stack mapping tagged with it
//   2. install the SIGSEGV/SIGFPE handlers and record the resume address
//   3. switch RSP to the user stack and (PROTECT_WD0) write-disable pkey 0
//   4. run "user" code that faults; handler() redirects RIP to the `ret` label
//   5. write-enable pkey 0 again and switch back to the original stack
// Returns 0 on success, 1 if the key or the stack mapping could not be set up.
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;

    enum { USTACK_SIZE = 0x10000, USTACK_TOP_OFFSET = 0xFFF0 };

    // allocate pkey (assumed: 1) and the associated user stack
    int pku = pkey_alloc(0, 0);
    if (pku < 0) {
        perror("pkey_alloc");
        return 1;
    }
    void *mem = mmap(NULL, USTACK_SIZE, PROT_READ | PROT_WRITE,
                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (mem == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    if (pkey_mprotect(mem, USTACK_SIZE, PROT_READ | PROT_WRITE, pku) != 0) {
        perror("pkey_mprotect");
        return 1;
    }
    // the user stack pointer starts 16 bytes below the end of the mapping
    ustack = (uint8_t *)mem + USTACK_TOP_OFFSET;

    // initialize handling of segmentation faults and floating-point exceptions
    init_handler(SIGSEGV);
    init_handler(SIGFPE);
    ripret = &&ret;

    // ADMINISTRATOR: switch to user stack and write-disable pkey 0.
    // The first instruction writes its output before the second reads its input, so the
    // output is forced to memory and the input to a register; they can never alias.
    asm volatile("mov %%rsp, %0\n\t"
                 "mov %1, %%rsp"
                 : "=m"(ostack)
                 : "r"(ustack)
                 : "memory");
#if PROTECT_WD0
    // set WD0 (PKRU bit 1); rdpkru/wrpkru need ecx = 0, and rdpkru overwrites eax and edx
    asm volatile("xorl %%ecx, %%ecx\n\t"
                 "rdpkru\n\t"
                 "orl $2, %%eax\n\t"
                 "wrpkru"
                 :
                 :
                 : "eax", "ecx", "edx", "memory");
#endif

    // USER: segmentation fault or floating-point exception
    // (only globals are touched between the two stack switches)
#if INVOKE_SEGV
    *(int*) 0 = 0;
#else
    ustack[0] = 0;
    ustack[0] = 10/ustack[0];
#endif

ret:
    // ADMINISTRATOR: write-enable pkey 0 and switch back to original stack
#if PROTECT_WD0
    // clear WD0 explicitly (and with ~2) instead of toggling it, so the result does not
    // depend on the PKRU value present when execution arrives here
    asm volatile("xorl %%ecx, %%ecx\n\t"
                 "rdpkru\n\t"
                 "andl $~2, %%eax\n\t"
                 "wrpkru"
                 :
                 :
                 : "eax", "ecx", "edx", "memory");
#endif
    asm volatile("mov %0, %%rsp" : : "rm"(ostack) : "memory");
    printf("done\n");
    return 0;
}