So, as I said in the comments above, it appears that the function change_memory_common() (which is used by set_memory_ro() and set_memory_rw()) performs a check before applying the requested permissions. This is documented with a comment:
/*
* Kernel VA mappings are always live, and splitting live section
* mappings into page mappings may cause TLB conflicts. This means
* we have to ensure that changing the permission bits of the range
* we are operating on does not result in such splitting.
*
* Let's restrict ourselves to mappings created by vmalloc (or vmap).
* Those are guaranteed to consist entirely of page mappings, and
* splitting is never needed.
*
* So check whether the [addr, addr + size) interval is entirely
* covered by precisely one VM area that has the VM_ALLOC flag set.
*/
area = find_vm_area((void *)addr);
if (!area ||
end > (unsigned long)area->addr + area->size ||
!(area->flags & VM_ALLOC))
return -EINVAL;
The function seems to only work for mappings created through vmalloc() or vmap(), and the sys_call_table does not reside in a mapping of that kind.
The concern seems to be around TLB conflicts. I'm really not sure why this would cause TLB conflicts, since the function seems to properly call flush_tlb_kernel_range(), but I am no ARM expert, so I might be missing something. I suppose one could call flush_tlb_all(), but it seems unnecessary. Any additional insight is welcome!
In any case, for the purpose of your exercise on syscall hijacking, you can write your own version of change_memory_common() and set_memory_rw()/set_memory_ro() that avoids this check. An easier way would be to just get the appropriate PTE for the desired address and then change its permissions, but I didn't look through all the countless macros for that.
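Last, but not least, since the sys_call_table could end up crossing a page boundary, it's better to use the address of the specific entry (sys_call_table + __NR_read, written as syscall_table + __NR_read in the example below) instead of just the start of sys_call_table when choosing the page whose permissions are changed.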
Here's a working example:
// SPDX-License-Identifier: GPL-3.0
#include <linux/init.h> // module_{init,exit}()
#include <linux/module.h> // THIS_MODULE, MODULE_VERSION, ...
#include <linux/kernel.h> // printk(), pr_*()
#include <linux/errno.h> // ENOENT
#include <linux/kallsyms.h> // kallsyms_lookup_name()
#include <asm/syscall.h> // syscall_fn_t, __NR_*
#include <asm/ptrace.h> // struct pt_regs
#include <asm/tlbflush.h> // flush_tlb_kernel_range()
#include <asm/pgtable.h> // {clear,set}_pte_bit(), set_pte()
#include <linux/vmalloc.h> // vm_unmap_aliases()
#include <linux/mm.h> // struct mm_struct, apply_to_page_range()
#include <linux/kconfig.h> // IS_ENABLED()
// Prefix every pr_*() message emitted by this file with the module name.
// Any earlier definition is dropped first; #undef of an undefined macro is a
// no-op, so no guard is needed.
#undef pr_fmt
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
// Address of the kernel's init_mm, looked up by name in modinit() and passed
// to apply_to_page_range() by __change_memory_common().
static struct mm_struct *init_mm_ptr;
// Address of the kernel's sys_call_table, looked up by name in modinit().
static syscall_fn_t *syscall_table;
// Handler found in the __NR_read slot before it was replaced; myread()
// forwards to it and modexit() writes it back.
static syscall_fn_t original_read;
/********** HELPERS **********/
// From arch/arm64/mm/pageattr.c.
// Pair of protection-bit masks handed to change_page_range() through the
// opaque data pointer of apply_to_page_range().
struct page_change_data {
// Bits to set in each visited PTE.
pgprot_t set_mask;
// Bits to clear in each visited PTE (cleared before set_mask is applied).
pgprot_t clear_mask;
};
// From arch/arm64/mm/pageattr.c.
// Per-PTE callback for apply_to_page_range(): rewrites one page table entry
// by first clearing the bits in clear_mask and then setting the bits in
// set_mask.
//
// ptep: entry to rewrite.
// addr: unused here.
// data: pointer to a struct page_change_data holding the two masks.
//
// Always returns 0.
static int change_page_range(pte_t *ptep, unsigned long addr, void *data)
{
	const struct page_change_data *masks = data;
	pte_t updated;

	// Read the entry once, then apply "clear" before "set".
	updated = clear_pte_bit(READ_ONCE(*ptep), masks->clear_mask);
	updated = set_pte_bit(updated, masks->set_mask);
	set_pte(ptep, updated);

	return 0;
}
// From arch/arm64/mm/pageattr.c.
// Applies the given set/clear masks to every PTE that maps
// [start, start + size) in the mm pointed to by init_mm_ptr, then flushes the
// kernel TLB for that range.
//
// Returns whatever apply_to_page_range() returned; the TLB flush is issued
// regardless of that result.
static int __change_memory_common(unsigned long start, unsigned long size,
				  pgprot_t set_mask, pgprot_t clear_mask)
{
	struct page_change_data masks = {
		.set_mask = set_mask,
		.clear_mask = clear_mask,
	};
	int err;

	err = apply_to_page_range(init_mm_ptr, start, size,
				  change_page_range, &masks);
	flush_tlb_kernel_range(start, start + size);

	return err;
}
// Simplified set_memory_rw() from arch/arm64/mm/pageattr.c.
static int set_page_rw(unsigned long addr)
{
vm_unmap_aliases();
return __change_memory_common(addr, PAGE_SIZE, __pgprot(PTE_WRITE), __pgprot(PTE_RDONLY));
}
// Simplified set_memory_ro() from arch/arm64/mm/pageattr.c.
static int set_page_ro(unsigned long addr)
{
vm_unmap_aliases();
return __change_memory_common(addr, PAGE_SIZE, __pgprot(PTE_RDONLY), __pgprot(PTE_WRITE));
}
/********** ACTUAL MODULE **********/
// Replacement installed in the __NR_read slot of the syscall table: logs the
// call and then forwards it, with the untouched register set, to the handler
// saved in original_read.
//
// Returns whatever the saved handler returns.
static long myread(const struct pt_regs *regs)
{
	long ret;

	pr_info("read() called\n");
	ret = original_read(regs);

	return ret;
}
// Module entry point: resolves init_mm and sys_call_table by name, saves the
// current __NR_read handler and replaces it with myread().
//
// The page holding the __NR_read slot is made writable only while the slot is
// being patched and is switched back to read-only afterwards.
//
// Returns 0 on success, -ENOENT if a required symbol cannot be resolved, or
// the error returned by set_page_rw()/set_page_ro().
static int __init modinit(void)
{
	unsigned long page;
	int res;

	pr_info("init\n");

	// kallsyms_lookup_name() yields 0 for a symbol it cannot find; stop here
	// rather than dereference a NULL pointer further down.
	init_mm_ptr = (struct mm_struct *)kallsyms_lookup_name("init_mm");
	if (!init_mm_ptr) {
		pr_err("init_mm not found\n");
		return -ENOENT;
	}

	syscall_table = (syscall_fn_t *)kallsyms_lookup_name("sys_call_table");
	if (!syscall_table) {
		pr_err("sys_call_table not found\n");
		return -ENOENT;
	}

	// The table may span more than one page, so work on the page that
	// contains the slot being patched, not on the start of the table.
	page = (unsigned long)(syscall_table + __NR_read) & PAGE_MASK;
	original_read = syscall_table[__NR_read];

	res = set_page_rw(page);
	if (res != 0) {
		pr_err("set_page_rw() failed: %d\n", res);
		return res;
	}

	syscall_table[__NR_read] = myread;

	res = set_page_ro(page);
	if (res != 0) {
		pr_err("set_page_ro() failed: %d\n", res);
		// Returning an error makes the kernel discard this module, so the
		// table must not be left pointing at myread(). The page was made
		// writable just above, hence the slot can still be written.
		syscall_table[__NR_read] = original_read;
		return res;
	}

	pr_info("init done\n");
	return 0;
}
// Module exit point: writes the saved handler back into the __NR_read slot of
// the syscall table.
//
// The page holding the slot is made writable for the update and read-only
// again afterwards. If the page cannot be made writable the function logs the
// error and returns early, leaving the slot as it is.
static void __exit modexit(void)
{
	const unsigned long page =
		(unsigned long)(syscall_table + __NR_read) & PAGE_MASK;
	int err;

	pr_info("exit\n");

	err = set_page_rw(page);
	if (err) {
		pr_err("set_page_rw() failed: %d\n", err);
		return;
	}

	syscall_table[__NR_read] = original_read;

	err = set_page_ro(page);
	if (err)
		pr_err("set_page_ro() failed: %d\n", err);

	pr_info("goodbye\n");
}
// Register modinit()/modexit() as the module's load and unload handlers.
module_init(modinit);
module_exit(modexit);
// Module metadata.
MODULE_VERSION("0.1");
MODULE_DESCRIPTION("Syscall hijack on arm64.");
MODULE_AUTHOR("Marco Bonelli");
MODULE_LICENSE("GPL");