exitz syscall

Message ID	20231111125126.11665-1-yjnworkstation@gmail.com (mailing list archive)
State	Changes Requested
Headers	show Return-Path: <linux-security-module-owner@vger.kernel.org> From: York Jasper Niebuhr <yjnworkstation@gmail.com> To: akpm@linux-foundation.org Cc: linux-kernel@vger.kernel.org, linux-api@vger.kernel.org, linux-security-module@vger.kernel.org, torvalds@linux-foundation.org, York Jasper Niebuhr <yjnworkstation@gmail.com> Subject: [PATCH] exitz syscall Date: Sat, 11 Nov 2023 13:51:26 +0100 Message-Id: <20231111125126.11665-1-yjnworkstation@gmail.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	exitz syscall \| expand exitz syscall

diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index c8fac5205803..8be9d1471b5c 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -461,3 +461,4 @@
 454	i386	futex_wake		sys_futex_wake
 455	i386	futex_wait		sys_futex_wait
 456	i386	futex_requeue		sys_futex_requeue
+457	i386	exitz			sys_exitz
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 8cb8bf68721c..e6aeca443a88 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -378,6 +378,7 @@
 454	common	futex_wake		sys_futex_wake
 455	common	futex_wait		sys_futex_wait
 456	common	futex_requeue		sys_futex_requeue
+457	common	exitz			sys_exitz
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/include/linux/exitz.h b/include/linux/exitz.h
new file mode 100644
index 000000000000..b1a5ad194839
--- /dev/null
+++ b/include/linux/exitz.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifdef CONFIG_EXITZ_SYSCALL
+
+/*
+ * Zero resource on exit flags
+ */
+#define EZ_NONE		0x00000000
+#define EZ_MEM		0x00000001	/* Memory pages are cleared on exit */
+#define EZ_FLAGS	(EZ_MEM)
+
+/*
+ * Overwrite current process memory range with zeros (end excluded).
+ */
+int memz_range(unsigned long start, unsigned long end);
+
+/*
+ * Overwrite all flagged resources with zeros.
+ */
+void exit_z(void);
+
+/*
+ * Set task_struct flags to zero flagged resources on exit.
+ */
+void do_exitz(int flags);
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 292c31697248..cbe8c198f28e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -766,6 +766,10 @@ struct task_struct {
 	refcount_t			usage;
 	/* Per task flags (PF_*), defined further below: */
 	unsigned int			flags;
+#ifdef CONFIG_EXITZ_SYSCALL
+	/* Zero resource on exit flags (EZ_*). */
+	unsigned int			ezflags;
+#endif
 	unsigned int			ptrace;
 
 #ifdef CONFIG_SMP
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index fd9d12de7e92..8c29b9ea3677 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -949,6 +949,7 @@ asmlinkage long sys_cachestat(unsigned int fd,
 		struct cachestat_range __user *cstat_range,
 		struct cachestat __user *cstat, unsigned int flags);
 asmlinkage long sys_map_shadow_stack(unsigned long addr, unsigned long size, unsigned int flags);
+asmlinkage long sys_exitz(int flags);
 
 /*
  * Architecture-specific system calls
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 756b013fb832..782222ffa0d7 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -828,9 +828,11 @@ __SYSCALL(__NR_futex_wake, sys_futex_wake)
 __SYSCALL(__NR_futex_wait, sys_futex_wait)
 #define __NR_futex_requeue 456
 __SYSCALL(__NR_futex_requeue, sys_futex_requeue)
+#define __NR_exitz 457
+__SYSCALL(__NR_exitz, sys_exitz)
 
 #undef __NR_syscalls
-#define __NR_syscalls 457
+#define __NR_syscalls 458
 
 /*
  * 32 bit systems traditionally used different
diff --git a/kernel/Makefile b/kernel/Makefile
index 3947122d618b..17602af88adc 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -158,3 +158,5 @@ $(obj)/kheaders_data.tar.xz: FORCE
 	$(call cmd,genikh)
 
 clean-files := kheaders_data.tar.xz kheaders.md5
+
+obj-$(CONFIG_EXITZ_SYSCALL) += exitz.o
diff --git a/kernel/exit.c b/kernel/exit.c
index ee9f43bed49a..35469decd9e9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -69,6 +69,7 @@
 #include <linux/rethook.h>
 #include <linux/sysfs.h>
 #include <linux/user_events.h>
+#include <linux/exitz.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -808,6 +809,10 @@ static void synchronize_group_exit(struct task_struct *tsk, long code)
 
 void __noreturn do_exit(long code)
 {
+#ifdef CONFIG_EXITZ_SYSCALL
+	exit_z();
+#endif
+
 	struct task_struct *tsk = current;
 	int group_dead;
 
diff --git a/kernel/exitz.c b/kernel/exitz.c
new file mode 100644
index 000000000000..33a0b16f93a9
--- /dev/null
+++ b/kernel/exitz.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/exitz.h>
+#include <linux/syscalls.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/string.h>
+
+#define EZ_MAX_PAGES_ARRAY_COUNT 16
+#define EZ_MAX_KMALLOC_PAGES (PAGE_SIZE * 2)
+#define EZ_MAX_PAGES_PER_LOOP (EZ_MAX_KMALLOC_PAGES / sizeof(struct page *))
+
+/*
+ * Overwrite a range of process memory with zeros (end excluded).
+ *
+ * The range is pinned and zeroed in batches of at most
+ * EZ_MAX_PAGES_PER_LOOP pages. Pages are pinned with pin_user_pages(),
+ * so the caller must hold the mmap_lock of current->mm.
+ *
+ * Returns 0 on success or for an empty range, -ENOMEM if the page array
+ * cannot be allocated, and -EFAULT if pinning fails part-way through (pages
+ * zeroed before the failure stay zeroed).
+ */
+int memz_range(unsigned long start, unsigned long end)
+{
+	struct page *pages_stack[EZ_MAX_PAGES_ARRAY_COUNT];
+	struct page **pages = pages_stack;
+	unsigned long page_address = start & PAGE_MASK;
+	unsigned long nr_pages;
+	int ret = 0;
+
+	if (end <= start)
+		return 0;
+
+	nr_pages = (end - 1) / PAGE_SIZE - start / PAGE_SIZE + 1;
+
+	if (nr_pages > EZ_MAX_PAGES_ARRAY_COUNT) {
+		/* For reliability, cap kmalloc size */
+		pages = kmalloc(min_t(size_t, EZ_MAX_KMALLOC_PAGES,
+				      sizeof(struct page *) * nr_pages),
+				GFP_KERNEL);
+
+		if (!pages)
+			return -ENOMEM;
+	}
+
+	while (nr_pages) {
+		long pinned_pages = min(nr_pages, EZ_MAX_PAGES_PER_LOOP);
+
+		pinned_pages = pin_user_pages(page_address, pinned_pages, FOLL_WRITE, pages);
+
+		/* Nothing is pinned here; break so the page array is still freed */
+		if (pinned_pages <= 0) {
+			ret = -EFAULT;
+			break;
+		}
+
+		/* Map and zero each page */
+		for (long i = 0; i < pinned_pages; i++) {
+			void *kaddr = kmap_local_page(pages[i]);
+			unsigned long page_offset = 0;
+			unsigned long page_part;
+
+			/* Only the first page can begin below start */
+			if (page_address < start)
+				page_offset = min_t(unsigned long, start - page_address, PAGE_SIZE);
+
+			/* Clamp to end so the last page is only zeroed up to it */
+			page_part = min_t(unsigned long, PAGE_SIZE, end - page_address) -
+				    page_offset;
+
+			memset(kaddr + page_offset, 0, page_part);
+
+			kunmap_local(kaddr);
+			page_address += PAGE_SIZE;
+		}
+
+		nr_pages -= pinned_pages;
+
+		unpin_user_pages_dirty_lock(pages, pinned_pages, 1);
+	}
+
+	if (pages != pages_stack)
+		kfree(pages);
+
+	return ret;
+}
+
+/*
+ * Overwrite any memory associated with the current process with zeros.
+ *
+ * Does nothing unless EZ_MEM is set in current->ezflags. Failures on
+ * individual VMAs are ignored; the remaining VMAs are still processed.
+ */
+static void exit_memz(void)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+
+	if (!mm || !(current->ezflags & EZ_MEM))
+		return;
+
+	VMA_ITERATOR(vmi, mm, 0);
+
+	/* The VMA walk and pin_user_pages() both need mmap_lock held */
+	mmap_read_lock(mm);
+	for_each_vma(vmi, vma) {
+		memz_range(vma->vm_start, vma->vm_end);
+	}
+	mmap_read_unlock(mm);
+}
+
+/*
+ * Overwrite all flagged resources with zeros.
+ */
+void exit_z(void)
+{
+	exit_memz();
+}
+
+/*
+ * Set task_struct flags to zero flagged resources on exit.
+ */
+void do_exitz(int flags)
+{
+	current->ezflags = flags;
+}
+
+#ifdef CONFIG_EXITZ_SYSCALL
+/*
+ * exitz syscall: validate flags against EZ_FLAGS and store them for the
+ * calling task. Returns 0, or -EINVAL if an unknown flag bit is set.
+ */
+SYSCALL_DEFINE1(exitz, int, flags)
+{
+	if (flags & ~EZ_FLAGS)
+		return -EINVAL;
+
+	do_exitz(flags);
+	return 0;
+}
+#endif
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index e1a6e3c675c0..ff5468f1d2f2 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -254,6 +254,9 @@ COND_SYSCALL(pkey_free);
 /* memfd_secret */
 COND_SYSCALL(memfd_secret);
 
+/* exitz */
+COND_SYSCALL(exitz);
+
 /*
  * Architecture specific weak syscall entries.
  */
diff --git a/mm/mmap.c b/mm/mmap.c
index 4f1cb814586d..d66bd314aca9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -47,6 +47,7 @@
 #include <linux/oom.h>
 #include <linux/sched/mm.h>
 #include <linux/ksm.h>
+#include <linux/exitz.h>
 
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
@@ -225,6 +226,12 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 
 	/* Always allow shrinking brk. */
 	if (brk <= mm->brk) {
+		/* Overwrite memory with zeros */
+#ifdef CONFIG_EXITZ_SYSCALL
+		if (current->ezflags & EZ_MEM)
+			memz_range(brk, mm->brk);
+#endif
+
 		/* Search one past newbrk */
 		vma_iter_init(&vmi, mm, newbrk);
 		brkvma = vma_find(&vmi, oldbrk);
@@ -3001,6 +3008,15 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 
 static int __vm_munmap(unsigned long start, size_t len, bool unlock)
 {
+#ifdef CONFIG_EXITZ_SYSCALL
+	if (current->ezflags & EZ_MEM) {
+		/* memz_range() pins user pages, which requires mmap_lock */
+		mmap_read_lock(current->mm);
+		memz_range(start, start + len);
+		mmap_read_unlock(current->mm);
+	}
+#endif
+
 	int ret;
 	struct mm_struct *mm = current->mm;
 	LIST_HEAD(uf);
diff --git a/security/Kconfig b/security/Kconfig
index 52c9af08ad35..3509bb5fb2f4 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -249,5 +249,14 @@ config LSM
 
 source "security/Kconfig.hardening"
 
+config EXITZ_SYSCALL
+	bool "Exitz syscall" if EXPERT
+	default y
+	help
+	  sys_exitz is a system call to flag a process' resources to be erased
+	  on exit. It can be used to harden the system against memory forensics
+	  attacks after a process has finished. It is meant to be a more fine
+	  grained alternative to CONFIG_INIT_ON_FREE_DEFAULT_ON.
+
 endmenu
 

Commit Message

Comments

Patch