@@ -30,6 +30,7 @@
#include <linux/kasan.h>
#include <linux/memremap.h>
#include <linux/slab.h>
+#include <uapi/linux/mman.h>
struct mempolicy;
struct anon_vma;
@@ -257,9 +258,17 @@ extern struct rw_semaphore nommu_region_sem;
extern unsigned int kobjsize(const void *objp);
#endif
+/*
+ * MM_SEAL_ALL is the set of all seal flags supported by mseal().
+ */
+#define MM_SEAL_ALL ( \
+ MM_SEAL_SEAL | \
+ MM_SEAL_BASE | \
+ MM_SEAL_PROT_PKEY)
+
/*
* vm_flags in vm_area_struct, see mm_types.h.
- * When changing, update also include/trace/events/mmflags.h
+ * When changing, update also include/trace/events/mmflags.h.
*/
#define VM_NONE 0x00000000
@@ -3308,6 +3317,40 @@ static inline void mm_populate(unsigned long addr, unsigned long len)
static inline void mm_populate(unsigned long addr, unsigned long len) {}
#endif
+#ifdef CONFIG_MSEAL
+static inline bool check_vma_seals_mergeable(unsigned long vm_seals)
+{
+ /*
+ * For now, a sealed VMA is not mergeable with any other VMA.
+ * A later commit will make sealed VMAs mergeable as well.
+ */
+ if (vm_seals & MM_SEAL_ALL)
+ return false;
+
+ return true;
+}
+
+/*
+ * Return the valid seal bits of the VMA (after masking with MM_SEAL_ALL).
+ */
+static inline unsigned long vma_seals(struct vm_area_struct *vma)
+{
+ return (vma->vm_seals & MM_SEAL_ALL);
+}
+
+#else
+static inline bool check_vma_seals_mergeable(unsigned long vm_seals)
+{
+ return true;
+}
+
+static inline unsigned long vma_seals(struct vm_area_struct *vma)
+{
+ return 0;
+}
+#endif
+
/* These take the mm semaphore themselves */
extern int __must_check vm_brk(unsigned long, unsigned long);
extern int __must_check vm_brk_flags(unsigned long, unsigned long, unsigned long);
@@ -687,6 +687,13 @@ struct vm_area_struct {
struct vma_numab_state *numab_state; /* NUMA Balancing state */
#endif
struct vm_userfaultfd_ctx vm_userfaultfd_ctx;
+#ifdef CONFIG_MSEAL
+ /*
+ * Bit mask of seal types.
+ * A separate field is needed because vm_flags is full.
+ */
+ unsigned long vm_seals; /* seal flags, see mm.h. */
+#endif
} __randomize_layout;
#ifdef CONFIG_SCHED_MM_CID
@@ -812,6 +812,8 @@ asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags);
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
unsigned long prot, unsigned long pgoff,
unsigned long flags);
+asmlinkage long sys_mseal(unsigned long start, size_t len, unsigned long types,
+ unsigned long flags);
asmlinkage long sys_mbind(unsigned long start, unsigned long len,
unsigned long mode,
const unsigned long __user *nmask,
@@ -55,4 +55,8 @@ struct cachestat {
__u64 nr_recently_evicted;
};
+#define MM_SEAL_SEAL _BITUL(0)
+#define MM_SEAL_BASE _BITUL(1)
+#define MM_SEAL_PROT_PKEY _BITUL(2)
+
#endif /* _UAPI_LINUX_MMAN_H */
@@ -195,6 +195,7 @@ COND_SYSCALL(migrate_pages);
COND_SYSCALL(move_pages);
COND_SYSCALL(set_mempolicy_home_node);
COND_SYSCALL(cachestat);
+COND_SYSCALL(mseal);
COND_SYSCALL(perf_event_open);
COND_SYSCALL(accept4);
@@ -1258,6 +1258,15 @@ config LOCK_MM_AND_FIND_VMA
bool
depends on !STACK_GROWSUP
+config MSEAL
+ bool "Enable mseal() system call"
+ default n
+ depends on MMU
+ help
+ Enable virtual memory sealing.
+ This feature allows each virtual memory area (VMA) to be sealed
+ separately, with multiple sealing types.
+
source "mm/damon/Kconfig"
endmenu
@@ -120,6 +120,7 @@ obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
obj-$(CONFIG_PAGE_TABLE_CHECK) += page_table_check.o
obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
obj-$(CONFIG_SECRETMEM) += secretmem.o
+obj-$(CONFIG_MSEAL) += mseal.o
obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
@@ -740,6 +740,9 @@ static inline bool is_mergeable_vma(struct vm_area_struct *vma,
return false;
if (!anon_vma_name_eq(anon_vma_name(vma), anon_name))
return false;
+ if (!check_vma_seals_mergeable(vma_seals(vma)))
+ return false;
+
return true;
}
new file mode 100644
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Implement mseal() syscall.
+ *
+ * Copyright (c) 2023 Google, Inc.
+ *
+ * Author: Jeff Xu <jeffxu@chromium.org>
+ */
+
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/syscalls.h>
+#include <linux/sched.h>
+#include "internal.h"
+
+static bool can_do_mseal(unsigned long types, unsigned long flags)
+{
+ /* Check that types only contains supported seal flags. */
+ if (types & ~MM_SEAL_ALL)
+ return false;
+
+ /* flags is reserved for future use; it must be zero for now. */
+ if (flags)
+ return false;
+
+ return true;
+}
+
+/*
+ * Check if the given seal types can be added to the VMA.
+ */
+static bool can_add_vma_seals(struct vm_area_struct *vma, unsigned long newtypes)
+{
+ /* When MM_SEAL_SEAL is set, reject adding any new type of seal. */
+ if ((vma->vm_seals & MM_SEAL_SEAL) &&
+ (newtypes & ~(vma_seals(vma))))
+ return false;
+
+ return true;
+}
+
+static int mseal_fixup(struct vma_iterator *vmi, struct vm_area_struct *vma,
+ struct vm_area_struct **prev, unsigned long start,
+ unsigned long end, unsigned long addtypes)
+{
+ int ret = 0;
+
+ if (addtypes & ~(vma_seals(vma))) {
+ /*
+ * Split at start and end if needed.
+ * For now a sealed VMA doesn't merge with other VMAs;
+ * a later commit will make sealed VMAs mergeable as well.
+ */
+ if (start != vma->vm_start) {
+ ret = split_vma(vmi, vma, start, 1);
+ if (ret)
+ goto out;
+ }
+
+ if (end != vma->vm_end) {
+ ret = split_vma(vmi, vma, end, 0);
+ if (ret)
+ goto out;
+ }
+
+ vma->vm_seals |= addtypes;
+ }
+
+out:
+ *prev = vma;
+ return ret;
+}
+
+/*
+ * Pre-checks for do_mseal:
+ * 1> start is part of a valid VMA.
+ * 2> end is part of a valid VMA.
+ * 3> No gap (unallocated memory) between start and end.
+ * 4> The requested seal types can be added to the given address range.
+ */
+static int check_mm_seal(unsigned long start, unsigned long end,
+ unsigned long newtypes)
+{
+ struct vm_area_struct *vma;
+ unsigned long nstart = start;
+
+ VMA_ITERATOR(vmi, current->mm, start);
+
+ /* Go through each VMA in the range and check it. */
+ for_each_vma_range(vmi, vma, end) {
+ if (vma->vm_start > nstart)
+ /* unallocated memory found. */
+ return -ENOMEM;
+
+ if (!can_add_vma_seals(vma, newtypes))
+ return -EACCES;
+
+ if (vma->vm_end >= end)
+ return 0;
+
+ nstart = vma->vm_end;
+ }
+
+ return -ENOMEM;
+}
+
+/*
+ * Apply sealing.
+ */
+static int apply_mm_seal(unsigned long start, unsigned long end,
+ unsigned long newtypes)
+{
+ unsigned long nstart, nend;
+ struct vm_area_struct *vma, *prev = NULL;
+ struct vma_iterator vmi;
+ int error = 0;
+
+ vma_iter_init(&vmi, current->mm, start);
+ vma = vma_find(&vmi, end);
+
+ prev = vma_prev(&vmi);
+ if (start > vma->vm_start)
+ prev = vma;
+
+ nstart = start;
+
+ /* Go through each VMA in the range and update it. */
+ for_each_vma_range(vmi, vma, end) {
+ nend = vma->vm_end;
+ if (nend > end)
+ nend = end;
+
+ error = mseal_fixup(&vmi, vma, &prev, nstart, nend, newtypes);
+ if (error)
+ break;
+
+ nstart = vma->vm_end;
+ }
+
+ return error;
+}
+
+/*
+ * mseal(2) seals a VMA's metadata against
+ * modification by selected syscalls.
+ *
+ * addr/len: VM address range.
+ *
+ * The address range given by addr/len must meet:
+ * start (addr) must be in a valid VMA.
+ * end (addr + len) must be in a valid VMA.
+ * no gap (unallocated memory) between start and end.
+ * start (addr) must be page aligned.
+ *
+ * len: len will be page aligned implicitly.
+ *
+ * types: bit mask for sealed syscalls.
+ * MM_SEAL_BASE: prevent the VMA from:
+ * 1> Being unmapped, moved, or shrunk via munmap() and mremap();
+ *    these operations can leave an empty space that can then be
+ *    replaced by a VMA with a new set of attributes.
+ * 2> Having a different VMA moved or expanded into its location,
+ *    via mremap().
+ * 3> Being modified via mmap(MAP_FIXED).
+ * 4> Being expanded via mremap(). Size expansion does not appear
+ *    to pose any specific risk to sealed VMAs; it is included
+ *    anyway because the use case is unclear. In any case, users
+ *    can rely on merging to expand a sealed VMA.
+ *
+ * MM_SEAL_PROT_PKEY:
+ * Seal the PROT and PKEY of the address range; in other words,
+ * mprotect() and pkey_mprotect() will be denied if the memory is
+ * sealed with MM_SEAL_PROT_PKEY.
+ *
+ * MM_SEAL_SEAL:
+ * Deny adding any new seal type to the VMA.
+ *
+ * The kernel will remember which seal types are applied, and the
+ * application doesn't need to repeat all existing seal types in
+ * the next mseal(). Once a seal type is applied, it can't be
+ * unsealed. Calling mseal() with an already-applied seal type is a
+ * no-op, not a failure.
+ *
+ * flags: reserved for future use; must be zero.
+ *
+ * return values:
+ * zero: success.
+ * -EINVAL:
+ * invalid seal type.
+ * invalid input flags.
+ * addr is not page aligned.
+ * addr + len overflow.
+ * -ENOMEM:
+ * addr is not a valid address (not allocated).
+ * end (addr + len) is not a valid address.
+ * a gap (unallocated memory) between start and end.
+ * -EACCES:
+ * MM_SEAL_SEAL is already set and adding a new seal type is rejected.
+ *
+ * Note:
+ * Users can call mseal(2) multiple times to add new seal types.
+ * Adding an already-added seal type is a no-op (no error).
+ * Adding a new seal type after MM_SEAL_SEAL has been set is rejected.
+ * unseal(), i.e. removing a seal type, is not supported.
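+ *
+ * A minimal usage sketch (hypothetical userspace code; it assumes the
+ * syscall is wired up in the architecture's syscall table as
+ * __NR_mseal and that the installed <linux/mman.h> exposes the
+ * MM_SEAL_* flags):
+ *
+ *   ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ *              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ *   ... set up the mapping ...
+ *   if (syscall(__NR_mseal, (unsigned long)ptr, size,
+ *               MM_SEAL_SEAL | MM_SEAL_BASE | MM_SEAL_PROT_PKEY, 0))
+ *       perror("mseal");
+ *
+ * After this, further mseal() calls that try to add a new seal type
+ * to the range are rejected (-EACCES) because MM_SEAL_SEAL is set;
+ * once the enforcement for munmap()/mremap()/mprotect() lands in
+ * later commits of this series, those calls on the sealed range are
+ * expected to be denied as well.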
+ */
+static int do_mseal(unsigned long start, size_t len_in, unsigned long types,
+ unsigned long flags)
+{
+ int ret = 0;
+ unsigned long end;
+ struct mm_struct *mm = current->mm;
+ size_t len;
+
+ /* Sealing with MM_SEAL_PROT_PKEY implies MM_SEAL_BASE. */
+ if (types & MM_SEAL_PROT_PKEY)
+ types |= MM_SEAL_BASE;
+
+ if (!can_do_mseal(types, flags))
+ return -EINVAL;
+
+ start = untagged_addr(start);
+ if (!PAGE_ALIGNED(start))
+ return -EINVAL;
+
+ len = PAGE_ALIGN(len_in);
+ /* Check to see whether len was rounded up from small -ve to zero. */
+ if (len_in && !len)
+ return -EINVAL;
+
+ end = start + len;
+ if (end < start)
+ return -EINVAL;
+
+ if (end == start)
+ return 0;
+
+ if (mmap_write_lock_killable(mm))
+ return -EINTR;
+
+ ret = check_mm_seal(start, end, types);
+ if (ret)
+ goto out;
+
+ ret = apply_mm_seal(start, end, types);
+
+out:
+ mmap_write_unlock(mm);
+ return ret;
+}
+
+SYSCALL_DEFINE4(mseal, unsigned long, start, size_t, len, unsigned long, types,
+ unsigned long, flags)
+{
+ return do_mseal(start, len, types, flags);
+}