@@ -1538,6 +1538,17 @@
Permit 'security.evm' to be updated regardless of
current integrity status.
+ exec.seal_system_mappings = [KNL]
+ Format: { no | yes }
+ Seal system mappings: vdso, vvar, sigpage, vsyscall,
+ uprobe.
+ - 'no': do not seal system mappings.
+ - 'yes': seal system mappings.
+ This overrides CONFIG_SEAL_SYSTEM_MAPPINGS=(y/n).
+ If not specified or invalid, the default is the value set by
+ CONFIG_SEAL_SYSTEM_MAPPINGS.
+ This option has no effect if CONFIG_64BIT=n.
+
early_page_ext [KNL,EARLY] Enforces page_ext initialization to earlier
stages so cover more early boot allocations.
Please note that as side effect some optimizations
@@ -130,6 +130,10 @@ Use cases
- Chrome browser: protect some security sensitive data structures.
+- seal system mappings:
+ kernel config CONFIG_SEAL_SYSTEM_MAPPINGS seals system mappings such
+ as vdso, vvar, sigpage, uprobes and vsyscall.
+
When not to use mseal
=====================
Applications can apply sealing to any virtual memory region from userspace,
@@ -44,6 +44,7 @@ config ARM64
select ARCH_HAS_SETUP_DMA_OPS
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_SET_MEMORY
+ select ARCH_HAS_SEAL_SYSTEM_MAPPINGS
select ARCH_STACKWALK
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
@@ -26,6 +26,7 @@ config X86_64
depends on 64BIT
# Options that are inherently 64-bit kernel only:
select ARCH_HAS_GIGANTIC_PAGE
+ select ARCH_HAS_SEAL_SYSTEM_MAPPINGS
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
select ARCH_SUPPORTS_PER_VMA_LOCK
select ARCH_SUPPORTS_HUGE_PFNMAP if TRANSPARENT_HUGEPAGE
@@ -366,8 +366,12 @@ void __init map_vsyscall(void)
set_vsyscall_pgtable_user_bits(swapper_pg_dir);
}
- if (vsyscall_mode == XONLY)
- vm_flags_init(&gate_vma, VM_EXEC);
+ if (vsyscall_mode == XONLY) {
+ unsigned long vm_flags = VM_EXEC;
+
+ vm_flags |= seal_system_mappings();
+ vm_flags_init(&gate_vma, vm_flags);
+ }
BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
(unsigned long)VSYSCALL_ADDR);
@@ -4238,4 +4238,16 @@ int arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *st
int arch_set_shadow_stack_status(struct task_struct *t, unsigned long status);
int arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status);
+/*
+ * Returns VM_SEALED if sealing of system mappings is enabled, 0 otherwise.
+ */
+#ifndef CONFIG_64BIT
+static inline unsigned long seal_system_mappings(void)
+{
+	return 0;
+}
+#else
+unsigned long seal_system_mappings(void);
+#endif
+
#endif /* _LINUX_MM_H */
@@ -1860,6 +1860,31 @@ config ARCH_HAS_MEMBARRIER_CALLBACKS
config ARCH_HAS_MEMBARRIER_SYNC_CORE
bool
+config ARCH_HAS_SEAL_SYSTEM_MAPPINGS
+ bool
+ help
+ Control SEAL_SYSTEM_MAPPINGS access based on architecture.
+
+ A 64-bit kernel is required for the memory sealing feature.
+ No specific hardware features from the CPU are needed.
+
+ To enable this feature, the architecture needs to be tested to
+ confirm that it doesn't unmap/remap system mappings during the
+ lifetime of the process. After the architecture enables this,
+ a distribution can set CONFIG_SEAL_SYSTEM_MAPPINGS to manage access
+ to the feature.
+
+ CONFIG_SEAL_SYSTEM_MAPPINGS already checks the CHECKPOINT_RESTORE
+ feature, which is known to remap/unmap vdso. Thus, the presence of
+ CHECKPOINT_RESTORE is not considered a factor in enabling
+ ARCH_HAS_SEAL_SYSTEM_MAPPINGS for an architecture.
+
+ For a complete list of system mappings, please see
+ CONFIG_SEAL_SYSTEM_MAPPINGS.
+
+ For a complete description of memory sealing, please see
+ Documentation/userspace-api/mseal.rst
+
config HAVE_PERF_EVENTS
bool
help
@@ -2133,6 +2133,16 @@ struct vm_area_struct *_install_special_mapping(
unsigned long addr, unsigned long len,
unsigned long vm_flags, const struct vm_special_mapping *spec)
{
+ /*
+ * At present, all mappings (vdso, vvar, sigpage, and uprobe) that
+ * invoke the _install_special_mapping function can be sealed.
+ * Therefore, it is logical to call the seal_system_mappings()
+ * function here. In the future, if this is not the case, i.e. if certain
+ * mappings cannot be sealed, then it would be necessary to move this
+ * check to the calling function.
+ */
+ vm_flags |= seal_system_mappings();
+
return __install_special_mapping(mm, addr, len, vm_flags, (void *)spec,
&special_mapping_vmops);
}
@@ -7,6 +7,7 @@
* Author: Jeff Xu <jeffxu@chromium.org>
*/
+#include <linux/fs_parser.h>
#include <linux/mempolicy.h>
#include <linux/mman.h>
#include <linux/mm.h>
@@ -266,3 +267,41 @@ SYSCALL_DEFINE3(mseal, unsigned long, start, size_t, len, unsigned long,
{
return do_mseal(start, len, flags);
}
+
+/*
+ * Default from CONFIG_SEAL_SYSTEM_MAPPINGS, overridable on the kernel cmdline.
+ */
+enum seal_system_mappings_type {
+	SEAL_SYSTEM_MAPPINGS_DISABLED = 0,	/* cmdline value "no" */
+	SEAL_SYSTEM_MAPPINGS_ENABLED = 1,	/* cmdline value "yes" */
+};
+
+static enum seal_system_mappings_type seal_system_mappings_v __ro_after_init =
+	IS_ENABLED(CONFIG_SEAL_SYSTEM_MAPPINGS) ?
+	SEAL_SYSTEM_MAPPINGS_ENABLED : SEAL_SYSTEM_MAPPINGS_DISABLED;
+
+static const struct constant_table value_table_sys_mapping[] __initconst = {
+	{ "no", SEAL_SYSTEM_MAPPINGS_DISABLED },
+	{ "yes", SEAL_SYSTEM_MAPPINGS_ENABLED },
+	{ }
+};
+
+/* Handle "exec.seal_system_mappings="; unmatched values keep the setting. */
+static int __init early_seal_system_mappings_override(char *buf)
+{
+	int prev = seal_system_mappings_v;
+
+	if (!buf)
+		return -EINVAL;
+	seal_system_mappings_v = lookup_constant(value_table_sys_mapping, buf, prev);
+	return 0;
+}
+early_param("exec.seal_system_mappings", early_seal_system_mappings_override);
+
+/* VM_SEALED when sealing of system mappings is enabled, otherwise 0. */
+unsigned long seal_system_mappings(void)
+{
+	bool enabled = seal_system_mappings_v == SEAL_SYSTEM_MAPPINGS_ENABLED;
+
+	return enabled ? VM_SEALED : 0;
+}
@@ -51,6 +51,30 @@ config PROC_MEM_NO_FORCE
endchoice
+config SEAL_SYSTEM_MAPPINGS
+ bool "seal system mappings"
+ default n
+ depends on 64BIT
+ depends on ARCH_HAS_SEAL_SYSTEM_MAPPINGS
+ depends on !CHECKPOINT_RESTORE
+ help
+ Seal system mappings such as vdso, vvar, sigpage, vsyscall, uprobes.
+
+ A 64-bit kernel is required for the memory sealing feature.
+ No specific hardware features from the CPU are needed.
+
+ Depends on ARCH_HAS_SEAL_SYSTEM_MAPPINGS.
+
+ CHECKPOINT_RESTORE might relocate the vdso mapping during restore,
+ and remap/unmap will fail when the mapping is sealed; therefore
+ !CHECKPOINT_RESTORE is added as a dependency.
+
+ Kernel command line exec.seal_system_mappings=(no/yes) overrides
+ this.
+
+ For a complete description of memory sealing, please see
+ Documentation/userspace-api/mseal.rst
+
config SECURITY
bool "Enable different security models"
depends on SYSFS