@@ -846,6 +846,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif
+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
@@ -521,6 +521,39 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.
+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack, has asm/scs.h, and implements runtime support for shadow
+ stack switching.
+
+config SHADOW_CALL_STACK_VMAP
+ bool
+ depends on SHADOW_CALL_STACK
+ help
+ Use virtually mapped shadow call stacks. Selecting this option
+ provides better stack exhaustion protection, but increases per-thread
+ memory consumption as a full page is allocated for each shadow stack.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found from
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the ones
+ documented for user space. The kernel must store addresses of shadow
+ stacks used by other tasks and interrupt handlers in memory, which
+ means an attacker capable reading and writing arbitrary memory may
+ be able to locate them and hijack control flow by modifying shadow
+ stacks that are not currently in use.
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
@@ -42,3 +42,9 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#else
+# define __noscs
+#endif
@@ -202,6 +202,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif
+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
new file mode 100644
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/*
+ * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
+ * architecture) provided ~40% safety margin on stack usage while keeping
+ * memory allocation overhead reasonable.
+ */
+#define SCS_SIZE 1024UL
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+/*
+ * A random number outside the kernel's virtual address space to mark the
+ * end of the shadow stack.
+ */
+#define SCS_END_MAGIC 0xaf0194819b1635f6UL
+
+#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
+
+static inline void task_set_scs(struct task_struct *tsk, void *s)
+{
+ task_scs(tsk) = s;
+}
+
+extern void scs_init(void);
+extern void scs_task_reset(struct task_struct *tsk);
+extern int scs_prepare(struct task_struct *tsk, int node);
+extern bool scs_corrupted(struct task_struct *tsk);
+extern void scs_release(struct task_struct *tsk);
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+#define task_scs(tsk) NULL
+
+static inline void task_set_scs(struct task_struct *tsk, void *s) {}
+static inline void scs_init(void) {}
+static inline void scs_task_reset(struct task_struct *tsk) {}
+static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
+static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
+static inline void scs_release(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>
#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -184,6 +185,13 @@ struct task_struct init_task
};
EXPORT_SYMBOL(init_task);
+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
+ __aligned(SCS_SIZE) = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
@@ -102,6 +102,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
obj-$(CONFIG_PERF_EVENTS) += events/
@@ -94,6 +94,7 @@
#include <linux/livepatch.h>
#include <linux/thread_info.h>
#include <linux/stackleak.h>
+#include <linux/scs.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -451,6 +452,8 @@ void put_task_stack(struct task_struct *tsk)
void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -834,6 +837,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif
+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -893,6 +898,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;
+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
@@ -11,6 +11,7 @@
#include <linux/nospec.h>
#include <linux/kcov.h>
+#include <linux/scs.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -6018,6 +6019,7 @@ void init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;
+ scs_task_reset(idle);
kasan_unpoison_task_stack(idle);
#ifdef CONFIG_SMP
new file mode 100644
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/kasan.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+static inline void *__scs_base(struct task_struct *tsk)
+{
+ /*
+ * To minimize risk the of exposure, architectures may clear a
+ * task's thread_info::shadow_call_stack while that task is
+ * running, and only save/restore the active shadow call stack
+ * pointer when the usual register may be clobbered (e.g. across
+ * context switches).
+ *
+ * The shadow call stack is aligned to SCS_SIZE, and grows
+ * upwards, so we can mask out the low bits to extract the base
+ * when the task is not running.
+ */
+ return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
+}
+
+static inline unsigned long *scs_magic(void *s)
+{
+ return (unsigned long *)(s + SCS_SIZE) - 1;
+}
+
+static inline void scs_set_magic(void *s)
+{
+ *scs_magic(s) = SCS_END_MAGIC;
+}
+
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+
+/* Matches NR_CACHED_STACKS for VMAP_STACK */
+#define NR_CACHED_SCS 2
+static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);
+
+static void *scs_alloc(int node)
+{
+ int i;
+ void *s;
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ s = this_cpu_xchg(scs_cache[i], NULL);
+ if (s) {
+ memset(s, 0, SCS_SIZE);
+ goto out;
+ }
+ }
+
+ /*
+ * We allocate a full page for the shadow stack, which should be
+ * more than we need. Check the assumption nevertheless.
+ */
+ BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
+
+ s = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL, 0,
+ node, __builtin_return_address(0));
+
+out:
+ if (s)
+ scs_set_magic(s);
+ /* TODO: poison for KASAN, unpoison in scs_free */
+
+ return s;
+}
+
+static void scs_free(void *s)
+{
+ int i;
+
+ for (i = 0; i < NR_CACHED_SCS; i++)
+ if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
+ return;
+
+ vfree_atomic(s);
+}
+
+static int scs_cleanup(unsigned int cpu)
+{
+ int i;
+ void **cache = per_cpu_ptr(scs_cache, cpu);
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ vfree(cache[i]);
+ cache[i] = NULL;
+ }
+
+ return 0;
+}
+
+void __init scs_init(void)
+{
+ WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
+ scs_cleanup));
+}
+
+#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static struct kmem_cache *scs_cache;
+
+static inline void *scs_alloc(int node)
+{
+ void *s;
+
+ s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+ if (s) {
+ scs_set_magic(s);
+ /*
+ * Poison the allocation to catch unintentional accesses to
+ * the shadow stack when KASAN is enabled.
+ */
+ kasan_poison_object_data(scs_cache, s);
+ }
+
+ return s;
+}
+
+static inline void scs_free(void *s)
+{
+ kasan_unpoison_object_data(scs_cache, s);
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
+ 0, NULL);
+ WARN_ON(!scs_cache);
+}
+
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+void scs_task_reset(struct task_struct *tsk)
+{
+ /*
+ * Reset the shadow stack to the base address in case the task
+ * is reused.
+ */
+ task_set_scs(tsk, __scs_base(tsk));
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s;
+
+ s = scs_alloc(node);
+ if (!s)
+ return -ENOMEM;
+
+ task_set_scs(tsk, s);
+ return 0;
+}
+
+bool scs_corrupted(struct task_struct *tsk)
+{
+ unsigned long *magic = scs_magic(__scs_base(tsk));
+
+ return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s;
+
+ s = __scs_base(tsk);
+ if (!s)
+ return;
+
+ WARN_ON(scs_corrupted(tsk));
+
+ task_set_scs(tsk, NULL);
+ scs_free(s);
+}