Message ID | 20220505161011.1801596-4-ardb@kernel.org (mailing list archive) |
---|---|
State | New, archived |
Series | arm64: dynamic shadow call stack support |
On Thu, May 5, 2022 at 9:10 AM Ard Biesheuvel <ardb@kernel.org> wrote:
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index b6302f7cd73f..df7a7aff456a 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -357,6 +357,14 @@ config KASAN_SHADOW_OFFSET
>  config UNWIND_TABLES
>  	bool
>
> +config UNWIND_PATCH_PAC_INTO_SCS
> +	def_bool y
> +	depends on CC_IS_CLANG && CLANG_VERSION >= 150000

Consider adding a comment that links to the corresponding GCC bug
report, which can be replaced with a version check once fixed.

> +	depends on SHADOW_CALL_STACK
> +	depends on ARM64_PTR_AUTH_KERNEL
> +	select UNWIND_TABLES
> +	select DYNAMIC_SCS
> +
>  source "arch/arm64/Kconfig.platforms"
>
>  menu "Kernel Features"
> diff --git a/arch/arm64/kernel/patch-scs.c b/arch/arm64/kernel/patch-scs.c
> new file mode 100644
> index 000000000000..8c534630c2a1
> --- /dev/null
> +++ b/arch/arm64/kernel/patch-scs.c
> @@ -0,0 +1,257 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2022 - Google LLC
> + * Author: Ard Biesheuvel <ardb@google.com>
> + */
> +
> +#include <linux/bug.h>
> +#include <linux/errno.h>
> +#include <linux/init.h>
> +#include <linux/linkage.h>
> +#include <linux/printk.h>
> +#include <linux/types.h>
> +
> +#include <asm/cpufeature.h>
> +
> +#define DW_CFA_nop 0x00
> +#define DW_CFA_set_loc 0x01
> +#define DW_CFA_advance_loc1 0x02
> +#define DW_CFA_advance_loc2 0x03
> +#define DW_CFA_advance_loc4 0x04
> +#define DW_CFA_offset_extended 0x05
> +#define DW_CFA_restore_extended 0x06
> +#define DW_CFA_undefined 0x07
> +#define DW_CFA_same_value 0x08
> +#define DW_CFA_register 0x09
> +#define DW_CFA_remember_state 0x0a
> +#define DW_CFA_restore_state 0x0b
> +#define DW_CFA_def_cfa 0x0c
> +#define DW_CFA_def_cfa_register 0x0d
> +#define DW_CFA_def_cfa_offset 0x0e
> +#define DW_CFA_def_cfa_expression 0x0f
> +#define DW_CFA_expression 0x10
> +#define DW_CFA_offset_extended_sf 0x11
> +#define DW_CFA_def_cfa_sf 0x12
> +#define DW_CFA_def_cfa_offset_sf 0x13
> +#define DW_CFA_val_offset 0x14
> +#define DW_CFA_val_offset_sf 0x15
> +#define DW_CFA_val_expression 0x16
> +#define DW_CFA_lo_user 0x1c
> +#define DW_CFA_negate_ra_state 0x2d
> +#define DW_CFA_GNU_args_size 0x2e
> +#define DW_CFA_GNU_negative_offset_extended 0x2f
> +#define DW_CFA_hi_user 0x3f

Might be more reusable to put these in their own header. Though, this
is currently the only user, so perhaps "YAGNI." If there's some
documentation to these values, consider adding a comment with a link.

> +
> +extern const u8 __eh_frame_start[], __eh_frame_end[];
> +
> +struct fde_frame {
> +        s32 initial_loc;
> +        s32 range;
> +};
> +
> +enum {
> +        PACIASP  = 0xd503233f,
> +        AUTIASP  = 0xd50323bf,
> +        SCS_PUSH = 0xf800865e,
> +        SCS_POP  = 0xf85f8e5e,
> +};

Is there anything we can reuse from arch/arm64/include/asm/insn.h
rather than hardcoding these values?

> +
> +static void __always_inline scs_patch_loc(u64 loc)
> +{
> +        u32 insn = le32_to_cpup((void *)loc);
> +
> +        switch (insn) {
> +        case PACIASP:
> +                *(u32 *)loc = cpu_to_le32(SCS_PUSH);
> +                break;
> +        case AUTIASP:
> +                *(u32 *)loc = cpu_to_le32(SCS_POP);
> +                break;
> +        default:
> +                /*
> +                 * While the DW_CFA_negate_ra_state directive is guaranteed to
> +                 * appear right after a PACIASP/AUTIASP instruction, it may
> +                 * also appear after a DW_CFA_restore_state directive that
> +                 * restores a state that is only partially accurate, and is
> +                 * followed by DW_CFA_negate_ra_state directive to toggle the
> +                 * PAC bit again. So we permit other instructions here, and ignore
> +                 * them.
> +                 */
> +                break;
> +        }
> +}
> +
> +/*
> + * Skip one uleb128/sleb128 encoded quantity from the opcode stream. All bytes
> + * except the last one have bit #7 set.

Consider using the BIT macro to express that.

> + */
> +static int __always_inline skip_xleb128(const u8 **opcode, int size)
> +{
> +        u8 c;
> +
> +        do {
> +                c = *(*opcode)++;
> +                size--;
> +        } while (c & 0x80);
> +
> +        return size;
> +}
> +
> +static int noinstr scs_handle_frame(const u8 eh_frame[], u32 size)
> +{
> +        const struct fde_frame *fde;
> +        const u8 *opcode;
> +        u64 loc;
> +
> +        /*
> +         * For patching PAC opcodes, we only care about the FDE records, and
> +         * not the CIE, which carries the initial CFA directives but they only
> +         * pertain to which register is the stack pointer.
> +         * TODO this is not 100% true - we need the augmentation string and the
> +         * encoding but they are always the same in practice.
> +         */
> +        if (*(u32 *)eh_frame == 0)
> +                return 0;
> +
> +        fde = (const struct fde_frame *)(eh_frame + 4);
> +        loc = (u64)offset_to_ptr(&fde->initial_loc);
> +        opcode = (const u8 *)(fde + 1);
> +
> +        // TODO check augmentation data
> +        WARN_ON(*opcode++);
> +        size -= sizeof(u32) + sizeof(*fde) + 1;
> +
> +        /*
> +         * Starting from 'loc', apply the CFA opcodes that advance the location
> +         * pointer, and identify the locations of the PAC instructions.
> +         */
> +        do {
> +                switch (*opcode & 0xC0) {
> +                case 0:
> +                        // handle DW_CFA_xxx opcodes
> +                        switch (*opcode) {
> +                        case DW_CFA_nop:
> +                        case DW_CFA_remember_state:
> +                        case DW_CFA_restore_state:
> +                                break;
> +
> +                        case DW_CFA_advance_loc1:
> +                                loc += *++opcode;
> +                                size--;
> +                                break;
> +
> +                        case DW_CFA_advance_loc2:
> +                                loc += *++opcode;
> +                                loc += *++opcode << 8;
> +                                size -= 2;
> +                                break;
> +
> +                        case DW_CFA_def_cfa:
> +                                opcode++;
> +                                size = skip_xleb128(&opcode, --size);
> +                                size = skip_xleb128(&opcode, size);
> +                                continue;
> +                        case DW_CFA_def_cfa_offset:
> +                        case DW_CFA_def_cfa_offset_sf:
> +                        case DW_CFA_def_cfa_register:
> +                        case DW_CFA_same_value:
> +                                opcode++;
> +                                size = skip_xleb128(&opcode, --size);
> +                                continue;
> +
> +                        case DW_CFA_negate_ra_state:
> +                                scs_patch_loc(loc - 4);
> +                                break;
> +
> +                        default:
> +                                pr_err("unhandled opcode: %02x in FDE frame %lx\n", *opcode, (uintptr_t)eh_frame);

I'm curious, if we made these identifiers enum values, then we could
get coverage from -Wswitch. Though then there is perhaps a risk that
new values do start getting produced by toolchains, and we'd miss
adding the enum values in those cases.

These nested switch statements make it hard to tell which values are
handled where without doing some arithmetic. Would one level of switch
statements with more cases be more readable? Or is there a goal to have
a smaller initial-level switch table?

> +                                return -ENOEXEC;
> +                        }
> +                        opcode++;
> +                        size--;
> +                        break;
> +
> +                case 0x40:
> +                        // advance loc
> +                        loc += *opcode++ & 0x3f;
> +                        size--;
> +                        break;
> +
> +                case 0x80:
> +                        opcode++;
> +                        size = skip_xleb128(&opcode, --size);
> +                        continue;
> +
> +                default:
> +                        // ignore
> +                        opcode++;
> +                        size--;
> +                        break;
> +                }
> +        } while (size > 0);
> +
> +        return 0;
> +}
> +
> +int noinstr scs_patch(const u8 eh_frame[], int size)
> +{
> +        const u8 *p = eh_frame;
> +
> +        while (size > 4) {
> +                const u32 *frame_size = (const u32 *)p;
> +                int ret;
> +
> +                if (*frame_size != -1 && *frame_size <= size) {
> +                        ret = scs_handle_frame(p + 4, *frame_size);
> +                        if (ret)
> +                                return ret;
> +                        p += 4 + *frame_size;
> +                        size -= 4 + *frame_size;
> +                }
> +        }
> +        return 0;
> +}
> +
> +extern struct arm64_ftr_override id_aa64isar1_override;
> +extern struct arm64_ftr_override id_aa64isar2_override;
> +extern struct arm64_ftr_override id_aa64pfr1_override;

Are these linker defined symbols, like __eh_frame_start/__eh_frame_end?
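A note for readers following the 0xC0 mask above: in the DWARF call frame
encoding, the top two bits of each opcode byte select a "primary" instruction.
Values 0x40..0x7f are DW_CFA_advance_loc with the code delta in the low six
bits, 0x80..0xbf are DW_CFA_offset with a register number in the low six bits
plus a ULEB128 operand, 0xc0..0xff are DW_CFA_restore, and 0x00..0x3f select
the extended opcodes by the full byte. As for the BIT() suggestion, a minimal
sketch (illustrative only, not taken from the patch) would be:

#include <linux/bits.h>

/* Skip one [su]leb128 quantity; bit #7 is the continuation bit. */
static int skip_xleb128(const u8 **opcode, int size)
{
        u8 c;

        do {
                c = *(*opcode)++;
                size--;
        } while (c & BIT(7));   /* more bytes follow while bit #7 is set */

        return size;
}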
On Thu, May 05, 2022 at 06:10:11PM +0200, Ard Biesheuvel wrote:
> [...]
> +	/*
> +	 * We only enable the shadow call stack dynamically if we are running
> +	 * on a system that does not implement PAC or BTI. PAC and SCS roughly
> +	 * provide the same level of protection, and BTI relies on the PACIASP
> +	 * instructions serving as landing pads, preventing us from patching
> +	 * those instructions into something else.
> +	 */

If BTI relies on PAC, then we only need to check for PAC, yes? I.e.
there isn't going to be a device with BTI but without PAC.
On Fri, 6 May 2022 at 02:00, Kees Cook <keescook@chromium.org> wrote:
>
> On Thu, May 05, 2022 at 06:10:11PM +0200, Ard Biesheuvel wrote:
> > [...]
> > +	/*
> > +	 * We only enable the shadow call stack dynamically if we are running
> > +	 * on a system that does not implement PAC or BTI. PAC and SCS roughly
> > +	 * provide the same level of protection, and BTI relies on the PACIASP
> > +	 * instructions serving as landing pads, preventing us from patching
> > +	 * those instructions into something else.
> > +	 */
>
> If BTI relies on PAC, then we only need to check for PAC, yes? I.e.
> there isn't going to be a device with BTI but without PAC.
>

BTI does not rely on PAC, but PACIASP serves as an implicit BTI
instruction as well as a 'sign return address' instruction, given that
it usually appears at the start of a function. So we cannot patch it
away in that case, but I expect this to be a rare case anyway.
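The gating Ard describes is what the module path of the patch below
implements: the rewrite is only applied when the CPU does not implement
address authentication and, for BTI-enabled builds, does not implement BTI
either. Condensed from the module_finalize() hunk, for illustration only:

        if (IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) &&
            !system_supports_address_auth() &&
            (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) || !system_supports_bti())) {
                s = find_section(hdr, sechdrs, ".init.eh_frame");
                if (s)
                        scs_patch((void *)s->sh_addr, s->sh_size);
        }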
On Thu, May 5, 2022 at 2:01 PM Nick Desaulniers <ndesaulniers@google.com> wrote:
>
> On Thu, May 5, 2022 at 9:10 AM Ard Biesheuvel <ardb@kernel.org> wrote:
> > +enum {
> > +        PACIASP  = 0xd503233f,
> > +        AUTIASP  = 0xd50323bf,
> > +        SCS_PUSH = 0xf800865e,
> > +        SCS_POP  = 0xf85f8e5e,
> > +};
>
> Is there anything we can reuse from arch/arm64/include/asm/insn.h
> rather than hardcoding these values?

I think hardcoding the instructions should be fine. You could use
aarch64_insn_gen_hint for the first two, but the last time I checked we
didn't have code for generating the SCS push/pop instructions.

Sami
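For what it's worth, a sketch of the insn.h route for the two PAC hints,
untested and assuming the AARCH64_INSN_HINT_PACIASP/AUTIASP ops that insn.h
appears to provide; the SCS push/pop encodings would still need to be
spelled out by hand:

#include <asm/insn.h>

/* Derive the PAC hint encodings instead of hardcoding them. */
static u32 scs_insn_paciasp(void)
{
        return aarch64_insn_gen_hint(AARCH64_INSN_HINT_PACIASP);
}

static u32 scs_insn_autiasp(void)
{
        return aarch64_insn_gen_hint(AARCH64_INSN_HINT_AUTIASP);
}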
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b6302f7cd73f..df7a7aff456a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -357,6 +357,14 @@ config KASAN_SHADOW_OFFSET config UNWIND_TABLES bool +config UNWIND_PATCH_PAC_INTO_SCS + def_bool y + depends on CC_IS_CLANG && CLANG_VERSION >= 150000 + depends on SHADOW_CALL_STACK + depends on ARM64_PTR_AUTH_KERNEL + select UNWIND_TABLES + select DYNAMIC_SCS + source "arch/arm64/Kconfig.platforms" menu "Kernel Features" diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h index 8297bccf0784..09aed251e695 100644 --- a/arch/arm64/include/asm/scs.h +++ b/arch/arm64/include/asm/scs.h @@ -24,6 +24,18 @@ .endm #endif /* CONFIG_SHADOW_CALL_STACK */ + +#else + + +#ifdef CONFIG_UNWIND_PATCH_PAC_TO_SCS +extern bool should_disable_dynamic_scs; +#else +#define should_disable_dynamic_scs (false) +#endif + +int scs_patch(const u8 eh_frame[], int size); + #endif /* __ASSEMBLY __ */ #endif /* _ASM_SCS_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 986837d7ec82..d7074470031a 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -74,6 +74,8 @@ obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o obj-$(CONFIG_ARM64_MTE) += mte.o obj-y += vdso-wrap.o obj-$(CONFIG_COMPAT_VDSO) += vdso32-wrap.o +obj-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) += patch-scs.o +CFLAGS_patch-scs.o += -mbranch-protection=none obj-y += probes/ head-y := head.o diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 6a98f1a38c29..e9601c8a1bcd 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -453,6 +453,9 @@ SYM_FUNC_START_LOCAL(__primary_switched) mov x0, x21 // pass FDT address in x0 bl early_fdt_map // Try mapping the FDT early bl init_feature_override // Parse cpu feature overrides +#ifdef CONFIG_UNWIND_PATCH_PAC_INTO_SCS + bl scs_patch_vmlinux +#endif #ifdef CONFIG_RANDOMIZE_BASE tst x23, ~(MIN_KIMG_ALIGN - 1) // already running randomized? 
b.ne 0f diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index bda49430c9ea..c284ec35c27c 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -39,7 +39,7 @@ static void init_irq_scs(void) { int cpu; - if (!IS_ENABLED(CONFIG_SHADOW_CALL_STACK)) + if (!scs_is_enabled()) return; for_each_possible_cpu(cpu) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f2d4bb14bfab..7e9e63600d28 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -18,6 +18,7 @@ #include <linux/vmalloc.h> #include <asm/alternative.h> #include <asm/insn.h> +#include <asm/scs.h> #include <asm/sections.h> void *module_alloc(unsigned long size) @@ -529,5 +530,14 @@ int module_finalize(const Elf_Ehdr *hdr, if (s) apply_alternatives_module((void *)s->sh_addr, s->sh_size); + if (IS_ENABLED(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) && + !system_supports_address_auth() && + (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) || !system_supports_bti())) { + + s = find_section(hdr, sechdrs, ".init.eh_frame"); + if (s) + scs_patch((void *)s->sh_addr, s->sh_size); + } + return module_init_ftrace_plt(hdr, sechdrs, me); } diff --git a/arch/arm64/kernel/patch-scs.c b/arch/arm64/kernel/patch-scs.c new file mode 100644 index 000000000000..8c534630c2a1 --- /dev/null +++ b/arch/arm64/kernel/patch-scs.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022 - Google LLC + * Author: Ard Biesheuvel <ardb@google.com> + */ + +#include <linux/bug.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/linkage.h> +#include <linux/printk.h> +#include <linux/types.h> + +#include <asm/cpufeature.h> + +#define DW_CFA_nop 0x00 +#define DW_CFA_set_loc 0x01 +#define DW_CFA_advance_loc1 0x02 +#define DW_CFA_advance_loc2 0x03 +#define DW_CFA_advance_loc4 0x04 +#define DW_CFA_offset_extended 0x05 +#define DW_CFA_restore_extended 0x06 +#define DW_CFA_undefined 0x07 +#define DW_CFA_same_value 0x08 +#define DW_CFA_register 0x09 +#define DW_CFA_remember_state 0x0a +#define DW_CFA_restore_state 0x0b +#define DW_CFA_def_cfa 0x0c +#define DW_CFA_def_cfa_register 0x0d +#define DW_CFA_def_cfa_offset 0x0e +#define DW_CFA_def_cfa_expression 0x0f +#define DW_CFA_expression 0x10 +#define DW_CFA_offset_extended_sf 0x11 +#define DW_CFA_def_cfa_sf 0x12 +#define DW_CFA_def_cfa_offset_sf 0x13 +#define DW_CFA_val_offset 0x14 +#define DW_CFA_val_offset_sf 0x15 +#define DW_CFA_val_expression 0x16 +#define DW_CFA_lo_user 0x1c +#define DW_CFA_negate_ra_state 0x2d +#define DW_CFA_GNU_args_size 0x2e +#define DW_CFA_GNU_negative_offset_extended 0x2f +#define DW_CFA_hi_user 0x3f + +extern const u8 __eh_frame_start[], __eh_frame_end[]; + +struct fde_frame { + s32 initial_loc; + s32 range; +}; + +enum { + PACIASP = 0xd503233f, + AUTIASP = 0xd50323bf, + SCS_PUSH = 0xf800865e, + SCS_POP = 0xf85f8e5e, +}; + +static void __always_inline scs_patch_loc(u64 loc) +{ + u32 insn = le32_to_cpup((void *)loc); + + switch (insn) { + case PACIASP: + *(u32 *)loc = cpu_to_le32(SCS_PUSH); + break; + case AUTIASP: + *(u32 *)loc = cpu_to_le32(SCS_POP); + break; + default: + /* + * While the DW_CFA_negate_ra_state directive is guaranteed to + * appear right after a PACIASP/AUTIASP instruction, it may + * also appear after a DW_CFA_restore_state directive that + * restores a state that is only partially accurate, and is + * followed by DW_CFA_negate_ra_state directive to toggle the + * PAC bit again. So we permit other instructions here, and ignore + * them. 
+ */ + break; + } +} + +/* + * Skip one uleb128/sleb128 encoded quantity from the opcode stream. All bytes + * except the last one have bit #7 set. + */ +static int __always_inline skip_xleb128(const u8 **opcode, int size) +{ + u8 c; + + do { + c = *(*opcode)++; + size--; + } while (c & 0x80); + + return size; +} + +static int noinstr scs_handle_frame(const u8 eh_frame[], u32 size) +{ + const struct fde_frame *fde; + const u8 *opcode; + u64 loc; + + /* + * For patching PAC opcodes, we only care about the FDE records, and + * not the CIE, which carries the initial CFA directives but they only + * pertain to which register is the stack pointer. + * TODO this is not 100% true - we need the augmentation string and the + * encoding but they are always the same in practice. + */ + if (*(u32 *)eh_frame == 0) + return 0; + + fde = (const struct fde_frame *)(eh_frame + 4); + loc = (u64)offset_to_ptr(&fde->initial_loc); + opcode = (const u8 *)(fde + 1); + + // TODO check augmentation data + WARN_ON(*opcode++); + size -= sizeof(u32) + sizeof(*fde) + 1; + + /* + * Starting from 'loc', apply the CFA opcodes that advance the location + * pointer, and identify the locations of the PAC instructions. + */ + do { + switch (*opcode & 0xC0) { + case 0: + // handle DW_CFA_xxx opcodes + switch (*opcode) { + case DW_CFA_nop: + case DW_CFA_remember_state: + case DW_CFA_restore_state: + break; + + case DW_CFA_advance_loc1: + loc += *++opcode; + size--; + break; + + case DW_CFA_advance_loc2: + loc += *++opcode; + loc += *++opcode << 8; + size -= 2; + break; + + case DW_CFA_def_cfa: + opcode++; + size = skip_xleb128(&opcode, --size); + size = skip_xleb128(&opcode, size); + continue; + case DW_CFA_def_cfa_offset: + case DW_CFA_def_cfa_offset_sf: + case DW_CFA_def_cfa_register: + case DW_CFA_same_value: + opcode++; + size = skip_xleb128(&opcode, --size); + continue; + + case DW_CFA_negate_ra_state: + scs_patch_loc(loc - 4); + break; + + default: + pr_err("unhandled opcode: %02x in FDE frame %lx\n", *opcode, (uintptr_t)eh_frame); + return -ENOEXEC; + } + opcode++; + size--; + break; + + case 0x40: + // advance loc + loc += *opcode++ & 0x3f; + size--; + break; + + case 0x80: + opcode++; + size = skip_xleb128(&opcode, --size); + continue; + + default: + // ignore + opcode++; + size--; + break; + } + } while (size > 0); + + return 0; +} + +int noinstr scs_patch(const u8 eh_frame[], int size) +{ + const u8 *p = eh_frame; + + while (size > 4) { + const u32 *frame_size = (const u32 *)p; + int ret; + + if (*frame_size != -1 && *frame_size <= size) { + ret = scs_handle_frame(p + 4, *frame_size); + if (ret) + return ret; + p += 4 + *frame_size; + size -= 4 + *frame_size; + } + } + return 0; +} + +extern struct arm64_ftr_override id_aa64isar1_override; +extern struct arm64_ftr_override id_aa64isar2_override; +extern struct arm64_ftr_override id_aa64pfr1_override; + +bool __initdata should_disable_dynamic_scs = true; + +asmlinkage void __init scs_patch_vmlinux(void) +{ + /* + * We only enable the shadow call stack dynamically if we are running + * on a system that does not implement PAC or BTI. PAC and SCS roughly + * provide the same level of protection, and BTI relies on the PACIASP + * instructions serving as landing pads, preventing us from patching + * those instructions into something else. 
+ */ + u64 reg = read_sysreg_s(SYS_ID_AA64ISAR1_EL1); + + reg &= ~id_aa64isar1_override.mask; + reg |= id_aa64isar1_override.val; + + if (reg & ((0xf << ID_AA64ISAR1_APA_SHIFT) | + (0xf << ID_AA64ISAR1_API_SHIFT))) + return; + + reg = read_sysreg_s(SYS_ID_AA64ISAR2_EL1); + reg &= ~id_aa64isar2_override.mask; + reg |= id_aa64isar2_override.val; + + if (reg & (0xf << ID_AA64ISAR2_APA3_SHIFT)) + return; + + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) { + reg = read_sysreg_s(SYS_ID_AA64PFR1_EL1); + reg &= ~id_aa64pfr1_override.mask; + reg |= id_aa64pfr1_override.val; + + if (reg & (0xf << ID_AA64PFR1_BT_SHIFT)) + return; + } + + WARN_ON(scs_patch(__eh_frame_start, __eh_frame_end - __eh_frame_start)); + should_disable_dynamic_scs = false; +} diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index d20620a1c51a..30f3c7563694 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -144,7 +144,7 @@ static int init_sdei_scs(void) int cpu; int err = 0; - if (!IS_ENABLED(CONFIG_SHADOW_CALL_STACK)) + if (!scs_is_enabled()) return 0; for_each_possible_cpu(cpu) { diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 3505789cf4bd..17fad5749f4a 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -30,6 +30,7 @@ #include <linux/efi.h> #include <linux/psci.h> #include <linux/sched/task.h> +#include <linux/scs.h> #include <linux/mm.h> #include <asm/acpi.h> @@ -42,6 +43,7 @@ #include <asm/cpu_ops.h> #include <asm/kasan.h> #include <asm/numa.h> +#include <asm/scs.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> @@ -323,6 +325,9 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) jump_label_init(); parse_early_param(); + if (should_disable_dynamic_scs) + static_branch_disable(&dynamic_scs_enabled); + /* * Unmask asynchronous aborts and fiq after bringing up possible * earlycon. (Report possible System Errors once we can report this
Implement dynamic shadow call stack support on Clang, by parsing the
unwind tables at init time to locate all occurrences of PACIASP/AUTIASP
instructions, and replacing them with the shadow call stack push and
pop instructions, respectively.

This is useful because the overhead of the shadow call stack is
difficult to justify on hardware that implements pointer authentication
(PAC), and given that the PAC instructions are executed as NOPs on
hardware that doesn't, we can just replace them without breaking
anything. As PACIASP/AUTIASP are guaranteed to be paired with respect
to manipulations of the return address, replacing them 1:1 with shadow
call stack pushes and pops is guaranteed to result in the expected
behavior.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/arm64/Kconfig            |   8 +
 arch/arm64/include/asm/scs.h  |  12 +
 arch/arm64/kernel/Makefile    |   2 +
 arch/arm64/kernel/head.S      |   3 +
 arch/arm64/kernel/irq.c       |   2 +-
 arch/arm64/kernel/module.c    |  10 +
 arch/arm64/kernel/patch-scs.c | 257 ++++++++++++++++++++
 arch/arm64/kernel/sdei.c      |   2 +-
 arch/arm64/kernel/setup.c     |   5 +
 9 files changed, 299 insertions(+), 2 deletions(-)
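For readers decoding the magic numbers in the patch: the rewrite is a pure
1:1 instruction substitution, and by my reading of the A64 encodings (with
x18 holding the shadow call stack pointer) the four constants correspond as
follows; illustrative only:

/* PAC -> SCS substitution performed by scs_patch_loc() */
static const struct {
        u32 from;       /* PAC hint instruction */
        u32 to;         /* shadow call stack accessor */
} pac_to_scs[] = {
        { 0xd503233f, 0xf800865e },     /* paciasp -> str x30, [x18], #8   */
        { 0xd50323bf, 0xf85f8e5e },     /* autiasp -> ldr x30, [x18, #-8]! */
};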