Message ID | 20210702194518.2048539-1-pcc@google.com (mailing list archive)
---|---
State | New, archived
Series | [v2] arm64: mte: switch GCR_EL1 on task switch rather than entry/exit
On Fri, Jul 2, 2021 at 9:45 PM Peter Collingbourne <pcc@google.com> wrote:
>
> Accessing GCR_EL1 and issuing an ISB can be expensive on some
> microarchitectures. To avoid taking this performance hit on every
> kernel entry/exit, switch GCR_EL1 on task switch rather than
> entry/exit. This is essentially a revert of commit bad1e1c663e0
> ("arm64: mte: switch GCR_EL1 in kernel entry and exit").
>
> This requires changing how we generate random tags for HW tag-based
> KASAN, since at this point IRG would use the user's exclusion mask,
> which may not be suitable for kernel use. In this patch I chose to take
> the modulus of CNTVCT_EL0, however alternative approaches are possible.
>
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> Link: https://linux-review.googlesource.com/id/I560a190a74176ca4cc5191dad08f77f6b1577c75
[...]
> -/* Generate a random tag. */
> +/*
> + * Generate a random tag. We can't use IRG because the user's GCR_EL1 is still
> + * installed for performance reasons. Instead, take the modulus of the
> + * architected timer which should be random enough for our purposes.

It's unfortunate that we can't use the instruction that was specifically
designed for random tag generation.

> + */
>  static inline u8 mte_get_random_tag(void)
>  {
> -	void *addr;
> +	u64 cntvct;
>
> -	asm(__MTE_PREAMBLE "irg %0, %0"
> -	    : "=r" (addr));
> +	asm("mrs %0, cntvct_el0" : "=r"(cntvct));
>
> -	return mte_get_ptr_tag(addr);
> +	return 0xF0 | (cntvct % mte_tag_mod);
>  }
[...]

Acked-by: Andrey Konovalov <andreyknvl@gmail.com>
On Fri, Jul 02, 2021 at 12:45:18PM -0700, Peter Collingbourne wrote:
> Accessing GCR_EL1 and issuing an ISB can be expensive on some
> microarchitectures. To avoid taking this performance hit on every
> kernel entry/exit, switch GCR_EL1 on task switch rather than
> entry/exit. This is essentially a revert of commit bad1e1c663e0
> ("arm64: mte: switch GCR_EL1 in kernel entry and exit").

As per the discussion in v1, we can avoid an ISB, though we are still
left with the GCR_EL1 access. I'm surprised that access to a
non-self-synchronising register is that expensive, but I suspect the
benchmark is just timing a dummy syscall. I'm not asking for numbers,
but I'd like to make sure we don't optimise for unrealistic use-cases.
Is something like a Geekbench score affected, for example?

While we can get rid of the IRG in the kernel, at some point we may want
to use ADDG as generated by the compiler. That too is affected by the
GCR_EL1.Exclude mask.

> This requires changing how we generate random tags for HW tag-based
> KASAN, since at this point IRG would use the user's exclusion mask,
> which may not be suitable for kernel use. In this patch I chose to take
> the modulus of CNTVCT_EL0, however alternative approaches are possible.

So a few successive mte_get_random_tag() calls will give the same result
if the counter hasn't changed. Even if ARMv8.6 requires a 1GHz timer
frequency, I think an implementation is allowed to count in bigger
increments.

I'm inclined to NAK this patch on the grounds that we may need a
specific GCR_EL1 configuration for the kernel. Feedback to the
microarchitects: make access to this register faster.
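The concern about a coarse counter is easy to see with a small user-space simulation (not kernel code; the counter increment, starting value and mte_tag_mod are illustrative assumptions): if several allocations happen between ticks of CNTVCT_EL0, they all receive the same tag.

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t mte_tag_mod = 14;	/* e.g. (max_tag & 0xF) + 1 with an assumed max_tag of 0xFD */
	uint64_t cntvct = 0x1000;	/* simulated CNTVCT_EL0 reading */

	for (int alloc = 0; alloc < 8; alloc++) {
		/* Model a core where roughly four allocations fit in one counter tick. */
		if (alloc && (alloc % 4) == 0)
			cntvct += 16;	/* coarse increment, as reported for some cores */

		uint8_t tag = 0xF0 | (uint8_t)(cntvct % mte_tag_mod);
		printf("allocation %d -> tag 0x%X\n", alloc, tag);
	}
	return 0;
}
```

Running this prints the same tag for each group of four allocations, which is exactly the repeated-tag behaviour described above.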
On Mon, Jul 5, 2021 at 5:52 AM Catalin Marinas <catalin.marinas@arm.com> wrote:
>
> On Fri, Jul 02, 2021 at 12:45:18PM -0700, Peter Collingbourne wrote:
> > Accessing GCR_EL1 and issuing an ISB can be expensive on some
> > microarchitectures. To avoid taking this performance hit on every
> > kernel entry/exit, switch GCR_EL1 on task switch rather than
> > entry/exit. This is essentially a revert of commit bad1e1c663e0
> > ("arm64: mte: switch GCR_EL1 in kernel entry and exit").
>
> As per the discussion in v1, we can avoid an ISB, though we are still
> left with the GCR_EL1 access. I'm surprised that access to a
> non-self-synchronising register is that expensive, but I suspect the
> benchmark is just timing a dummy syscall. I'm not asking for numbers,
> but I'd like to make sure we don't optimise for unrealistic use-cases.
> Is something like a Geekbench score affected, for example?

FWIW, I was using this test program:
https://patchwork.kernel.org/project/linux-arm-kernel/patch/20200801011152.39838-1-pcc@google.com/#23572981

Since it's an invalid syscall, it's a good way to measure the effect of
changes to entry/exit in isolation, but it does mean that we need to be
careful when also making changes elsewhere in the kernel, as will become
apparent in a moment.

> While we can get rid of the IRG in the kernel, at some point we may want
> to use ADDG as generated by the compiler. That too is affected by the
> GCR_EL1.Exclude mask.
>
> > This requires changing how we generate random tags for HW tag-based
> > KASAN, since at this point IRG would use the user's exclusion mask,
> > which may not be suitable for kernel use. In this patch I chose to take
> > the modulus of CNTVCT_EL0, however alternative approaches are possible.
>
> So a few successive mte_get_random_tag() calls will give the same result
> if the counter hasn't changed. Even if ARMv8.6 requires a 1GHz timer
> frequency, I think an implementation is allowed to count in bigger
> increments.

Yes, I observed that Apple M1, for example, counts in increments of 16.
Taking the modulus of the timer would happen to work as long as the
increment is small enough (since it would mean that the timer would
likely have incremented by the time we need to make another allocation)
and a power of 2 (to ensure that we permute through all of the possible
tag values), which I would expect to be the case on most
microarchitectures.

However, I developed an in-kernel allocator microbenchmark which
revealed a more important issue with this patch: on most cores,
switching from IRG to reading the timer costs more than the improvement
gained by moving from the single-ISB patch to this GCR-on-task-switch
patch. That means that with KASAN enabled, a single allocation would
wipe out the performance improvement from avoiding touching GCR on
entry/exit. I also tried a number of alternative approaches, and they
were also too expensive. So now I am less inclined to push for an
approach that avoids touching GCR on entry/exit.

> BTW, can you also modify mte_set_kernel_gcr to only do a write to the
> GCR_EL1 register rather than a read-modify-write?

Yes, this helps a bit. In v3 I now do this as well as the single ISB.

Peter
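The counter granularity mentioned above can be observed from user space on arm64 Linux, where EL0 reads of the virtual counter are normally permitted. The following is a minimal sketch (an assumption-laden illustration, not part of the patch): it polls CNTVCT_EL0 and prints the delta between successive distinct values, which approximates the hardware increment when the loop runs faster than the counter ticks.

```c
#include <stdio.h>
#include <stdint.h>

static inline uint64_t read_cntvct(void)
{
	uint64_t v;

	/* Virtual counter; readable from EL0 on a typical Linux configuration. */
	asm volatile("mrs %0, cntvct_el0" : "=r"(v));
	return v;
}

int main(void)
{
	uint64_t prev = read_cntvct();

	for (int seen = 0; seen < 10; ) {
		uint64_t now = read_cntvct();

		if (now != prev) {
			/* Prints 1 on some cores, larger steps (e.g. 16) on others. */
			printf("counter advanced by %llu\n",
			       (unsigned long long)(now - prev));
			prev = now;
			seen++;
		}
	}
	return 0;
}
```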
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index ddd4d17cf9a0..e9b3c1bdbba3 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -13,6 +13,8 @@
 
 #ifdef CONFIG_ARM64_MTE
 
+extern u64 mte_tag_mod;
+
 /*
  * These functions are meant to be only used from KASAN runtime through
  * the arch_*() interface defined in asm/memory.h.
@@ -37,15 +39,18 @@ static inline u8 mte_get_mem_tag(void *addr)
 	return mte_get_ptr_tag(addr);
 }
 
-/* Generate a random tag. */
+/*
+ * Generate a random tag. We can't use IRG because the user's GCR_EL1 is still
+ * installed for performance reasons. Instead, take the modulus of the
+ * architected timer which should be random enough for our purposes.
+ */
 static inline u8 mte_get_random_tag(void)
 {
-	void *addr;
+	u64 cntvct;
 
-	asm(__MTE_PREAMBLE "irg %0, %0"
-	    : "=r" (addr));
+	asm("mrs %0, cntvct_el0" : "=r"(cntvct));
 
-	return mte_get_ptr_tag(addr);
+	return 0xF0 | (cntvct % mte_tag_mod);
 }
 
 /*
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index bc88a1ced0d7..412b94efcb11 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -16,8 +16,6 @@
 
 #include <asm/pgtable-types.h>
 
-extern u64 gcr_kernel_excl;
-
 void mte_clear_page_tags(void *addr);
 unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
 				      unsigned long n);
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ce59280355c5..c95bfe145639 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -175,43 +175,6 @@ alternative_else_nop_endif
 #endif
 	.endm
 
-	.macro mte_set_gcr, tmp, tmp2
-#ifdef CONFIG_ARM64_MTE
-	/*
-	 * Calculate and set the exclude mask preserving
-	 * the RRND (bit[16]) setting.
-	 */
-	mrs_s	\tmp2, SYS_GCR_EL1
-	bfxil	\tmp2, \tmp, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
-	msr_s	SYS_GCR_EL1, \tmp2
-#endif
-	.endm
-
-	.macro mte_set_kernel_gcr, tmp, tmp2
-#ifdef CONFIG_KASAN_HW_TAGS
-alternative_if_not ARM64_MTE
-	b	1f
-alternative_else_nop_endif
-	ldr_l	\tmp, gcr_kernel_excl
-
-	mte_set_gcr	\tmp, \tmp2
-	isb
-1:
-#endif
-	.endm
-
-	.macro mte_set_user_gcr, tsk, tmp, tmp2
-#ifdef CONFIG_ARM64_MTE
-alternative_if_not ARM64_MTE
-	b	1f
-alternative_else_nop_endif
-	ldr	\tmp, [\tsk, #THREAD_MTE_CTRL]
-
-	mte_set_gcr	\tmp, \tmp2
-1:
-#endif
-	.endm
-
 	.macro kernel_entry, el, regsize = 64
 	.if	\regsize == 32
 	mov	w0, w0				// zero upper 32 bits of x0
@@ -273,8 +236,6 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
 alternative_else_nop_endif
 #endif
 
-	mte_set_kernel_gcr x22, x23
-
 	scs_load tsk, x20
 	.else
 	add	x21, sp, #PT_REGS_SIZE
@@ -398,8 +359,6 @@ alternative_if ARM64_HAS_ADDRESS_AUTH
 alternative_else_nop_endif
 #endif
 
-	mte_set_user_gcr tsk, x0, x1
-
 	apply_ssbd 0, x0, x1
 	.endif
 
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 48f218e070cc..b8d3e0b20702 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -23,7 +23,7 @@
 #include <asm/ptrace.h>
 #include <asm/sysreg.h>
 
-u64 gcr_kernel_excl __ro_after_init;
+u64 mte_tag_mod __ro_after_init;
 
 static bool report_fault_once = true;
 
@@ -98,22 +98,7 @@ int memcmp_pages(struct page *page1, struct page *page2)
 
 void mte_init_tags(u64 max_tag)
 {
-	static bool gcr_kernel_excl_initialized;
-
-	if (!gcr_kernel_excl_initialized) {
-		/*
-		 * The format of the tags in KASAN is 0xFF and in MTE is 0xF.
-		 * This conversion extracts an MTE tag from a KASAN tag.
-		 */
-		u64 incl = GENMASK(FIELD_GET(MTE_TAG_MASK >> MTE_TAG_SHIFT,
-					     max_tag), 0);
-
-		gcr_kernel_excl = ~incl & SYS_GCR_EL1_EXCL_MASK;
-		gcr_kernel_excl_initialized = true;
-	}
-
-	/* Enable the kernel exclude mask for random tags generation. */
-	write_sysreg_s(SYS_GCR_EL1_RRND | gcr_kernel_excl, SYS_GCR_EL1);
+	mte_tag_mod = (max_tag & 0xF) + 1;
 }
 
 static inline void __mte_enable_kernel(const char *mode, unsigned long tcf)
@@ -188,19 +173,7 @@ void mte_check_tfsr_el1(void)
 }
 #endif
 
-static void update_gcr_el1_excl(u64 excl)
-{
-
-	/*
-	 * Note that the mask controlled by the user via prctl() is an
-	 * include while GCR_EL1 accepts an exclude mask.
-	 * No need for ISB since this only affects EL0 currently, implicit
-	 * with ERET.
-	 */
-	sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl);
-}
-
-static void mte_update_sctlr_user(struct task_struct *task)
+static void mte_sync_ctrl(struct task_struct *task)
 {
 	/*
 	 * This can only be called on the current or next task since the CPU
@@ -219,6 +192,17 @@ static void mte_update_sctlr_user(struct task_struct *task)
 	else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
 		sctlr |= SCTLR_EL1_TCF0_SYNC;
 	task->thread.sctlr_user = sctlr;
+
+	/*
+	 * Note that the mask controlled by the user via prctl() is an
+	 * include while GCR_EL1 accepts an exclude mask.
+	 * No need for ISB since this only affects EL0 currently, implicit
+	 * with ERET.
+	 */
+	sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK,
+			   (mte_ctrl & MTE_CTRL_GCR_USER_EXCL_MASK) >>
+			   MTE_CTRL_GCR_USER_EXCL_SHIFT);
+
 	preempt_enable();
 }
 
@@ -233,13 +217,13 @@ void mte_thread_init_user(void)
 	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
 	/* disable tag checking and reset tag generation mask */
 	current->thread.mte_ctrl = MTE_CTRL_GCR_USER_EXCL_MASK;
-	mte_update_sctlr_user(current);
+	mte_sync_ctrl(current);
 	set_task_sctlr_el1(current->thread.sctlr_user);
 }
 
 void mte_thread_switch(struct task_struct *next)
 {
-	mte_update_sctlr_user(next);
+	mte_sync_ctrl(next);
 
 	/*
 	 * Check if an async tag exception occurred at EL1.
@@ -273,7 +257,7 @@ void mte_suspend_exit(void)
 	if (!system_supports_mte())
 		return;
 
-	update_gcr_el1_excl(gcr_kernel_excl);
+	mte_sync_ctrl(current);
 }
 
 long set_mte_ctrl(struct task_struct *task, unsigned long arg)
@@ -291,7 +275,7 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 
 	task->thread.mte_ctrl = mte_ctrl;
 	if (task == current) {
-		mte_update_sctlr_user(task);
+		mte_sync_ctrl(task);
 		set_task_sctlr_el1(task->thread.sctlr_user);
 	}
 
@@ -467,7 +451,7 @@ static ssize_t mte_tcf_preferred_show(struct device *dev,
 
 static void sync_sctlr(void *arg)
 {
-	mte_update_sctlr_user(current);
+	mte_sync_ctrl(current);
 	set_task_sctlr_el1(current->thread.sctlr_user);
 }
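For reference, the tag arithmetic introduced by the hunks above can be checked with a small user-space simulation. The max_tag value below is an assumption about what KASAN passes to mte_init_tags() (the top nibble values are reserved for special KASAN tags); everything else follows directly from the patch.

```c
#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t max_tag = 0xFD;			/* assumed highest tag passed by KASAN */
	uint64_t mte_tag_mod = (max_tag & 0xF) + 1;	/* as in mte_init_tags(): 0xD + 1 = 14 */

	/* cntvct % 14 ranges over 0..13, so generated tags range over 0xF0..0xFD. */
	for (uint64_t cntvct = 0; cntvct < mte_tag_mod; cntvct++)
		printf("cntvct %% %llu = %llu -> tag 0x%X\n",
		       (unsigned long long)mte_tag_mod,
		       (unsigned long long)(cntvct % mte_tag_mod),
		       (unsigned)(0xF0 | (cntvct % mte_tag_mod)));
	return 0;
}
```

With these assumptions the generated tags never reach the values above max_tag, matching what the removed GCR_EL1 exclude mask achieved for IRG.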
Accessing GCR_EL1 and issuing an ISB can be expensive on some
microarchitectures. To avoid taking this performance hit on every
kernel entry/exit, switch GCR_EL1 on task switch rather than
entry/exit. This is essentially a revert of commit bad1e1c663e0
("arm64: mte: switch GCR_EL1 in kernel entry and exit").

This requires changing how we generate random tags for HW tag-based
KASAN, since at this point IRG would use the user's exclusion mask,
which may not be suitable for kernel use. In this patch I chose to take
the modulus of CNTVCT_EL0, however alternative approaches are possible.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/I560a190a74176ca4cc5191dad08f77f6b1577c75
---
v2:
- rebase onto v9 of the tag checking mode preference series

 arch/arm64/include/asm/mte-kasan.h | 15 ++++++---
 arch/arm64/include/asm/mte.h       |  2 --
 arch/arm64/kernel/entry.S          | 41 -----------------------
 arch/arm64/kernel/mte.c            | 54 +++++++++++-------------------
 4 files changed, 29 insertions(+), 83 deletions(-)
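The "user's exclusion mask" referred to above originates from the MTE prctl() interface: user space passes an include mask, which the kernel converts into the exclude mask it programs into GCR_EL1. A minimal user-space sketch is shown below; it assumes headers new enough to define the MTE prctl flags (Linux 5.10+) and omits error handling beyond perror().

```c
#include <stdio.h>
#include <sys/prctl.h>

int main(void)
{
	/* Request that only tags 1-3 be generated; the kernel programs the
	 * complement of this include mask as the GCR_EL1 exclude mask. */
	unsigned long incl = (1UL << 1) | (1UL << 2) | (1UL << 3);

	if (prctl(PR_SET_TAGGED_ADDR_CTRL,
		  PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC |
		  (incl << PR_MTE_TAG_SHIFT),
		  0, 0, 0))
		perror("prctl(PR_SET_TAGGED_ADDR_CTRL)");
	return 0;
}
```

With the v2 patch above, that per-task exclude mask would have remained installed while running in the kernel, which is why mte_get_random_tag() could no longer rely on IRG.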