
[v4] arm64: mte: allow async MTE to be upgraded to sync on a per-CPU basis

Message ID 20210615015728.3232519-1-pcc@google.com (mailing list archive)
State New, archived
Series [v4] arm64: mte: allow async MTE to be upgraded to sync on a per-CPU basis

Commit Message

Peter Collingbourne June 15, 2021, 1:57 a.m. UTC
On some CPUs the performance of MTE in synchronous mode is similar
to that of asynchronous mode. This makes it worthwhile to enable
synchronous mode on those CPUs when asynchronous mode is requested,
in order to gain the error detection benefits of synchronous mode
without the performance downsides. Therefore, make it possible for
user programs to opt into upgrading to synchronous mode on those CPUs
via a new prctl flag. The flag is orthogonal to the existing TCF modes
in order to accommodate upgrading from other TCF modes in the future.
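
As an illustration only (not part of the patch), a user program might opt
in roughly as follows. The existing PR_MTE_* constants come from
<linux/prctl.h>; PR_MTE_DYNAMIC_TCF is the new flag added by this series,
so it is guarded for older headers:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_MTE_DYNAMIC_TCF
#define PR_MTE_DYNAMIC_TCF	(1UL << 19)	/* new in this series */
#endif

int main(void)
{
	/*
	 * Ask for async tag checking, include tags 1-15, and allow the
	 * kernel to upgrade to a stricter mode on capable CPUs.
	 */
	unsigned long ctrl = PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_ASYNC |
			     PR_MTE_DYNAMIC_TCF |
			     (0xfffeUL << PR_MTE_TAG_SHIFT);

	if (prctl(PR_SET_TAGGED_ADDR_CTRL, ctrl, 0, 0, 0)) {
		perror("prctl(PR_SET_TAGGED_ADDR_CTRL)");
		return 1;
	}
	return 0;
}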

The feature is controlled on a per-CPU basis via sysfs.
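
For example (illustrative only, using the sysfs path added by this patch
and cpu0 as an arbitrary CPU), a privileged program could request the
async-to-sync upgrade like this:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	/*
	 * Writing "1" requests upgrading async to sync on this CPU;
	 * writing "0" selects no upgrade (the code's default).
	 */
	int fd = open("/sys/devices/system/cpu/cpu0/mte_upgrade_async",
		      O_WRONLY);

	if (fd < 0 || write(fd, "1", 1) != 1) {
		perror("mte_upgrade_async");
		return 1;
	}
	close(fd);
	return 0;
}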

Signed-off-by: Peter Collingbourne <pcc@google.com>
Link: https://linux-review.googlesource.com/id/Id6f95b71fde6e701dd30b5e108126af7286147e8
---
v4:
- switch to new mte_ctrl field
- make register_mte_upgrade_async_sysctl return an int
- change the sysctl to take 0 or 1 instead of raw TCF values
- "same as" -> "similar to"

v3:
- drop the device tree support
- add documentation
- add static_assert to ensure no overlap with real HW bits
- move per-CPU variable initialization to mte.c
- use smp_call_function_single instead of stop_machine

v2:
- make it an opt-in behavior
- change the format of the device tree node
- also allow controlling the feature via sysfs

 .../arm64/memory-tagging-extension.rst        |  20 +++
 arch/arm64/include/asm/mte.h                  |   4 +
 arch/arm64/include/asm/processor.h            |  14 +-
 arch/arm64/kernel/asm-offsets.c               |   2 +-
 arch/arm64/kernel/entry.S                     |   4 +-
 arch/arm64/kernel/mte.c                       | 153 ++++++++++++++----
 arch/arm64/kernel/process.c                   |   2 +-
 include/uapi/linux/prctl.h                    |   2 +
 8 files changed, 164 insertions(+), 37 deletions(-)

Comments

Catalin Marinas June 15, 2021, 6:02 p.m. UTC | #1
On Mon, Jun 14, 2021 at 06:57:28PM -0700, Peter Collingbourne wrote:
> @@ -120,6 +120,25 @@ in the ``PR_MTE_TAG_MASK`` bit-field.
>  interface provides an include mask. An include mask of ``0`` (exclusion
>  mask ``0xffff``) results in the CPU always generating tag ``0``.
>  
> +Upgrading to stricter tag checking modes
> +----------------------------------------
> +
> +On some CPUs the performance of MTE in stricter tag checking modes
> +is similar to that of less strict tag checking modes. This makes it
> +worthwhile to enable stricter checks on those CPUs when a less strict
> +checking mode is requested, in order to gain the error detection
> +benefits of the stricter checks without the performance downsides. To
> +opt into upgrading to a stricter checking mode on those CPUs, the user
> +can set the ``PR_MTE_DYNAMIC_TCF`` flag bit in the ``flags`` argument
> +to the ``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call.
> +
> +This feature is currently only supported for upgrading from
> +asynchronous mode. To configure a CPU to upgrade from asynchronous mode
> +to synchronous mode, a privileged user may write the value ``1`` to
> +``/sys/devices/system/cpu/cpu<N>/mte_upgrade_async``, and to disable
> +upgrading they may write the value ``2``. By default the feature is
> +disabled on all CPUs.

This needs updating as well for 0 as disabled.

I wonder whether we could generalise this to something like
mte_tcf_upgrade and allow asymmetric to be expanded to sync. Otherwise
we'd have to add another interface when we know that if a CPU can handle
sync as fast as async, the asymmetric mode should also be upgraded. So a
more generic mte_tcf_upgrade just holds the strictest that the CPU can
handle without significant performance degradation.

The mte_upgrade_async name can also be confusing for the asymmetric mode,
where the writes are asynchronous.

> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index 125a10e413e9..ad85e8519669 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -4,6 +4,7 @@
>   */
>  
>  #include <linux/bitops.h>
> +#include <linux/cpu.h>
>  #include <linux/kernel.h>
>  #include <linux/mm.h>
>  #include <linux/prctl.h>
> @@ -26,6 +27,9 @@ u64 gcr_kernel_excl __ro_after_init;
>  
>  static bool report_fault_once = true;
>  
> +DEFINE_PER_CPU_READ_MOSTLY(u64, mte_upgrade_async);
> +EXPORT_PER_CPU_SYMBOL(mte_upgrade_async);

I think this should be static and not exported, unless I missed its use
elsewhere.

> @@ -216,15 +210,33 @@ void mte_thread_init_user(void)
>  	dsb(ish);
>  	write_sysreg_s(0, SYS_TFSRE0_EL1);
>  	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
> -	/* disable tag checking */
> -	set_task_sctlr_el1((current->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK) |
> -			   SCTLR_EL1_TCF0_NONE);
> -	/* reset tag generation mask */
> -	set_gcr_el1_excl(SYS_GCR_EL1_EXCL_MASK);
> +	/* disable tag checking and reset tag generation mask */
> +	current->thread.mte_ctrl =
> +		MTE_CTRL_GCR_USER_EXCL_MASK | MTE_CTRL_TCF_NONE;
> +	mte_update_sctlr_user(current);
> +	set_task_sctlr_el1(current->thread.sctlr_user);
> +}
> +
> +void mte_update_sctlr_user(struct task_struct *task)
> +{
> +	unsigned long sctlr = task->thread.sctlr_user;
> +
> +	sctlr &= ~SCTLR_EL1_TCF0_MASK;
> +	if ((task->thread.mte_ctrl & MTE_CTRL_DYNAMIC_TCF) &&
> +	    (task->thread.mte_ctrl & MTE_CTRL_TCF_MASK) == MTE_CTRL_TCF_ASYNC) {
> +		sctlr |= __this_cpu_read(mte_upgrade_async);

If we consider 0 to mean "disable upgrade", you'd just need another
check here before the write. But it may simplify some of the sysfs code
to avoid the switch statement and the pre-initialisation of
mte_upgrade_async.
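
For illustration, that variant might look roughly like the sketch below
(same names as the patch, with a per-CPU mte_upgrade_async value of 0
meaning "no upgrade"):

void mte_update_sctlr_user(struct task_struct *task)
{
	unsigned long sctlr = task->thread.sctlr_user;
	u64 upgrade = __this_cpu_read(mte_upgrade_async);

	sctlr &= ~SCTLR_EL1_TCF0_MASK;
	if ((task->thread.mte_ctrl & MTE_CTRL_DYNAMIC_TCF) &&
	    (task->thread.mte_ctrl & MTE_CTRL_TCF_MASK) == MTE_CTRL_TCF_ASYNC &&
	    upgrade) {
		/* Non-zero: the SCTLR_EL1.TCF0 value to upgrade to. */
		sctlr |= upgrade;
	} else {
		sctlr |= ((task->thread.mte_ctrl & MTE_CTRL_TCF_MASK) >>
			  MTE_CTRL_TCF_SHIFT) << SCTLR_EL1_TCF0_SHIFT;
	}
	task->thread.sctlr_user = sctlr;
}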

> +	} else {
> +		sctlr |= ((task->thread.mte_ctrl & MTE_CTRL_TCF_MASK) >>
> +			  MTE_CTRL_TCF_SHIFT)
> +			 << SCTLR_EL1_TCF0_SHIFT;

Nitpick: we tend to place the operator on the previous line; you can
probably place the shift constant on that line as well.

> +	}
> +	task->thread.sctlr_user = sctlr;

I think on the "else" path, we shouldn't bother updating sctlr_user,
though it probably needs some tweaking of the prctl() path.

Otherwise the patch looks like it's heading in the right direction.
Peter Collingbourne June 15, 2021, 8:06 p.m. UTC | #2
On Tue, Jun 15, 2021 at 11:02 AM Catalin Marinas
<catalin.marinas@arm.com> wrote:
>
> On Mon, Jun 14, 2021 at 06:57:28PM -0700, Peter Collingbourne wrote:
> > @@ -120,6 +120,25 @@ in the ``PR_MTE_TAG_MASK`` bit-field.
> >  interface provides an include mask. An include mask of ``0`` (exclusion
> >  mask ``0xffff``) results in the CPU always generating tag ``0``.
> >
> > +Upgrading to stricter tag checking modes
> > +----------------------------------------
> > +
> > +On some CPUs the performance of MTE in stricter tag checking modes
> > +is similar to that of less strict tag checking modes. This makes it
> > +worthwhile to enable stricter checks on those CPUs when a less strict
> > +checking mode is requested, in order to gain the error detection
> > +benefits of the stricter checks without the performance downsides. To
> > +opt into upgrading to a stricter checking mode on those CPUs, the user
> > +can set the ``PR_MTE_DYNAMIC_TCF`` flag bit in the ``flags`` argument
> > +to the ``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call.
> > +
> > +This feature is currently only supported for upgrading from
> > +asynchronous mode. To configure a CPU to upgrade from asynchronous mode
> > +to synchronous mode, a privileged user may write the value ``1`` to
> > +``/sys/devices/system/cpu/cpu<N>/mte_upgrade_async``, and to disable
> > +upgrading they may write the value ``2``. By default the feature is
> > +disabled on all CPUs.
>
> This needs updating as well for 0 as disabled.

Will fix.

> I wonder whether we could generalise this to something like
> mte_tcf_upgrade and allow asymmetric to be expanded to sync. Otherwise
> we'd have to add another interface when we know that if a CPU can handle
> sync as fast as async, the asymmetric mode should also be upgraded. So a
> more generic mte_tcf_upgrade just holds the strictest that the CPU can
> handle without significant performance degradation.

Maybe. But that wouldn't accommodate all scenarios. E.g. what if asym
should be upgraded to sync but async should not be upgraded? Or if we
wanted to allow upgrading from none? If we wanted to expand the
interface you've described to one where we allow these upgrade
patterns, I think things could get confusing. The way things work now,
we should be able to extend it so that all of these can be configured
uniformly.

Since the system designer knows which modes are supported on a
particular system, it doesn't seem like it would be a big concern to
require all of them to be configured.

> The mte_upgrade_async name can also be confusing for the asymmetric mode,
> where the writes are asynchronous.

It seems equivalent to e.g. the TCF constants being named async, asym, sync.

> > diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> > index 125a10e413e9..ad85e8519669 100644
> > --- a/arch/arm64/kernel/mte.c
> > +++ b/arch/arm64/kernel/mte.c
> > @@ -4,6 +4,7 @@
> >   */
> >
> >  #include <linux/bitops.h>
> > +#include <linux/cpu.h>
> >  #include <linux/kernel.h>
> >  #include <linux/mm.h>
> >  #include <linux/prctl.h>
> > @@ -26,6 +27,9 @@ u64 gcr_kernel_excl __ro_after_init;
> >
> >  static bool report_fault_once = true;
> >
> > +DEFINE_PER_CPU_READ_MOSTLY(u64, mte_upgrade_async);
> > +EXPORT_PER_CPU_SYMBOL(mte_upgrade_async);
>
> I think this should be static and not exported, unless I missed its use
> elsewhere.

Will fix.

> > @@ -216,15 +210,33 @@ void mte_thread_init_user(void)
> >       dsb(ish);
> >       write_sysreg_s(0, SYS_TFSRE0_EL1);
> >       clear_thread_flag(TIF_MTE_ASYNC_FAULT);
> > -     /* disable tag checking */
> > -     set_task_sctlr_el1((current->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK) |
> > -                        SCTLR_EL1_TCF0_NONE);
> > -     /* reset tag generation mask */
> > -     set_gcr_el1_excl(SYS_GCR_EL1_EXCL_MASK);
> > +     /* disable tag checking and reset tag generation mask */
> > +     current->thread.mte_ctrl =
> > +             MTE_CTRL_GCR_USER_EXCL_MASK | MTE_CTRL_TCF_NONE;
> > +     mte_update_sctlr_user(current);
> > +     set_task_sctlr_el1(current->thread.sctlr_user);
> > +}
> > +
> > +void mte_update_sctlr_user(struct task_struct *task)
> > +{
> > +     unsigned long sctlr = task->thread.sctlr_user;
> > +
> > +     sctlr &= ~SCTLR_EL1_TCF0_MASK;
> > +     if ((task->thread.mte_ctrl & MTE_CTRL_DYNAMIC_TCF) &&
> > +         (task->thread.mte_ctrl & MTE_CTRL_TCF_MASK) == MTE_CTRL_TCF_ASYNC) {
> > +             sctlr |= __this_cpu_read(mte_upgrade_async);
>
> If we consider 0 to mean "disable upgrade", you'd just need another
> check here before the write. But it may simplify some of the sysfs code
> to avoid the switch statement and the pre-initialisation of
> mte_upgrade_async.

The way I've written it will accommodate upgrading to asym more
easily. Either way we will need a switch statement somewhere, and it
seems best to have it on the sysfs side since that code will be
invoked less often.

> > +     } else {
> > +             sctlr |= ((task->thread.mte_ctrl & MTE_CTRL_TCF_MASK) >>
> > +                       MTE_CTRL_TCF_SHIFT)
> > +                      << SCTLR_EL1_TCF0_SHIFT;
>
> Nitpick: we tend to place the operator on the previous line; you can
> probably place the shift constant on that line as well.

Will fix.

> > +     }
> > +     task->thread.sctlr_user = sctlr;
>
> I think on the "else" path, we shouldn't bother updating sctlr_user,
> though it probably needs some tweaking of the prctl() path.

I thought about that but it didn't seem like it would justify the
added complexity on the prctl path.

Peter

Patch

diff --git a/Documentation/arm64/memory-tagging-extension.rst b/Documentation/arm64/memory-tagging-extension.rst
index b540178a93f8..bd1b357e95c0 100644
--- a/Documentation/arm64/memory-tagging-extension.rst
+++ b/Documentation/arm64/memory-tagging-extension.rst
@@ -120,6 +120,25 @@  in the ``PR_MTE_TAG_MASK`` bit-field.
 interface provides an include mask. An include mask of ``0`` (exclusion
 mask ``0xffff``) results in the CPU always generating tag ``0``.
 
+Upgrading to stricter tag checking modes
+----------------------------------------
+
+On some CPUs the performance of MTE in stricter tag checking modes
+is similar to that of less strict tag checking modes. This makes it
+worthwhile to enable stricter checks on those CPUs when a less strict
+checking mode is requested, in order to gain the error detection
+benefits of the stricter checks without the performance downsides. To
+opt into upgrading to a stricter checking mode on those CPUs, the user
+can set the ``PR_MTE_DYNAMIC_TCF`` flag bit in the ``flags`` argument
+to the ``prctl(PR_SET_TAGGED_ADDR_CTRL, flags, 0, 0, 0)`` system call.
+
+This feature is currently only supported for upgrading from
+asynchronous mode. To configure a CPU to upgrade from asynchronous mode
+to synchronous mode, a privileged user may write the value ``1`` to
+``/sys/devices/system/cpu/cpu<N>/mte_upgrade_async``, and to disable
+upgrading they may write the value ``2``. By default the feature is
+disabled on all CPUs.
+
 Initial process state
 ---------------------
 
@@ -128,6 +147,7 @@  On ``execve()``, the new process has the following configuration:
 - ``PR_TAGGED_ADDR_ENABLE`` set to 0 (disabled)
 - Tag checking mode set to ``PR_MTE_TCF_NONE``
 - ``PR_MTE_TAG_MASK`` set to 0 (all tags excluded)
+- ``PR_MTE_DYNAMIC_TCF`` set to 0 (disabled)
 - ``PSTATE.TCO`` set to 0
 - ``PROT_MTE`` not set on any of the initial memory maps
 
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index bc88a1ced0d7..719687412798 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -40,6 +40,7 @@  void mte_free_tag_storage(char *storage);
 void mte_sync_tags(pte_t *ptep, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void mte_thread_init_user(void);
+void mte_update_sctlr_user(struct task_struct *task);
 void mte_thread_switch(struct task_struct *next);
 void mte_suspend_enter(void);
 void mte_suspend_exit(void);
@@ -62,6 +63,9 @@  static inline void mte_copy_page_tags(void *kto, const void *kfrom)
 static inline void mte_thread_init_user(void)
 {
 }
+static inline void mte_update_sctlr_user(struct task_struct *task)
+{
+}
 static inline void mte_thread_switch(struct task_struct *next)
 {
 }
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 9df3feeee890..f8607c3a5706 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -16,6 +16,18 @@ 
  */
 #define NET_IP_ALIGN	0
 
+#define MTE_CTRL_GCR_USER_EXCL_SHIFT	0
+#define MTE_CTRL_GCR_USER_EXCL_MASK	0xffff
+
+#define MTE_CTRL_TCF_SHIFT		16
+#define MTE_CTRL_TCF_NONE		(0UL << MTE_CTRL_TCF_SHIFT)
+#define MTE_CTRL_TCF_SYNC		(1UL << MTE_CTRL_TCF_SHIFT)
+#define MTE_CTRL_TCF_ASYNC		(2UL << MTE_CTRL_TCF_SHIFT)
+#define MTE_CTRL_TCF_MASK		(3UL << MTE_CTRL_TCF_SHIFT)
+
+#define MTE_CTRL_DYNAMIC_TCF_SHIFT	18
+#define MTE_CTRL_DYNAMIC_TCF		(1UL << MTE_CTRL_DYNAMIC_TCF_SHIFT)
+
 #ifndef __ASSEMBLY__
 
 #include <linux/build_bug.h>
@@ -151,7 +163,7 @@  struct thread_struct {
 	struct ptrauth_keys_kernel	keys_kernel;
 #endif
 #ifdef CONFIG_ARM64_MTE
-	u64			gcr_user_excl;
+	u64			mte_ctrl;
 #endif
 	u64			sctlr_user;
 };
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 0cb34ccb6e73..63d02cd67b44 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -49,7 +49,7 @@  int main(void)
   DEFINE(THREAD_KEYS_KERNEL,	offsetof(struct task_struct, thread.keys_kernel));
 #endif
 #ifdef CONFIG_ARM64_MTE
-  DEFINE(THREAD_GCR_EL1_USER,	offsetof(struct task_struct, thread.gcr_user_excl));
+  DEFINE(THREAD_MTE_CTRL,	offsetof(struct task_struct, thread.mte_ctrl));
 #endif
   BLANK();
   DEFINE(S_X0,			offsetof(struct pt_regs, regs[0]));
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 3513984a88bd..ce59280355c5 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -182,7 +182,7 @@  alternative_else_nop_endif
 	 * the RRND (bit[16]) setting.
 	 */
 	mrs_s	\tmp2, SYS_GCR_EL1
-	bfi	\tmp2, \tmp, #0, #16
+	bfxil	\tmp2, \tmp, #MTE_CTRL_GCR_USER_EXCL_SHIFT, #16
 	msr_s	SYS_GCR_EL1, \tmp2
 #endif
 	.endm
@@ -205,7 +205,7 @@  alternative_else_nop_endif
 alternative_if_not ARM64_MTE
 	b	1f
 alternative_else_nop_endif
-	ldr	\tmp, [\tsk, #THREAD_GCR_EL1_USER]
+	ldr	\tmp, [\tsk, #THREAD_MTE_CTRL]
 
 	mte_set_gcr \tmp, \tmp2
 1:
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 125a10e413e9..ad85e8519669 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -4,6 +4,7 @@ 
  */
 
 #include <linux/bitops.h>
+#include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/prctl.h>
@@ -26,6 +27,9 @@  u64 gcr_kernel_excl __ro_after_init;
 
 static bool report_fault_once = true;
 
+DEFINE_PER_CPU_READ_MOSTLY(u64, mte_upgrade_async);
+EXPORT_PER_CPU_SYMBOL(mte_upgrade_async);
+
 #ifdef CONFIG_KASAN_HW_TAGS
 /* Whether the MTE asynchronous mode is enabled. */
 DEFINE_STATIC_KEY_FALSE(mte_async_mode);
@@ -197,16 +201,6 @@  static void update_gcr_el1_excl(u64 excl)
 	sysreg_clear_set_s(SYS_GCR_EL1, SYS_GCR_EL1_EXCL_MASK, excl);
 }
 
-static void set_gcr_el1_excl(u64 excl)
-{
-	current->thread.gcr_user_excl = excl;
-
-	/*
-	 * SYS_GCR_EL1 will be set to current->thread.gcr_user_excl value
-	 * by mte_set_user_gcr() in kernel_exit,
-	 */
-}
-
 void mte_thread_init_user(void)
 {
 	if (!system_supports_mte())
@@ -216,15 +210,33 @@  void mte_thread_init_user(void)
 	dsb(ish);
 	write_sysreg_s(0, SYS_TFSRE0_EL1);
 	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
-	/* disable tag checking */
-	set_task_sctlr_el1((current->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK) |
-			   SCTLR_EL1_TCF0_NONE);
-	/* reset tag generation mask */
-	set_gcr_el1_excl(SYS_GCR_EL1_EXCL_MASK);
+	/* disable tag checking and reset tag generation mask */
+	current->thread.mte_ctrl =
+		MTE_CTRL_GCR_USER_EXCL_MASK | MTE_CTRL_TCF_NONE;
+	mte_update_sctlr_user(current);
+	set_task_sctlr_el1(current->thread.sctlr_user);
+}
+
+void mte_update_sctlr_user(struct task_struct *task)
+{
+	unsigned long sctlr = task->thread.sctlr_user;
+
+	sctlr &= ~SCTLR_EL1_TCF0_MASK;
+	if ((task->thread.mte_ctrl & MTE_CTRL_DYNAMIC_TCF) &&
+	    (task->thread.mte_ctrl & MTE_CTRL_TCF_MASK) == MTE_CTRL_TCF_ASYNC) {
+		sctlr |= __this_cpu_read(mte_upgrade_async);
+	} else {
+		sctlr |= ((task->thread.mte_ctrl & MTE_CTRL_TCF_MASK) >>
+			  MTE_CTRL_TCF_SHIFT)
+			 << SCTLR_EL1_TCF0_SHIFT;
+	}
+	task->thread.sctlr_user = sctlr;
 }
 
 void mte_thread_switch(struct task_struct *next)
 {
+	mte_update_sctlr_user(next);
+
 	/*
 	 * Check if an async tag exception occurred at EL1.
 	 *
@@ -262,33 +274,34 @@  void mte_suspend_exit(void)
 
 long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 {
-	u64 sctlr = task->thread.sctlr_user & ~SCTLR_EL1_TCF0_MASK;
-	u64 gcr_excl = ~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
-		       SYS_GCR_EL1_EXCL_MASK;
+	u64 mte_ctrl = (~((arg & PR_MTE_TAG_MASK) >> PR_MTE_TAG_SHIFT) &
+			SYS_GCR_EL1_EXCL_MASK)
+		       << MTE_CTRL_GCR_USER_EXCL_SHIFT;
 
 	if (!system_supports_mte())
 		return 0;
 
 	switch (arg & PR_MTE_TCF_MASK) {
 	case PR_MTE_TCF_NONE:
-		sctlr |= SCTLR_EL1_TCF0_NONE;
+		mte_ctrl |= MTE_CTRL_TCF_NONE;
 		break;
 	case PR_MTE_TCF_SYNC:
-		sctlr |= SCTLR_EL1_TCF0_SYNC;
+		mte_ctrl |= MTE_CTRL_TCF_SYNC;
 		break;
 	case PR_MTE_TCF_ASYNC:
-		sctlr |= SCTLR_EL1_TCF0_ASYNC;
+		mte_ctrl |= MTE_CTRL_TCF_ASYNC;
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	if (task != current) {
-		task->thread.sctlr_user = sctlr;
-		task->thread.gcr_user_excl = gcr_excl;
-	} else {
-		set_task_sctlr_el1(sctlr);
-		set_gcr_el1_excl(gcr_excl);
+	if (arg & PR_MTE_DYNAMIC_TCF)
+		mte_ctrl |= MTE_CTRL_DYNAMIC_TCF;
+
+	task->thread.mte_ctrl = mte_ctrl;
+	if (task == current) {
+		mte_update_sctlr_user(task);
+		set_task_sctlr_el1(task->thread.sctlr_user);
 	}
 
 	return 0;
@@ -297,25 +310,29 @@  long set_mte_ctrl(struct task_struct *task, unsigned long arg)
 long get_mte_ctrl(struct task_struct *task)
 {
 	unsigned long ret;
-	u64 incl = ~task->thread.gcr_user_excl & SYS_GCR_EL1_EXCL_MASK;
+	u64 incl = (~task->thread.mte_ctrl >> MTE_CTRL_GCR_USER_EXCL_SHIFT) &
+		   SYS_GCR_EL1_EXCL_MASK;
 
 	if (!system_supports_mte())
 		return 0;
 
 	ret = incl << PR_MTE_TAG_SHIFT;
 
-	switch (task->thread.sctlr_user & SCTLR_EL1_TCF0_MASK) {
-	case SCTLR_EL1_TCF0_NONE:
+	switch (task->thread.mte_ctrl & MTE_CTRL_TCF_MASK) {
+	case MTE_CTRL_TCF_NONE:
 		ret |= PR_MTE_TCF_NONE;
 		break;
-	case SCTLR_EL1_TCF0_SYNC:
+	case MTE_CTRL_TCF_SYNC:
 		ret |= PR_MTE_TCF_SYNC;
 		break;
-	case SCTLR_EL1_TCF0_ASYNC:
+	case MTE_CTRL_TCF_ASYNC:
 		ret |= PR_MTE_TCF_ASYNC;
 		break;
 	}
 
+	if (task->thread.mte_ctrl & MTE_CTRL_DYNAMIC_TCF)
+		ret |= PR_MTE_DYNAMIC_TCF;
+
 	return ret;
 }
 
@@ -453,3 +470,75 @@  int mte_ptrace_copy_tags(struct task_struct *child, long request,
 
 	return ret;
 }
+
+static ssize_t mte_upgrade_async_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	switch (per_cpu(mte_upgrade_async, dev->id)) {
+	case SCTLR_EL1_TCF0_ASYNC:
+		return sysfs_emit(buf, "0\n");
+	case SCTLR_EL1_TCF0_SYNC:
+		return sysfs_emit(buf, "1\n");
+	default:
+		return sysfs_emit(buf, "???\n");
+	}
+}
+
+static void sync_sctlr(void *arg)
+{
+	mte_update_sctlr_user(current);
+	set_task_sctlr_el1(current->thread.sctlr_user);
+}
+
+static ssize_t mte_upgrade_async_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t count)
+{
+	ssize_t ret;
+	u32 val;
+	u64 tcf;
+
+	ret = kstrtou32(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	switch (val) {
+	case 0:
+		tcf = SCTLR_EL1_TCF0_ASYNC;
+		break;
+	case 1:
+		tcf = SCTLR_EL1_TCF0_SYNC;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	device_lock(dev);
+	per_cpu(mte_upgrade_async, dev->id) = tcf;
+
+	if (cpu_online(dev->id))
+		ret = smp_call_function_single(dev->id, sync_sctlr, NULL, 0);
+	if (ret == 0)
+		ret = count;
+	device_unlock(dev);
+
+	return ret;
+}
+static DEVICE_ATTR_RW(mte_upgrade_async);
+
+static int register_mte_upgrade_async_sysctl(void)
+{
+	unsigned int cpu;
+
+	if (!system_supports_mte())
+		return 0;
+
+	for_each_possible_cpu(cpu) {
+		per_cpu(mte_upgrade_async, cpu) = SCTLR_EL1_TCF0_ASYNC;
+		device_create_file(get_cpu_device(cpu),
+				   &dev_attr_mte_upgrade_async);
+	}
+
+	return 0;
+}
+subsys_initcall(register_mte_upgrade_async_sysctl);
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index b4bb67f17a2c..09bd9c378678 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -659,7 +659,7 @@  long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
 		return -EINVAL;
 
 	if (system_supports_mte())
-		valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK;
+		valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK | PR_MTE_DYNAMIC_TCF;
 
 	if (arg & ~valid_mask)
 		return -EINVAL;
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 18a9f59dc067..4dab44732814 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -242,6 +242,8 @@  struct prctl_mm_map {
 /* MTE tag inclusion mask */
 # define PR_MTE_TAG_SHIFT		3
 # define PR_MTE_TAG_MASK		(0xffffUL << PR_MTE_TAG_SHIFT)
+/* Enable dynamic upgrading of MTE tag check fault mode */
+# define PR_MTE_DYNAMIC_TCF		(1UL << 19)
 
 /* Control reclaim behavior when allocating memory */
 #define PR_SET_IO_FLUSHER		57