[v9,3/4] arm64: mte: introduce a per-CPU tag checking mode preference

Message ID: 20210702194110.2045282-4-pcc@google.com
State: New, archived
Series: arm64: mte: allow async MTE to be upgraded to sync on a per-CPU basis

Commit Message

Peter Collingbourne July 2, 2021, 7:41 p.m. UTC
Add a per-CPU sysfs node, mte_tcf_preferred, that allows the preferred
tag checking mode to be configured. The current possible values are
async and sync.
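
For example, to prefer synchronous tag checking on CPU 0 (assuming the
standard sysfs CPU device path, as created here via get_cpu_device()):

  echo sync > /sys/devices/system/cpu/cpu0/mte_tcf_preferred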

Link: https://linux-review.googlesource.com/id/I7493dcd533a2785a1437b16c3f6b50919f840854
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/kernel/mte.c | 77 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 75 insertions(+), 2 deletions(-)

Comments

Will Deacon July 7, 2021, 11:15 a.m. UTC | #1
On Fri, Jul 02, 2021 at 12:41:09PM -0700, Peter Collingbourne wrote:
> Add a per-CPU sysfs node, mte_tcf_preferred, that allows the preferred
> tag checking mode to be configured. The current possible values are
> async and sync.

[...]

> +static void sync_sctlr(void *arg)
> +{
> +	mte_update_sctlr_user(current);
> +	set_task_sctlr_el1(current->thread.sctlr_user);
> +}
> +
> +static ssize_t mte_tcf_preferred_store(struct device *dev,
> +				       struct device_attribute *attr,
> +				       const char *buf, size_t count)
> +{
> +	ssize_t ret = 0;
> +	u64 tcf;
> +
> +	if (sysfs_streq(buf, "async"))
> +		tcf = MTE_CTRL_TCF_ASYNC;
> +	else if (sysfs_streq(buf, "sync"))
> +		tcf = MTE_CTRL_TCF_SYNC;
> +	else
> +		return -EINVAL;
> +
> +	device_lock(dev);
> +	per_cpu(mte_tcf_preferred, dev->id) = tcf;
> +
> +	if (cpu_online(dev->id))
> +		ret = smp_call_function_single(dev->id, sync_sctlr, NULL, 0);

Hmm, so this call could land right in the middle of a concurrent prctl().
What happens in that case?

Will
Peter Collingbourne July 12, 2021, 6:59 p.m. UTC | #2
On Wed, Jul 7, 2021 at 4:15 AM Will Deacon <will@kernel.org> wrote:

[...]

> > +     if (cpu_online(dev->id))
> > +             ret = smp_call_function_single(dev->id, sync_sctlr, NULL, 0);
>
> Hmm, so this call could land right in the middle of a concurrent prctl().
> What happens in that case?

I don't think anything can go wrong. When the prctl sets mte_ctrl we
then need to call mte_update_sctlr_user followed by set_task_sctlr_el1
and it doesn't matter if it happens multiple times (either
mte_update_sctlr_user/set_task_sctlr_el1/mte_update_sctlr_user/set_task_sctlr_el1
or even the less likely
mte_update_sctlr_user/mte_update_sctlr_user/set_task_sctlr_el1/set_task_sctlr_el1).
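
As a trace of the second case, where the IPI lands between the prctl's
update and set (a sketch; both paths run on the task's CPU):

  prctl: current->thread.mte_ctrl = ...    /* new value now visible */
  prctl: mte_update_sctlr_user(current)
  IPI:   mte_update_sctlr_user(current)
  IPI:   set_task_sctlr_el1(current->thread.sctlr_user)
  prctl: set_task_sctlr_el1(current->thread.sctlr_user)

Both updates compute sctlr_user from the same mte_ctrl, so both sets
write the same value.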

Actually, I'm not sure we need any code in the sync_sctlr function at
all. Simply scheduling an empty function onto the CPU will cause
mte_update_sctlr_user and then update_sctlr_el1 to be called when the
task that was originally running on the CPU gets rescheduled onto it,
as a result of the change to mte_thread_switch.
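
Something like (a sketch of that idea, untested):

	static void sync_sctlr(void *arg)
	{
		/*
		 * Deliberately empty: the assumption is that taking the
		 * IPI is enough to force a reschedule on this CPU, after
		 * which mte_thread_switch() recomputes SCTLR_EL1.TCF0 for
		 * the task that gets switched back in.
		 */
	}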

Peter
Will Deacon July 13, 2021, 5:48 p.m. UTC | #3
On Mon, Jul 12, 2021 at 11:59:39AM -0700, Peter Collingbourne wrote:

[...]

> > > +     if (cpu_online(dev->id))
> > > +             ret = smp_call_function_single(dev->id, sync_sctlr, NULL, 0);
> >
> > Hmm, so this call could land right in the middle of a concurrent prctl().
> > What happens in that case?
> 
> I don't think anything can go wrong. When the prctl sets mte_ctrl we
> then need to call mte_update_sctlr_user followed by set_task_sctlr_el1
> and it doesn't matter if it happens multiple times (either
> mte_update_sctlr_user/set_task_sctlr_el1/mte_update_sctlr_user/set_task_sctlr_el1
> or even the less likely
> mte_update_sctlr_user/mte_update_sctlr_user/set_task_sctlr_el1/set_task_sctlr_el1).

I think you're still (at least) relying on the compiler not to tear or cache
accesses to ->thread.sctlr_user, no? It seems like it would be a lot simpler
just to leave the change until the next context switch and not send the IPI
at all. I think that's a reasonable behaviour because the write to sysfs is
racy anyway, so we can just document this.
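
That is, at a minimum you'd want the interrupt-vs-task accesses to
->thread.sctlr_user annotated explicitly, something like this sketch
(illustrative only):

	/* in mte_update_sctlr_user(): a store the IPI may observe */
	WRITE_ONCE(task->thread.sctlr_user, sctlr);

	/* in sync_sctlr(), running from the IPI handler */
	set_task_sctlr_el1(READ_ONCE(current->thread.sctlr_user));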

> Actually, I'm not sure we need any code in the sync_sctlr function at
> all. Simply scheduling an empty function onto the CPU will cause
> mte_update_sctlr_user and then update_sctlr_el1 to be called when the
> task that was originally running on the CPU gets rescheduled onto it,
> as a result of the change to mte_thread_switch.

I'm not sure I agree here -- I thought smp_call_function_single() ran the
target function directly from the IPI handler in interrupt context, without
the need for a context-switch.

Will
Peter Collingbourne July 13, 2021, 10:46 p.m. UTC | #4
On Tue, Jul 13, 2021 at 10:48 AM Will Deacon <will@kernel.org> wrote:

[...]

> I think you're still (at least) relying on the compiler not to tear or cache
> accesses to ->thread.sctlr_user, no? It seems like it would be a lot simpler
> just to leave the change until the next context switch and not send the IPI
> at all. I think that's a reasonable behaviour because the write to sysfs is
> racy anyway, so we can just document this.

Works for me. I removed the call to smp_call_function_single here and
updated the documentation patch to say that changes won't take effect
immediately.
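
So the store path in v10 becomes roughly (a sketch):

	device_lock(dev);
	per_cpu(mte_tcf_preferred, dev->id) = tcf;
	device_unlock(dev);

	/*
	 * No IPI: the new preference is picked up the next time
	 * mte_thread_switch() runs on this CPU.
	 */
	return count;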

> > Actually, I'm not sure we need any code in the sync_sctlr function at
> > all. Simply scheduling an empty function onto the CPU will cause
> > mte_update_sctlr_user and then update_sctlr_el1 to be called when the
> > task that was originally running on the CPU gets rescheduled onto it,
> > as a result of the change to mte_thread_switch.
>
> I'm not sure I agree here -- I thought smp_call_function_single() ran the
> target function directly from the IPI handler in interrupt context, without
> the need for a context-switch.

I see -- I thought that they ran as something like a kthread, but you
may be right.

Peter
Patch

diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 53d89915029d..48f218e070cc 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/bitops.h>
+#include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/prctl.h>
@@ -26,6 +27,8 @@ u64 gcr_kernel_excl __ro_after_init;
 
 static bool report_fault_once = true;
 
+static DEFINE_PER_CPU_READ_MOSTLY(u64, mte_tcf_preferred);
+
 #ifdef CONFIG_KASAN_HW_TAGS
 /* Whether the MTE asynchronous mode is enabled. */
 DEFINE_STATIC_KEY_FALSE(mte_async_mode);
@@ -199,17 +202,24 @@ static void update_gcr_el1_excl(u64 excl)
 
 static void mte_update_sctlr_user(struct task_struct *task)
 {
+	/*
+	 * This can only be called on the current or next task since the CPU
+	 * must match where the thread is going to run.
+	 */
 	unsigned long sctlr = task->thread.sctlr_user;
-	unsigned long pref = MTE_CTRL_TCF_ASYNC;
 	unsigned long mte_ctrl = task->thread.mte_ctrl;
-	unsigned long resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
+	unsigned long pref, resolved_mte_tcf;
 
+	preempt_disable();
+	pref = __this_cpu_read(mte_tcf_preferred);
+	resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl;
 	sctlr &= ~SCTLR_EL1_TCF0_MASK;
 	if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC)
 		sctlr |= SCTLR_EL1_TCF0_ASYNC;
 	else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC)
 		sctlr |= SCTLR_EL1_TCF0_SYNC;
 	task->thread.sctlr_user = sctlr;
+	preempt_enable();
 }
 
 void mte_thread_init_user(void)
@@ -441,3 +451,66 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
 
 	return ret;
 }
+
+static ssize_t mte_tcf_preferred_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	switch (per_cpu(mte_tcf_preferred, dev->id)) {
+	case MTE_CTRL_TCF_ASYNC:
+		return sysfs_emit(buf, "async\n");
+	case MTE_CTRL_TCF_SYNC:
+		return sysfs_emit(buf, "sync\n");
+	default:
+		return sysfs_emit(buf, "???\n");
+	}
+}
+
+static void sync_sctlr(void *arg)
+{
+	mte_update_sctlr_user(current);
+	set_task_sctlr_el1(current->thread.sctlr_user);
+}
+
+static ssize_t mte_tcf_preferred_store(struct device *dev,
+				       struct device_attribute *attr,
+				       const char *buf, size_t count)
+{
+	ssize_t ret = 0;
+	u64 tcf;
+
+	if (sysfs_streq(buf, "async"))
+		tcf = MTE_CTRL_TCF_ASYNC;
+	else if (sysfs_streq(buf, "sync"))
+		tcf = MTE_CTRL_TCF_SYNC;
+	else
+		return -EINVAL;
+
+	device_lock(dev);
+	per_cpu(mte_tcf_preferred, dev->id) = tcf;
+
+	if (cpu_online(dev->id))
+		ret = smp_call_function_single(dev->id, sync_sctlr, NULL, 0);
+	if (ret == 0)
+		ret = count;
+	device_unlock(dev);
+
+	return ret;
+}
+static DEVICE_ATTR_RW(mte_tcf_preferred);
+
+static int register_mte_tcf_preferred_sysctl(void)
+{
+	unsigned int cpu;
+
+	if (!system_supports_mte())
+		return 0;
+
+	for_each_possible_cpu(cpu) {
+		per_cpu(mte_tcf_preferred, cpu) = MTE_CTRL_TCF_ASYNC;
+		device_create_file(get_cpu_device(cpu),
+				   &dev_attr_mte_tcf_preferred);
+	}
+
+	return 0;
+}
+subsys_initcall(register_mte_tcf_preferred_sysctl);