[v7,18/29] arm64: mte: Allow user control of the tag check mode via prctl()
diff mbox series

Message ID 20200715170844.30064-19-catalin.marinas@arm.com
State New
Headers show
Series
  • arm64: Memory Tagging Extension user-space support
Related show

Commit Message

Catalin Marinas July 15, 2020, 5:08 p.m. UTC
By default, even if PROT_MTE is set on a memory range, there is no tag
check fault reporting (SIGSEGV). Introduce a set of option to the
exiting prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
check fault mode:

  PR_MTE_TCF_NONE  - no reporting (default)
  PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
  PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting

These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
context-switched by the kernel. Note that uaccess done by the kernel is
not checked and cannot be configured by the user.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---

Notes:
    v3:
    - Use SCTLR_EL1_TCF0_NONE instead of 0 for consistency.
    - Move mte_thread_switch() in this patch from an earlier one. In
      addition, it is called after the dsb() in __switch_to() so that any
      asynchronous tag check faults have been registered in the TFSR_EL1
      registers (to be added with the in-kernel MTE support.
    
    v2:
    - Handle SCTLR_EL1_TCF0_NONE explicitly for consistency with PR_MTE_TCF_NONE.
    - Fix SCTLR_EL1 register setting in flush_mte_state() (thanks to Peter
      Collingbourne).
    - Added ISB to update_sctlr_el1_tcf0() since, with the latest
      architecture update/fix, the TCF0 field is used by the uaccess
      routines.

 arch/arm64/include/asm/mte.h       | 14 ++++++
 arch/arm64/include/asm/processor.h |  3 ++
 arch/arm64/kernel/mte.c            | 77 ++++++++++++++++++++++++++++++
 arch/arm64/kernel/process.c        | 26 ++++++++--
 include/uapi/linux/prctl.h         |  6 +++
 5 files changed, 123 insertions(+), 3 deletions(-)

Comments

Kevin Brodsky July 20, 2020, 3:30 p.m. UTC | #1
On 15/07/2020 18:08, Catalin Marinas wrote:
> By default, even if PROT_MTE is set on a memory range, there is no tag
> check fault reporting (SIGSEGV). Introduce a set of option to the
> exiting prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
> check fault mode:
>
>    PR_MTE_TCF_NONE  - no reporting (default)
>    PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
>    PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting
>
> These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
> context-switched by the kernel. Note that uaccess done by the kernel is
> not checked and cannot be configured by the user.
>
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> ---
>
> Notes:
>      v3:
>      - Use SCTLR_EL1_TCF0_NONE instead of 0 for consistency.
>      - Move mte_thread_switch() in this patch from an earlier one. In
>        addition, it is called after the dsb() in __switch_to() so that any
>        asynchronous tag check faults have been registered in the TFSR_EL1
>        registers (to be added with the in-kernel MTE support.
>      
>      v2:
>      - Handle SCTLR_EL1_TCF0_NONE explicitly for consistency with PR_MTE_TCF_NONE.
>      - Fix SCTLR_EL1 register setting in flush_mte_state() (thanks to Peter
>        Collingbourne).
>      - Added ISB to update_sctlr_el1_tcf0() since, with the latest
>        architecture update/fix, the TCF0 field is used by the uaccess
>        routines.
>
>   arch/arm64/include/asm/mte.h       | 14 ++++++
>   arch/arm64/include/asm/processor.h |  3 ++
>   arch/arm64/kernel/mte.c            | 77 ++++++++++++++++++++++++++++++
>   arch/arm64/kernel/process.c        | 26 ++++++++--
>   include/uapi/linux/prctl.h         |  6 +++
>   5 files changed, 123 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index b2577eee62c2..df2efbc9f8f1 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -21,6 +21,9 @@ void mte_clear_page_tags(void *addr);
>   void mte_sync_tags(pte_t *ptep, pte_t pte);
>   void mte_copy_page_tags(void *kto, const void *kfrom);
>   void flush_mte_state(void);
> +void mte_thread_switch(struct task_struct *next);
> +long set_mte_ctrl(unsigned long arg);
> +long get_mte_ctrl(void);
>   
>   #else
>   
> @@ -36,6 +39,17 @@ static inline void mte_copy_page_tags(void *kto, const void *kfrom)
>   static inline void flush_mte_state(void)
>   {
>   }
> +static inline void mte_thread_switch(struct task_struct *next)
> +{
> +}
> +static inline long set_mte_ctrl(unsigned long arg)
> +{
> +	return 0;
> +}
> +static inline long get_mte_ctrl(void)
> +{
> +	return 0;
> +}
>   
>   #endif
>   
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 240fe5e5b720..80e7f0573309 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -151,6 +151,9 @@ struct thread_struct {
>   	struct ptrauth_keys_user	keys_user;
>   	struct ptrauth_keys_kernel	keys_kernel;
>   #endif
> +#ifdef CONFIG_ARM64_MTE
> +	u64			sctlr_tcf0;
> +#endif
>   };
>   
>   static inline void arch_thread_struct_whitelist(unsigned long *offset,
> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index 5f54fd140610..375483a1f573 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -5,6 +5,8 @@
>   
>   #include <linux/bitops.h>
>   #include <linux/mm.h>
> +#include <linux/prctl.h>
> +#include <linux/sched.h>
>   #include <linux/string.h>
>   #include <linux/thread_info.h>
>   
> @@ -49,6 +51,26 @@ int memcmp_pages(struct page *page1, struct page *page2)
>   	return ret;
>   }
>   
> +static void update_sctlr_el1_tcf0(u64 tcf0)
> +{
> +	/* ISB required for the kernel uaccess routines */
> +	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0);
> +	isb();
> +}
> +
> +static void set_sctlr_el1_tcf0(u64 tcf0)
> +{
> +	/*
> +	 * mte_thread_switch() checks current->thread.sctlr_tcf0 as an
> +	 * optimisation. Disable preemption so that it does not see
> +	 * the variable update before the SCTLR_EL1.TCF0 one.
> +	 */
> +	preempt_disable();
> +	current->thread.sctlr_tcf0 = tcf0;
> +	update_sctlr_el1_tcf0(tcf0);
> +	preempt_enable();
> +}
> +
>   void flush_mte_state(void)
>   {
>   	if (!system_supports_mte())
> @@ -58,4 +80,59 @@ void flush_mte_state(void)
>   	dsb(ish);
>   	write_sysreg_s(0, SYS_TFSRE0_EL1);
>   	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
> +	/* disable tag checking */
> +	set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
> +}
> +
> +void mte_thread_switch(struct task_struct *next)
> +{
> +	if (!system_supports_mte())
> +		return;
> +
> +	/* avoid expensive SCTLR_EL1 accesses if no change */
> +	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)

I think this could be improved by checking whether `next` is a kernel thread, in 
which case thread.sctlr_tcf0 is 0 but there is no point in setting SCTLR_EL1.TCF0, 
since there should not be any access via TTBR0.

Kevin

> +		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
> +}
> +
> +long set_mte_ctrl(unsigned long arg)
> +{
> +	u64 tcf0;
> +
> +	if (!system_supports_mte())
> +		return 0;
> +
> +	switch (arg & PR_MTE_TCF_MASK) {
> +	case PR_MTE_TCF_NONE:
> +		tcf0 = SCTLR_EL1_TCF0_NONE;
> +		break;
> +	case PR_MTE_TCF_SYNC:
> +		tcf0 = SCTLR_EL1_TCF0_SYNC;
> +		break;
> +	case PR_MTE_TCF_ASYNC:
> +		tcf0 = SCTLR_EL1_TCF0_ASYNC;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	set_sctlr_el1_tcf0(tcf0);
> +
> +	return 0;
> +}
> +
> +long get_mte_ctrl(void)
> +{
> +	if (!system_supports_mte())
> +		return 0;
> +
> +	switch (current->thread.sctlr_tcf0) {
> +	case SCTLR_EL1_TCF0_NONE:
> +		return PR_MTE_TCF_NONE;
> +	case SCTLR_EL1_TCF0_SYNC:
> +		return PR_MTE_TCF_SYNC;
> +	case SCTLR_EL1_TCF0_ASYNC:
> +		return PR_MTE_TCF_ASYNC;
> +	}
> +
> +	return 0;
>   }
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 695705d1f8e5..d19ce8053a03 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -544,6 +544,13 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
>   	 */
>   	dsb(ish);
>   
> +	/*
> +	 * MTE thread switching must happen after the DSB above to ensure that
> +	 * any asynchronous tag check faults have been logged in the TFSR*_EL1
> +	 * registers.
> +	 */
> +	mte_thread_switch(next);
> +
>   	/* the actual thread switch */
>   	last = cpu_switch_to(prev, next);
>   
> @@ -603,9 +610,15 @@ static unsigned int tagged_addr_disabled;
>   
>   long set_tagged_addr_ctrl(unsigned long arg)
>   {
> +	unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
> +
>   	if (is_compat_task())
>   		return -EINVAL;
> -	if (arg & ~PR_TAGGED_ADDR_ENABLE)
> +
> +	if (system_supports_mte())
> +		valid_mask |= PR_MTE_TCF_MASK;
> +
> +	if (arg & ~valid_mask)
>   		return -EINVAL;
>   
>   	/*
> @@ -615,6 +628,9 @@ long set_tagged_addr_ctrl(unsigned long arg)
>   	if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
>   		return -EINVAL;
>   
> +	if (set_mte_ctrl(arg) != 0)
> +		return -EINVAL;
> +
>   	update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
>   
>   	return 0;
> @@ -622,13 +638,17 @@ long set_tagged_addr_ctrl(unsigned long arg)
>   
>   long get_tagged_addr_ctrl(void)
>   {
> +	long ret = 0;
> +
>   	if (is_compat_task())
>   		return -EINVAL;
>   
>   	if (test_thread_flag(TIF_TAGGED_ADDR))
> -		return PR_TAGGED_ADDR_ENABLE;
> +		ret = PR_TAGGED_ADDR_ENABLE;
>   
> -	return 0;
> +	ret |= get_mte_ctrl();
> +
> +	return ret;
>   }
>   
>   /*
> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index 07b4f8131e36..2390ab324afa 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -233,6 +233,12 @@ struct prctl_mm_map {
>   #define PR_SET_TAGGED_ADDR_CTRL		55
>   #define PR_GET_TAGGED_ADDR_CTRL		56
>   # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
> +/* MTE tag check fault modes */
> +# define PR_MTE_TCF_SHIFT		1
> +# define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
> +# define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
> +# define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
> +# define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
>   
>   /* Control reclaim behavior when allocating memory */
>   #define PR_SET_IO_FLUSHER		57
Dave Martin July 20, 2020, 5 p.m. UTC | #2
On Mon, Jul 20, 2020 at 04:30:35PM +0100, Kevin Brodsky wrote:
> On 15/07/2020 18:08, Catalin Marinas wrote:
> >By default, even if PROT_MTE is set on a memory range, there is no tag
> >check fault reporting (SIGSEGV). Introduce a set of option to the
> >exiting prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
> >check fault mode:
> >
> >   PR_MTE_TCF_NONE  - no reporting (default)
> >   PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
> >   PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting
> >
> >These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
> >context-switched by the kernel. Note that uaccess done by the kernel is
> >not checked and cannot be configured by the user.
> >
> >Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> >Cc: Will Deacon <will@kernel.org>
> >---
> >
> >Notes:
> >     v3:
> >     - Use SCTLR_EL1_TCF0_NONE instead of 0 for consistency.
> >     - Move mte_thread_switch() in this patch from an earlier one. In
> >       addition, it is called after the dsb() in __switch_to() so that any
> >       asynchronous tag check faults have been registered in the TFSR_EL1
> >       registers (to be added with the in-kernel MTE support.
> >     v2:
> >     - Handle SCTLR_EL1_TCF0_NONE explicitly for consistency with PR_MTE_TCF_NONE.
> >     - Fix SCTLR_EL1 register setting in flush_mte_state() (thanks to Peter
> >       Collingbourne).
> >     - Added ISB to update_sctlr_el1_tcf0() since, with the latest
> >       architecture update/fix, the TCF0 field is used by the uaccess
> >       routines.

[...]

> >diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c

[...]

> >+void mte_thread_switch(struct task_struct *next)
> >+{
> >+	if (!system_supports_mte())
> >+		return;
> >+
> >+	/* avoid expensive SCTLR_EL1 accesses if no change */
> >+	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
> 
> I think this could be improved by checking whether `next` is a kernel
> thread, in which case thread.sctlr_tcf0 is 0 but there is no point in
> setting SCTLR_EL1.TCF0, since there should not be any access via TTBR0.

Out of interest, do we have a nice way of testing for a kernel thread
now?

I remember fpsimd_thread_switch() used to check for task->mm, but we
seem to have got rid of that at some point.  set_mm() can defeat this,
and anyway the heavy lifting for FPSIMD is now deferred until returning
to userspace.

Cheers
---Dave
Catalin Marinas July 22, 2020, 10:28 a.m. UTC | #3
On Mon, Jul 20, 2020 at 06:00:50PM +0100, Dave P Martin wrote:
> On Mon, Jul 20, 2020 at 04:30:35PM +0100, Kevin Brodsky wrote:
> > On 15/07/2020 18:08, Catalin Marinas wrote:
> > >+void mte_thread_switch(struct task_struct *next)
> > >+{
> > >+	if (!system_supports_mte())
> > >+		return;
> > >+
> > >+	/* avoid expensive SCTLR_EL1 accesses if no change */
> > >+	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
> > 
> > I think this could be improved by checking whether `next` is a kernel
> > thread, in which case thread.sctlr_tcf0 is 0 but there is no point in
> > setting SCTLR_EL1.TCF0, since there should not be any access via TTBR0.
> 
> Out of interest, do we have a nice way of testing for a kernel thread
> now?
> 
> I remember fpsimd_thread_switch() used to check for task->mm, but we
> seem to have got rid of that at some point.  set_mm() can defeat this,
> and anyway the heavy lifting for FPSIMD is now deferred until returning
> to userspace.

I think a better way is (current->flags & PF_KTHREAD). kthread_use_mm()
indeed changes current->mm.
Catalin Marinas July 22, 2020, 11:09 a.m. UTC | #4
On Mon, Jul 20, 2020 at 04:30:35PM +0100, Kevin Brodsky wrote:
> On 15/07/2020 18:08, Catalin Marinas wrote:
> > +void mte_thread_switch(struct task_struct *next)
> > +{
> > +	if (!system_supports_mte())
> > +		return;
> > +
> > +	/* avoid expensive SCTLR_EL1 accesses if no change */
> > +	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
> 
> I think this could be improved by checking whether `next` is a kernel
> thread, in which case thread.sctlr_tcf0 is 0 but there is no point in
> setting SCTLR_EL1.TCF0, since there should not be any access via TTBR0.

It's not about kernel or user thread here. kthread_use_mm() (just
use_mm() in older kernels) would set an mm on a kernel thread,
temporarily making it behave as a user one. Since the sctlr_tcf0 is per
thread, not per mm, we need to switch to the default TCF0 for kthreads
so that user accesses (if use_mm() is called) don't generate any tag
check faults. Note that switch_mm() does not touch TCF0.

If we did allow a global, per-mm TCF0 setting, such kthreads could only
handle synchronous faults and no SIGSEGV generated (as we do with
copy_{from,to}_user() for normal threads).

If we want to revisit per-thread vs per-mm TCF0 setting, now is the
time.
Kevin Brodsky July 23, 2020, 7:33 p.m. UTC | #5
On 20/07/2020 18:00, Dave Martin wrote:
> On Mon, Jul 20, 2020 at 04:30:35PM +0100, Kevin Brodsky wrote:
>> On 15/07/2020 18:08, Catalin Marinas wrote:
>>> By default, even if PROT_MTE is set on a memory range, there is no tag
>>> check fault reporting (SIGSEGV). Introduce a set of option to the
>>> exiting prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
>>> check fault mode:
>>>
>>>    PR_MTE_TCF_NONE  - no reporting (default)
>>>    PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
>>>    PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting
>>>
>>> These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
>>> context-switched by the kernel. Note that uaccess done by the kernel is
>>> not checked and cannot be configured by the user.
>>>
>>> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
>>> Cc: Will Deacon <will@kernel.org>
>>> ---
>>>
>>> Notes:
>>>      v3:
>>>      - Use SCTLR_EL1_TCF0_NONE instead of 0 for consistency.
>>>      - Move mte_thread_switch() in this patch from an earlier one. In
>>>        addition, it is called after the dsb() in __switch_to() so that any
>>>        asynchronous tag check faults have been registered in the TFSR_EL1
>>>        registers (to be added with the in-kernel MTE support.
>>>      v2:
>>>      - Handle SCTLR_EL1_TCF0_NONE explicitly for consistency with PR_MTE_TCF_NONE.
>>>      - Fix SCTLR_EL1 register setting in flush_mte_state() (thanks to Peter
>>>        Collingbourne).
>>>      - Added ISB to update_sctlr_el1_tcf0() since, with the latest
>>>        architecture update/fix, the TCF0 field is used by the uaccess
>>>        routines.
> [...]
>
>>> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> [...]
>
>>> +void mte_thread_switch(struct task_struct *next)
>>> +{
>>> +	if (!system_supports_mte())
>>> +		return;
>>> +
>>> +	/* avoid expensive SCTLR_EL1 accesses if no change */
>>> +	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
>> I think this could be improved by checking whether `next` is a kernel
>> thread, in which case thread.sctlr_tcf0 is 0 but there is no point in
>> setting SCTLR_EL1.TCF0, since there should not be any access via TTBR0.
> Out of interest, do we have a nice way of testing for a kernel thread
> now?

Isn't it as simple as checking if PF_KTHREAD is set in tsk->flags? At least this is 
what ssbs_thread_switch() does.

Kevin

> I remember fpsimd_thread_switch() used to check for task->mm, but we
> seem to have got rid of that at some point.  set_mm() can defeat this,
> and anyway the heavy lifting for FPSIMD is now deferred until returning
> to userspace.
>
> Cheers
> ---Dave
Kevin Brodsky Aug. 4, 2020, 7:34 p.m. UTC | #6
On 15/07/2020 18:08, Catalin Marinas wrote:
> By default, even if PROT_MTE is set on a memory range, there is no tag
> check fault reporting (SIGSEGV). Introduce a set of option to the
> exiting prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
> check fault mode:
>
>    PR_MTE_TCF_NONE  - no reporting (default)
>    PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
>    PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting
>
> These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
> context-switched by the kernel. Note that uaccess done by the kernel is
> not checked and cannot be configured by the user.

The last sentence is outdated, it should probably say that uaccess is only checked in 
in synchronous mode.

Kevin

> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> ---
>
> Notes:
>      v3:
>      - Use SCTLR_EL1_TCF0_NONE instead of 0 for consistency.
>      - Move mte_thread_switch() in this patch from an earlier one. In
>        addition, it is called after the dsb() in __switch_to() so that any
>        asynchronous tag check faults have been registered in the TFSR_EL1
>        registers (to be added with the in-kernel MTE support.
>      
>      v2:
>      - Handle SCTLR_EL1_TCF0_NONE explicitly for consistency with PR_MTE_TCF_NONE.
>      - Fix SCTLR_EL1 register setting in flush_mte_state() (thanks to Peter
>        Collingbourne).
>      - Added ISB to update_sctlr_el1_tcf0() since, with the latest
>        architecture update/fix, the TCF0 field is used by the uaccess
>        routines.
>
>   arch/arm64/include/asm/mte.h       | 14 ++++++
>   arch/arm64/include/asm/processor.h |  3 ++
>   arch/arm64/kernel/mte.c            | 77 ++++++++++++++++++++++++++++++
>   arch/arm64/kernel/process.c        | 26 ++++++++--
>   include/uapi/linux/prctl.h         |  6 +++
>   5 files changed, 123 insertions(+), 3 deletions(-)
>
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index b2577eee62c2..df2efbc9f8f1 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -21,6 +21,9 @@ void mte_clear_page_tags(void *addr);
>   void mte_sync_tags(pte_t *ptep, pte_t pte);
>   void mte_copy_page_tags(void *kto, const void *kfrom);
>   void flush_mte_state(void);
> +void mte_thread_switch(struct task_struct *next);
> +long set_mte_ctrl(unsigned long arg);
> +long get_mte_ctrl(void);
>   
>   #else
>   
> @@ -36,6 +39,17 @@ static inline void mte_copy_page_tags(void *kto, const void *kfrom)
>   static inline void flush_mte_state(void)
>   {
>   }
> +static inline void mte_thread_switch(struct task_struct *next)
> +{
> +}
> +static inline long set_mte_ctrl(unsigned long arg)
> +{
> +	return 0;
> +}
> +static inline long get_mte_ctrl(void)
> +{
> +	return 0;
> +}
>   
>   #endif
>   
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 240fe5e5b720..80e7f0573309 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -151,6 +151,9 @@ struct thread_struct {
>   	struct ptrauth_keys_user	keys_user;
>   	struct ptrauth_keys_kernel	keys_kernel;
>   #endif
> +#ifdef CONFIG_ARM64_MTE
> +	u64			sctlr_tcf0;
> +#endif
>   };
>   
>   static inline void arch_thread_struct_whitelist(unsigned long *offset,
> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index 5f54fd140610..375483a1f573 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -5,6 +5,8 @@
>   
>   #include <linux/bitops.h>
>   #include <linux/mm.h>
> +#include <linux/prctl.h>
> +#include <linux/sched.h>
>   #include <linux/string.h>
>   #include <linux/thread_info.h>
>   
> @@ -49,6 +51,26 @@ int memcmp_pages(struct page *page1, struct page *page2)
>   	return ret;
>   }
>   
> +static void update_sctlr_el1_tcf0(u64 tcf0)
> +{
> +	/* ISB required for the kernel uaccess routines */
> +	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0);
> +	isb();
> +}
> +
> +static void set_sctlr_el1_tcf0(u64 tcf0)
> +{
> +	/*
> +	 * mte_thread_switch() checks current->thread.sctlr_tcf0 as an
> +	 * optimisation. Disable preemption so that it does not see
> +	 * the variable update before the SCTLR_EL1.TCF0 one.
> +	 */
> +	preempt_disable();
> +	current->thread.sctlr_tcf0 = tcf0;
> +	update_sctlr_el1_tcf0(tcf0);
> +	preempt_enable();
> +}
> +
>   void flush_mte_state(void)
>   {
>   	if (!system_supports_mte())
> @@ -58,4 +80,59 @@ void flush_mte_state(void)
>   	dsb(ish);
>   	write_sysreg_s(0, SYS_TFSRE0_EL1);
>   	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
> +	/* disable tag checking */
> +	set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
> +}
> +
> +void mte_thread_switch(struct task_struct *next)
> +{
> +	if (!system_supports_mte())
> +		return;
> +
> +	/* avoid expensive SCTLR_EL1 accesses if no change */
> +	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
> +		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
> +}
> +
> +long set_mte_ctrl(unsigned long arg)
> +{
> +	u64 tcf0;
> +
> +	if (!system_supports_mte())
> +		return 0;
> +
> +	switch (arg & PR_MTE_TCF_MASK) {
> +	case PR_MTE_TCF_NONE:
> +		tcf0 = SCTLR_EL1_TCF0_NONE;
> +		break;
> +	case PR_MTE_TCF_SYNC:
> +		tcf0 = SCTLR_EL1_TCF0_SYNC;
> +		break;
> +	case PR_MTE_TCF_ASYNC:
> +		tcf0 = SCTLR_EL1_TCF0_ASYNC;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +
> +	set_sctlr_el1_tcf0(tcf0);
> +
> +	return 0;
> +}
> +
> +long get_mte_ctrl(void)
> +{
> +	if (!system_supports_mte())
> +		return 0;
> +
> +	switch (current->thread.sctlr_tcf0) {
> +	case SCTLR_EL1_TCF0_NONE:
> +		return PR_MTE_TCF_NONE;
> +	case SCTLR_EL1_TCF0_SYNC:
> +		return PR_MTE_TCF_SYNC;
> +	case SCTLR_EL1_TCF0_ASYNC:
> +		return PR_MTE_TCF_ASYNC;
> +	}
> +
> +	return 0;
>   }
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 695705d1f8e5..d19ce8053a03 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -544,6 +544,13 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
>   	 */
>   	dsb(ish);
>   
> +	/*
> +	 * MTE thread switching must happen after the DSB above to ensure that
> +	 * any asynchronous tag check faults have been logged in the TFSR*_EL1
> +	 * registers.
> +	 */
> +	mte_thread_switch(next);
> +
>   	/* the actual thread switch */
>   	last = cpu_switch_to(prev, next);
>   
> @@ -603,9 +610,15 @@ static unsigned int tagged_addr_disabled;
>   
>   long set_tagged_addr_ctrl(unsigned long arg)
>   {
> +	unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
> +
>   	if (is_compat_task())
>   		return -EINVAL;
> -	if (arg & ~PR_TAGGED_ADDR_ENABLE)
> +
> +	if (system_supports_mte())
> +		valid_mask |= PR_MTE_TCF_MASK;
> +
> +	if (arg & ~valid_mask)
>   		return -EINVAL;
>   
>   	/*
> @@ -615,6 +628,9 @@ long set_tagged_addr_ctrl(unsigned long arg)
>   	if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
>   		return -EINVAL;
>   
> +	if (set_mte_ctrl(arg) != 0)
> +		return -EINVAL;
> +
>   	update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
>   
>   	return 0;
> @@ -622,13 +638,17 @@ long set_tagged_addr_ctrl(unsigned long arg)
>   
>   long get_tagged_addr_ctrl(void)
>   {
> +	long ret = 0;
> +
>   	if (is_compat_task())
>   		return -EINVAL;
>   
>   	if (test_thread_flag(TIF_TAGGED_ADDR))
> -		return PR_TAGGED_ADDR_ENABLE;
> +		ret = PR_TAGGED_ADDR_ENABLE;
>   
> -	return 0;
> +	ret |= get_mte_ctrl();
> +
> +	return ret;
>   }
>   
>   /*
> diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
> index 07b4f8131e36..2390ab324afa 100644
> --- a/include/uapi/linux/prctl.h
> +++ b/include/uapi/linux/prctl.h
> @@ -233,6 +233,12 @@ struct prctl_mm_map {
>   #define PR_SET_TAGGED_ADDR_CTRL		55
>   #define PR_GET_TAGGED_ADDR_CTRL		56
>   # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
> +/* MTE tag check fault modes */
> +# define PR_MTE_TCF_SHIFT		1
> +# define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
> +# define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
> +# define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
> +# define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
>   
>   /* Control reclaim behavior when allocating memory */
>   #define PR_SET_IO_FLUSHER		57
Catalin Marinas Aug. 5, 2020, 9:24 a.m. UTC | #7
On Tue, Aug 04, 2020 at 08:34:42PM +0100, Kevin Brodsky wrote:
> On 15/07/2020 18:08, Catalin Marinas wrote:
> > By default, even if PROT_MTE is set on a memory range, there is no tag
> > check fault reporting (SIGSEGV). Introduce a set of option to the
> > exiting prctl(PR_SET_TAGGED_ADDR_CTRL) to allow user control of the tag
> > check fault mode:
> > 
> >    PR_MTE_TCF_NONE  - no reporting (default)
> >    PR_MTE_TCF_SYNC  - synchronous tag check fault reporting
> >    PR_MTE_TCF_ASYNC - asynchronous tag check fault reporting
> > 
> > These options translate into the corresponding SCTLR_EL1.TCF0 bitfield,
> > context-switched by the kernel. Note that uaccess done by the kernel is
> > not checked and cannot be configured by the user.
> 
> The last sentence is outdated, it should probably say that uaccess is only
> checked in in synchronous mode.

Thanks, I forgot about the commit log. The documentation was updated to:

**Note**: Kernel accesses to the user address space (e.g. ``read()``
system call) are not checked if the user thread tag checking mode is
``PR_MTE_TCF_NONE`` or ``PR_MTE_TCF_ASYNC``. If the tag checking mode is
``PR_MTE_TCF_SYNC``, the kernel makes a best effort to check its user
address accesses, however it cannot always guarantee it.

Patch
diff mbox series

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index b2577eee62c2..df2efbc9f8f1 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -21,6 +21,9 @@  void mte_clear_page_tags(void *addr);
 void mte_sync_tags(pte_t *ptep, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void flush_mte_state(void);
+void mte_thread_switch(struct task_struct *next);
+long set_mte_ctrl(unsigned long arg);
+long get_mte_ctrl(void);
 
 #else
 
@@ -36,6 +39,17 @@  static inline void mte_copy_page_tags(void *kto, const void *kfrom)
 static inline void flush_mte_state(void)
 {
 }
+static inline void mte_thread_switch(struct task_struct *next)
+{
+}
+static inline long set_mte_ctrl(unsigned long arg)
+{
+	return 0;
+}
+static inline long get_mte_ctrl(void)
+{
+	return 0;
+}
 
 #endif
 
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 240fe5e5b720..80e7f0573309 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -151,6 +151,9 @@  struct thread_struct {
 	struct ptrauth_keys_user	keys_user;
 	struct ptrauth_keys_kernel	keys_kernel;
 #endif
+#ifdef CONFIG_ARM64_MTE
+	u64			sctlr_tcf0;
+#endif
 };
 
 static inline void arch_thread_struct_whitelist(unsigned long *offset,
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 5f54fd140610..375483a1f573 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -5,6 +5,8 @@ 
 
 #include <linux/bitops.h>
 #include <linux/mm.h>
+#include <linux/prctl.h>
+#include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/thread_info.h>
 
@@ -49,6 +51,26 @@  int memcmp_pages(struct page *page1, struct page *page2)
 	return ret;
 }
 
+static void update_sctlr_el1_tcf0(u64 tcf0)
+{
+	/* ISB required for the kernel uaccess routines */
+	sysreg_clear_set(sctlr_el1, SCTLR_EL1_TCF0_MASK, tcf0);
+	isb();
+}
+
+static void set_sctlr_el1_tcf0(u64 tcf0)
+{
+	/*
+	 * mte_thread_switch() checks current->thread.sctlr_tcf0 as an
+	 * optimisation. Disable preemption so that it does not see
+	 * the variable update before the SCTLR_EL1.TCF0 one.
+	 */
+	preempt_disable();
+	current->thread.sctlr_tcf0 = tcf0;
+	update_sctlr_el1_tcf0(tcf0);
+	preempt_enable();
+}
+
 void flush_mte_state(void)
 {
 	if (!system_supports_mte())
@@ -58,4 +80,59 @@  void flush_mte_state(void)
 	dsb(ish);
 	write_sysreg_s(0, SYS_TFSRE0_EL1);
 	clear_thread_flag(TIF_MTE_ASYNC_FAULT);
+	/* disable tag checking */
+	set_sctlr_el1_tcf0(SCTLR_EL1_TCF0_NONE);
+}
+
+void mte_thread_switch(struct task_struct *next)
+{
+	if (!system_supports_mte())
+		return;
+
+	/* avoid expensive SCTLR_EL1 accesses if no change */
+	if (current->thread.sctlr_tcf0 != next->thread.sctlr_tcf0)
+		update_sctlr_el1_tcf0(next->thread.sctlr_tcf0);
+}
+
+long set_mte_ctrl(unsigned long arg)
+{
+	u64 tcf0;
+
+	if (!system_supports_mte())
+		return 0;
+
+	switch (arg & PR_MTE_TCF_MASK) {
+	case PR_MTE_TCF_NONE:
+		tcf0 = SCTLR_EL1_TCF0_NONE;
+		break;
+	case PR_MTE_TCF_SYNC:
+		tcf0 = SCTLR_EL1_TCF0_SYNC;
+		break;
+	case PR_MTE_TCF_ASYNC:
+		tcf0 = SCTLR_EL1_TCF0_ASYNC;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	set_sctlr_el1_tcf0(tcf0);
+
+	return 0;
+}
+
+long get_mte_ctrl(void)
+{
+	if (!system_supports_mte())
+		return 0;
+
+	switch (current->thread.sctlr_tcf0) {
+	case SCTLR_EL1_TCF0_NONE:
+		return PR_MTE_TCF_NONE;
+	case SCTLR_EL1_TCF0_SYNC:
+		return PR_MTE_TCF_SYNC;
+	case SCTLR_EL1_TCF0_ASYNC:
+		return PR_MTE_TCF_ASYNC;
+	}
+
+	return 0;
 }
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 695705d1f8e5..d19ce8053a03 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -544,6 +544,13 @@  __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
 	 */
 	dsb(ish);
 
+	/*
+	 * MTE thread switching must happen after the DSB above to ensure that
+	 * any asynchronous tag check faults have been logged in the TFSR*_EL1
+	 * registers.
+	 */
+	mte_thread_switch(next);
+
 	/* the actual thread switch */
 	last = cpu_switch_to(prev, next);
 
@@ -603,9 +610,15 @@  static unsigned int tagged_addr_disabled;
 
 long set_tagged_addr_ctrl(unsigned long arg)
 {
+	unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
+
 	if (is_compat_task())
 		return -EINVAL;
-	if (arg & ~PR_TAGGED_ADDR_ENABLE)
+
+	if (system_supports_mte())
+		valid_mask |= PR_MTE_TCF_MASK;
+
+	if (arg & ~valid_mask)
 		return -EINVAL;
 
 	/*
@@ -615,6 +628,9 @@  long set_tagged_addr_ctrl(unsigned long arg)
 	if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
 		return -EINVAL;
 
+	if (set_mte_ctrl(arg) != 0)
+		return -EINVAL;
+
 	update_thread_flag(TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);
 
 	return 0;
@@ -622,13 +638,17 @@  long set_tagged_addr_ctrl(unsigned long arg)
 
 long get_tagged_addr_ctrl(void)
 {
+	long ret = 0;
+
 	if (is_compat_task())
 		return -EINVAL;
 
 	if (test_thread_flag(TIF_TAGGED_ADDR))
-		return PR_TAGGED_ADDR_ENABLE;
+		ret = PR_TAGGED_ADDR_ENABLE;
 
-	return 0;
+	ret |= get_mte_ctrl();
+
+	return ret;
 }
 
 /*
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index 07b4f8131e36..2390ab324afa 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -233,6 +233,12 @@  struct prctl_mm_map {
 #define PR_SET_TAGGED_ADDR_CTRL		55
 #define PR_GET_TAGGED_ADDR_CTRL		56
 # define PR_TAGGED_ADDR_ENABLE		(1UL << 0)
+/* MTE tag check fault modes */
+# define PR_MTE_TCF_SHIFT		1
+# define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_ASYNC		(2UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_MASK		(3UL << PR_MTE_TCF_SHIFT)
 
 /* Control reclaim behavior when allocating memory */
 #define PR_SET_IO_FLUSHER		57