diff mbox series

[v3,19/23] arm64: mte: Add PTRACE_{PEEK,POKE}MTETAGS support

Message ID 20200421142603.3894-20-catalin.marinas@arm.com (mailing list archive)
State New, archived
Headers show
Series arm64: Memory Tagging Extension user-space support | expand

Commit Message

Catalin Marinas April 21, 2020, 2:25 p.m. UTC
Add support for bulk setting/getting of the MTE tags in a tracee's
address space at 'addr' in the ptrace() syscall prototype. 'data' points
to a struct iovec in the tracer's address space with iov_base
representing the address of a tracer's buffer of length iov_len. The
tags to be copied to/from the tracer's buffer are stored as one tag per
byte.

On successfully copying at least one tag, ptrace() returns 0 and updates
the tracer's iov_len with the number of tags copied. In case of error,
either -EIO or -EFAULT is returned, trying to follow the ptrace() man
page.

Note that the tag copying functions are not performance critical,
therefore they lack optimisations found in typical memory copy routines.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Alan Hayward <Alan.Hayward@arm.com>
Cc: Luis Machado <luis.machado@linaro.org>
Cc: Omair Javaid <omair.javaid@linaro.org>
---

Notes:
    New in v3.

 arch/arm64/include/asm/mte.h         |  17 ++++
 arch/arm64/include/uapi/asm/ptrace.h |   3 +
 arch/arm64/kernel/mte.c              | 127 +++++++++++++++++++++++++++
 arch/arm64/kernel/ptrace.c           |  15 +++-
 arch/arm64/lib/mte.S                 |  50 +++++++++++
 5 files changed, 211 insertions(+), 1 deletion(-)

Comments

Peter Collingbourne April 24, 2020, 11:28 p.m. UTC | #1
On Tue, Apr 21, 2020 at 7:26 AM Catalin Marinas <catalin.marinas@arm.com> wrote:
>
> Add support for bulk setting/getting of the MTE tags in a tracee's
> address space at 'addr' in the ptrace() syscall prototype. 'data' points
> to a struct iovec in the tracer's address space with iov_base
> representing the address of a tracer's buffer of length iov_len. The
> tags to be copied to/from the tracer's buffer are stored as one tag per
> byte.
>
> On successfully copying at least one tag, ptrace() returns 0 and updates
> the tracer's iov_len with the number of tags copied. In case of error,
> either -EIO or -EFAULT is returned, trying to follow the ptrace() man
> page.
>
> Note that the tag copying functions are not performance critical,
> therefore they lack optimisations found in typical memory copy routines.
>
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Alan Hayward <Alan.Hayward@arm.com>
> Cc: Luis Machado <luis.machado@linaro.org>
> Cc: Omair Javaid <omair.javaid@linaro.org>
> ---
>
> Notes:
>     New in v3.
>
>  arch/arm64/include/asm/mte.h         |  17 ++++
>  arch/arm64/include/uapi/asm/ptrace.h |   3 +
>  arch/arm64/kernel/mte.c              | 127 +++++++++++++++++++++++++++
>  arch/arm64/kernel/ptrace.c           |  15 +++-
>  arch/arm64/lib/mte.S                 |  50 +++++++++++
>  5 files changed, 211 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index 22eb3e06f311..0ca2aaff07a1 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -2,12 +2,21 @@
>  #ifndef __ASM_MTE_H
>  #define __ASM_MTE_H
>
> +#define MTE_ALLOC_SIZE UL(16)
> +#define MTE_ALLOC_MASK (~(MTE_ALLOC_SIZE - 1))
> +#define MTE_TAG_SHIFT  (56)
> +#define MTE_TAG_SIZE   (4)
> +
>  #ifndef __ASSEMBLY__
>
>  #include <linux/sched.h>
>
>  /* Memory Tagging API */
>  int mte_memcmp_pages(const void *page1_addr, const void *page2_addr);
> +unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
> +                                     unsigned long n);
> +unsigned long mte_copy_tags_to_user(void __user *to, void *from,
> +                                   unsigned long n);
>
>  #ifdef CONFIG_ARM64_MTE
>  void flush_mte_state(void);
> @@ -15,6 +24,8 @@ void mte_thread_switch(struct task_struct *next);
>  void mte_suspend_exit(void);
>  long set_mte_ctrl(unsigned long arg);
>  long get_mte_ctrl(void);
> +int mte_ptrace_copy_tags(struct task_struct *child, long request,
> +                        unsigned long addr, unsigned long data);
>  #else
>  static inline void flush_mte_state(void)
>  {
> @@ -33,6 +44,12 @@ static inline long get_mte_ctrl(void)
>  {
>         return 0;
>  }
> +static inline int mte_ptrace_copy_tags(struct task_struct *child,
> +                                      long request, unsigned long addr,
> +                                      unsigned long data)
> +{
> +       return -EIO;
> +}
>  #endif
>
>  #endif /* __ASSEMBLY__ */
> diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
> index 1daf6dda8af0..cd2a4a164de3 100644
> --- a/arch/arm64/include/uapi/asm/ptrace.h
> +++ b/arch/arm64/include/uapi/asm/ptrace.h
> @@ -67,6 +67,9 @@
>  /* syscall emulation path in ptrace */
>  #define PTRACE_SYSEMU            31
>  #define PTRACE_SYSEMU_SINGLESTEP  32
> +/* MTE allocation tag access */
> +#define PTRACE_PEEKMTETAGS       33
> +#define PTRACE_POKEMTETAGS       34
>
>  #ifndef __ASSEMBLY__
>
> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index fa4a4196b248..0cb496ed9bf9 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -3,12 +3,17 @@
>   * Copyright (C) 2020 ARM Ltd.
>   */
>
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
>  #include <linux/prctl.h>
>  #include <linux/sched.h>
> +#include <linux/sched/mm.h>
>  #include <linux/thread_info.h>
> +#include <linux/uio.h>
>
>  #include <asm/cpufeature.h>
>  #include <asm/mte.h>
> +#include <asm/ptrace.h>
>  #include <asm/sysreg.h>
>
>  static void update_sctlr_el1_tcf0(u64 tcf0)
> @@ -133,3 +138,125 @@ long get_mte_ctrl(void)
>
>         return ret;
>  }
> +
> +/*
> + * Access MTE tags in another process' address space as given in mm. Update
> + * the number of tags copied. Return 0 if any tags copied, error otherwise.
> + * Inspired by __access_remote_vm().
> + */
> +static int __access_remote_tags(struct task_struct *tsk, struct mm_struct *mm,
> +                               unsigned long addr, struct iovec *kiov,
> +                               unsigned int gup_flags)
> +{
> +       struct vm_area_struct *vma;
> +       void __user *buf = kiov->iov_base;
> +       size_t len = kiov->iov_len;
> +       int ret;
> +       int write = gup_flags & FOLL_WRITE;
> +
> +       if (down_read_killable(&mm->mmap_sem))
> +               return -EIO;
> +
> +       if (!access_ok(buf, len))
> +               return -EFAULT;
> +
> +       while (len) {
> +               unsigned long tags, offset;
> +               void *maddr;
> +               struct page *page = NULL;
> +
> +               ret = get_user_pages_remote(tsk, mm, addr, 1, gup_flags,
> +                                           &page, &vma, NULL);
> +               if (ret <= 0)
> +                       break;
> +
> +               /* limit access to the end of the page */
> +               offset = offset_in_page(addr);
> +               tags = min(len, (PAGE_SIZE - offset) / MTE_ALLOC_SIZE);
> +
> +               maddr = page_address(page);
> +               if (write) {
> +                       tags = mte_copy_tags_from_user(maddr + offset, buf, tags);
> +                       set_page_dirty_lock(page);
> +               } else {
> +                       tags = mte_copy_tags_to_user(buf, maddr + offset, tags);
> +               }
> +               put_page(page);
> +
> +               /* error accessing the tracer's buffer */
> +               if (!tags)
> +                       break;
> +
> +               len -= tags;
> +               buf += tags;
> +               addr += tags * MTE_ALLOC_SIZE;
> +       }
> +       up_read(&mm->mmap_sem);
> +
> +       /* return an error if no tags copied */
> +       kiov->iov_len = buf - kiov->iov_base;
> +       if (!kiov->iov_len) {
> +               /* check for error accessing the tracee's address space */
> +               if (ret <= 0)
> +                       return -EIO;
> +               else
> +                       return -EFAULT;
> +       }
> +
> +       return 0;
> +}
> +
> +/*
> + * Copy MTE tags in another process' address space at 'addr' to/from tracer's
> + * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm().
> + */
> +static int access_remote_tags(struct task_struct *tsk, unsigned long addr,
> +                             struct iovec *kiov, unsigned int gup_flags)
> +{
> +       struct mm_struct *mm;
> +       int ret;
> +
> +       mm = get_task_mm(tsk);
> +       if (!mm)
> +               return -EPERM;
> +
> +       if (!tsk->ptrace || (current != tsk->parent) ||
> +           ((get_dumpable(mm) != SUID_DUMP_USER) &&
> +            !ptracer_capable(tsk, mm->user_ns))) {
> +               mmput(mm);
> +               return -EPERM;
> +       }
> +
> +       ret = __access_remote_tags(tsk, mm, addr, kiov, gup_flags);
> +       mmput(mm);
> +
> +       return ret;
> +}
> +
> +int mte_ptrace_copy_tags(struct task_struct *child, long request,
> +                        unsigned long addr, unsigned long data)
> +{
> +       int ret;
> +       struct iovec kiov;
> +       struct iovec __user *uiov = (void __user *)data;
> +       unsigned int gup_flags = FOLL_FORCE;
> +
> +       if (!system_supports_mte())
> +               return -EIO;
> +
> +       if (get_user(kiov.iov_base, &uiov->iov_base) ||
> +           get_user(kiov.iov_len, &uiov->iov_len))
> +               return -EFAULT;
> +
> +       if (request == PTRACE_POKEMTETAGS)
> +               gup_flags |= FOLL_WRITE;
> +
> +       /* align addr to the MTE tag granule */
> +       addr &= MTE_ALLOC_MASK;
> +
> +       ret = access_remote_tags(child, addr, &kiov, gup_flags);
> +       if (!ret)
> +               ret = __put_user(kiov.iov_len, &uiov->iov_len);
> +
> +       return ret;
> +}
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index 077e352495eb..1fdb841ad536 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -34,6 +34,7 @@
>  #include <asm/cpufeature.h>
>  #include <asm/debug-monitors.h>
>  #include <asm/fpsimd.h>
> +#include <asm/mte.h>
>  #include <asm/pgtable.h>
>  #include <asm/pointer_auth.h>
>  #include <asm/stacktrace.h>
> @@ -1797,7 +1798,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
>  long arch_ptrace(struct task_struct *child, long request,
>                  unsigned long addr, unsigned long data)
>  {
> -       return ptrace_request(child, request, addr, data);
> +       int ret;
> +
> +       switch (request) {
> +       case PTRACE_PEEKMTETAGS:
> +       case PTRACE_POKEMTETAGS:
> +               ret = mte_ptrace_copy_tags(child, request, addr, data);
> +               break;
> +       default:
> +               ret = ptrace_request(child, request, addr, data);
> +               break;
> +       }
> +
> +       return ret;
>  }
>
>  enum ptrace_syscall_dir {
> diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
> index bd51ea7e2fcb..45be04a8c73c 100644
> --- a/arch/arm64/lib/mte.S
> +++ b/arch/arm64/lib/mte.S
> @@ -5,6 +5,7 @@
>  #include <linux/linkage.h>
>
>  #include <asm/assembler.h>
> +#include <asm/mte.h>
>
>  /*
>   * Compare tags of two pages
> @@ -44,3 +45,52 @@ SYM_FUNC_START(mte_memcmp_pages)
>
>         ret
>  SYM_FUNC_END(mte_memcmp_pages)
> +
> +/*
> + * Read tags from a user buffer (one tag per byte) and set the corresponding
> + * tags at the given kernel address. Used by PTRACE_POKEMTETAGS.
> + *   x0 - kernel address (to)
> + *   x1 - user buffer (from)
> + *   x2 - number of tags/bytes (n)
> + * Returns:
> + *   x0 - number of tags read/set
> + */
> +SYM_FUNC_START(mte_copy_tags_from_user)
> +       mov     x3, x1
> +1:
> +USER(2f, ldtrb w4, [x1])
> +       lsl     x4, x4, #MTE_TAG_SHIFT
> +       stg     x4, [x0], #MTE_ALLOC_SIZE
> +       add     x1, x1, #1
> +       subs    x2, x2, #1
> +       b.ne    1b
> +
> +       // exception handling and function return
> +2:     sub     x0, x1, x3              // update the number of tags set
> +       ret
> +SYM_FUNC_END(mte_copy_tags_from_user)
> +
> +/*
> + * Get the tags from a kernel address range and write the tag values to the
> + * given user buffer (one tag per byte). Used by PTRACE_PEEKMTETAGS.
> + *   x0 - user buffer (to)
> + *   x1 - kernel address (from)
> + *   x2 - number of tags/bytes (n)
> + * Returns:
> + *   x0 - number of tags read/set
> + */
> +SYM_FUNC_START(mte_copy_tags_to_user)
> +       mov     x3, x0
> +1:
> +       ldg     x4, [x1]
> +       ubfx    x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE
> +USER(2f, sttrb w4, [x0])
> +       add     x0, x0, #1
> +       add     x1, x1, #MTE_ALLOC_SIZE
> +       subs    x2, x2, #1
> +       b.ne    1b
> +
> +       // exception handling and function return
> +2:     sub     x0, x0, x3              // update the number of tags copied
> +       ret
> +SYM_FUNC_END(mte_copy_tags_from_user)

Nit: should be SYM_FUNC_END(mte_copy_tags_to_user).

Peter
Kevin Brodsky April 29, 2020, 10:27 a.m. UTC | #2
On 21/04/2020 15:25, Catalin Marinas wrote:
> Add support for bulk setting/getting of the MTE tags in a tracee's
> address space at 'addr' in the ptrace() syscall prototype. 'data' points
> to a struct iovec in the tracer's address space with iov_base
> representing the address of a tracer's buffer of length iov_len. The
> tags to be copied to/from the tracer's buffer are stored as one tag per
> byte.
>
> On successfully copying at least one tag, ptrace() returns 0 and updates
> the tracer's iov_len with the number of tags copied. In case of error,
> either -EIO or -EFAULT is returned, trying to follow the ptrace() man
> page.
>
> Note that the tag copying functions are not performance critical,
> therefore they lack optimisations found in typical memory copy routines.
>
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Alan Hayward <Alan.Hayward@arm.com>
> Cc: Luis Machado <luis.machado@linaro.org>
> Cc: Omair Javaid <omair.javaid@linaro.org>
> ---
>
> Notes:
>      New in v3.
>
>   arch/arm64/include/asm/mte.h         |  17 ++++
>   arch/arm64/include/uapi/asm/ptrace.h |   3 +
>   arch/arm64/kernel/mte.c              | 127 +++++++++++++++++++++++++++
>   arch/arm64/kernel/ptrace.c           |  15 +++-
>   arch/arm64/lib/mte.S                 |  50 +++++++++++
>   5 files changed, 211 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index 22eb3e06f311..0ca2aaff07a1 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -2,12 +2,21 @@
>   #ifndef __ASM_MTE_H
>   #define __ASM_MTE_H
>   
> +#define MTE_ALLOC_SIZE	UL(16)
> +#define MTE_ALLOC_MASK	(~(MTE_ALLOC_SIZE - 1))

Nit: maybe MTE_GRANULE_* would be clearer than MTE_ALLOC_*?

> +#define MTE_TAG_SHIFT	(56)
> +#define MTE_TAG_SIZE	(4)
> +
>   #ifndef __ASSEMBLY__
>   
>   #include <linux/sched.h>
>   
>   /* Memory Tagging API */
>   int mte_memcmp_pages(const void *page1_addr, const void *page2_addr);
> +unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
> +				      unsigned long n);
> +unsigned long mte_copy_tags_to_user(void __user *to, void *from,
> +				    unsigned long n);
>   
>   #ifdef CONFIG_ARM64_MTE
>   void flush_mte_state(void);
> @@ -15,6 +24,8 @@ void mte_thread_switch(struct task_struct *next);
>   void mte_suspend_exit(void);
>   long set_mte_ctrl(unsigned long arg);
>   long get_mte_ctrl(void);
> +int mte_ptrace_copy_tags(struct task_struct *child, long request,
> +			 unsigned long addr, unsigned long data);
>   #else
>   static inline void flush_mte_state(void)
>   {
> @@ -33,6 +44,12 @@ static inline long get_mte_ctrl(void)
>   {
>   	return 0;
>   }
> +static inline int mte_ptrace_copy_tags(struct task_struct *child,
> +				       long request, unsigned long addr,
> +				       unsigned long data)
> +{
> +	return -EIO;
> +}
>   #endif
>   
>   #endif /* __ASSEMBLY__ */
> diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
> index 1daf6dda8af0..cd2a4a164de3 100644
> --- a/arch/arm64/include/uapi/asm/ptrace.h
> +++ b/arch/arm64/include/uapi/asm/ptrace.h
> @@ -67,6 +67,9 @@
>   /* syscall emulation path in ptrace */
>   #define PTRACE_SYSEMU		  31
>   #define PTRACE_SYSEMU_SINGLESTEP  32
> +/* MTE allocation tag access */
> +#define PTRACE_PEEKMTETAGS	  33
> +#define PTRACE_POKEMTETAGS	  34
>   
>   #ifndef __ASSEMBLY__
>   
> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index fa4a4196b248..0cb496ed9bf9 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -3,12 +3,17 @@
>    * Copyright (C) 2020 ARM Ltd.
>    */
>   
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
>   #include <linux/prctl.h>
>   #include <linux/sched.h>
> +#include <linux/sched/mm.h>
>   #include <linux/thread_info.h>
> +#include <linux/uio.h>
>   
>   #include <asm/cpufeature.h>
>   #include <asm/mte.h>
> +#include <asm/ptrace.h>
>   #include <asm/sysreg.h>
>   
>   static void update_sctlr_el1_tcf0(u64 tcf0)
> @@ -133,3 +138,125 @@ long get_mte_ctrl(void)
>   
>   	return ret;
>   }
> +
> +/*
> + * Access MTE tags in another process' address space as given in mm. Update
> + * the number of tags copied. Return 0 if any tags copied, error otherwise.
> + * Inspired by __access_remote_vm().
> + */
> +static int __access_remote_tags(struct task_struct *tsk, struct mm_struct *mm,
> +				unsigned long addr, struct iovec *kiov,
> +				unsigned int gup_flags)
> +{
> +	struct vm_area_struct *vma;
> +	void __user *buf = kiov->iov_base;
> +	size_t len = kiov->iov_len;
> +	int ret;
> +	int write = gup_flags & FOLL_WRITE;
> +
> +	if (down_read_killable(&mm->mmap_sem))
> +		return -EIO;
> +
> +	if (!access_ok(buf, len))
> +		return -EFAULT;
> +
> +	while (len) {
> +		unsigned long tags, offset;
> +		void *maddr;
> +		struct page *page = NULL;
> +
> +		ret = get_user_pages_remote(tsk, mm, addr, 1, gup_flags,
> +					    &page, &vma, NULL);
> +		if (ret <= 0)
> +			break;
> +
> +		/* limit access to the end of the page */
> +		offset = offset_in_page(addr);
> +		tags = min(len, (PAGE_SIZE - offset) / MTE_ALLOC_SIZE);
> +
> +		maddr = page_address(page);
> +		if (write) {
> +			tags = mte_copy_tags_from_user(maddr + offset, buf, tags);
> +			set_page_dirty_lock(page);
> +		} else {
> +			tags = mte_copy_tags_to_user(buf, maddr + offset, tags);
> +		}
> +		put_page(page);
> +
> +		/* error accessing the tracer's buffer */
> +		if (!tags)
> +			break;
> +
> +		len -= tags;
> +		buf += tags;
> +		addr += tags * MTE_ALLOC_SIZE;
> +	}
> +	up_read(&mm->mmap_sem);
> +
> +	/* return an error if no tags copied */
> +	kiov->iov_len = buf - kiov->iov_base;
> +	if (!kiov->iov_len) {
> +		/* check for error accessing the tracee's address space */
> +		if (ret <= 0)
> +			return -EIO;
> +		else
> +			return -EFAULT;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Copy MTE tags in another process' address space at 'addr' to/from tracer's
> + * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm().
> + */
> +static int access_remote_tags(struct task_struct *tsk, unsigned long addr,
> +			      struct iovec *kiov, unsigned int gup_flags)
> +{
> +	struct mm_struct *mm;
> +	int ret;
> +
> +	mm = get_task_mm(tsk);
> +	if (!mm)
> +		return -EPERM;
> +
> +	if (!tsk->ptrace || (current != tsk->parent) ||
> +	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
> +	     !ptracer_capable(tsk, mm->user_ns))) {
> +		mmput(mm);
> +		return -EPERM;
> +	}
> +
> +	ret = __access_remote_tags(tsk, mm, addr, kiov, gup_flags);
> +	mmput(mm);
> +
> +	return ret;
> +}
> +
> +int mte_ptrace_copy_tags(struct task_struct *child, long request,
> +			 unsigned long addr, unsigned long data)
> +{
> +	int ret;
> +	struct iovec kiov;
> +	struct iovec __user *uiov = (void __user *)data;
> +	unsigned int gup_flags = FOLL_FORCE;
> +
> +	if (!system_supports_mte())
> +		return -EIO;
> +
> +	if (get_user(kiov.iov_base, &uiov->iov_base) ||
> +	    get_user(kiov.iov_len, &uiov->iov_len))
> +		return -EFAULT;
> +
> +	if (request == PTRACE_POKEMTETAGS)
> +		gup_flags |= FOLL_WRITE;
> +
> +	/* align addr to the MTE tag granule */
> +	addr &= MTE_ALLOC_MASK;
> +
> +	ret = access_remote_tags(child, addr, &kiov, gup_flags);
> +	if (!ret)
> +		ret = __put_user(kiov.iov_len, &uiov->iov_len);
> +
> +	return ret;
> +}
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index 077e352495eb..1fdb841ad536 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -34,6 +34,7 @@
>   #include <asm/cpufeature.h>
>   #include <asm/debug-monitors.h>
>   #include <asm/fpsimd.h>
> +#include <asm/mte.h>
>   #include <asm/pgtable.h>
>   #include <asm/pointer_auth.h>
>   #include <asm/stacktrace.h>
> @@ -1797,7 +1798,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
>   long arch_ptrace(struct task_struct *child, long request,
>   		 unsigned long addr, unsigned long data)
>   {
> -	return ptrace_request(child, request, addr, data);
> +	int ret;
> +
> +	switch (request) {
> +	case PTRACE_PEEKMTETAGS:
> +	case PTRACE_POKEMTETAGS:
> +		ret = mte_ptrace_copy_tags(child, request, addr, data);
> +		break;
> +	default:
> +		ret = ptrace_request(child, request, addr, data);
> +		break;
> +	}
> +
> +	return ret;
>   }
>   
>   enum ptrace_syscall_dir {
> diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
> index bd51ea7e2fcb..45be04a8c73c 100644
> --- a/arch/arm64/lib/mte.S
> +++ b/arch/arm64/lib/mte.S
> @@ -5,6 +5,7 @@
>   #include <linux/linkage.h>
>   
>   #include <asm/assembler.h>
> +#include <asm/mte.h>
>   
>   /*
>    * Compare tags of two pages
> @@ -44,3 +45,52 @@ SYM_FUNC_START(mte_memcmp_pages)
>   
>   	ret
>   SYM_FUNC_END(mte_memcmp_pages)
> +
> +/*
> + * Read tags from a user buffer (one tag per byte) and set the corresponding
> + * tags at the given kernel address. Used by PTRACE_POKEMTETAGS.
> + *   x0 - kernel address (to)
> + *   x1 - user buffer (from)
> + *   x2 - number of tags/bytes (n)
> + * Returns:
> + *   x0 - number of tags read/set
> + */
> +SYM_FUNC_START(mte_copy_tags_from_user)
> +	mov	x3, x1
> +1:
> +USER(2f, ldtrb	w4, [x1])

Here we are making either of the following assumptions:
1. The __user pointer (here `from`) actually points to user memory, not kernel memory 
(and we have set_fs(USER_DS) in place).
2. CONFIG_ARM64_UAO is enabled and the hardware implements UAO.

1. is currently true because these functions are only used for the new ptrace 
requests, which indeed pass pointers to user memory. However, future users of these 
functions may not know about this requirement.
2. is not necessarily true because ARM64_MTE does not depend on ARM64_UAO.

It is unlikely that future users of these functions actually need to pass __user 
pointers to kernel memory, so adding a comment spelling out the first assumption is 
probably fine.

Kevin

> +	lsl	x4, x4, #MTE_TAG_SHIFT
> +	stg	x4, [x0], #MTE_ALLOC_SIZE
> +	add	x1, x1, #1
> +	subs	x2, x2, #1
> +	b.ne	1b
> +
> +	// exception handling and function return
> +2:	sub	x0, x1, x3		// update the number of tags set
> +	ret
> +SYM_FUNC_END(mte_copy_tags_from_user)
> +
> +/*
> + * Get the tags from a kernel address range and write the tag values to the
> + * given user buffer (one tag per byte). Used by PTRACE_PEEKMTETAGS.
> + *   x0 - user buffer (to)
> + *   x1 - kernel address (from)
> + *   x2 - number of tags/bytes (n)
> + * Returns:
> + *   x0 - number of tags read/set
> + */
> +SYM_FUNC_START(mte_copy_tags_to_user)
> +	mov	x3, x0
> +1:
> +	ldg	x4, [x1]
> +	ubfx	x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE
> +USER(2f, sttrb	w4, [x0])
> +	add	x0, x0, #1
> +	add	x1, x1, #MTE_ALLOC_SIZE
> +	subs	x2, x2, #1
> +	b.ne	1b
> +
> +	// exception handling and function return
> +2:	sub	x0, x0, x3		// update the number of tags copied
> +	ret
> +SYM_FUNC_END(mte_copy_tags_from_user)
Catalin Marinas April 29, 2020, 3:24 p.m. UTC | #3
On Wed, Apr 29, 2020 at 11:27:10AM +0100, Kevin Brodsky wrote:
> On 21/04/2020 15:25, Catalin Marinas wrote:
> > diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
> > index bd51ea7e2fcb..45be04a8c73c 100644
> > --- a/arch/arm64/lib/mte.S
> > +++ b/arch/arm64/lib/mte.S
> > @@ -5,6 +5,7 @@
> >   #include <linux/linkage.h>
> >   #include <asm/assembler.h>
> > +#include <asm/mte.h>
> >   /*
> >    * Compare tags of two pages
> > @@ -44,3 +45,52 @@ SYM_FUNC_START(mte_memcmp_pages)
> >   	ret
> >   SYM_FUNC_END(mte_memcmp_pages)
> > +
> > +/*
> > + * Read tags from a user buffer (one tag per byte) and set the corresponding
> > + * tags at the given kernel address. Used by PTRACE_POKEMTETAGS.
> > + *   x0 - kernel address (to)
> > + *   x1 - user buffer (from)
> > + *   x2 - number of tags/bytes (n)
> > + * Returns:
> > + *   x0 - number of tags read/set
> > + */
> > +SYM_FUNC_START(mte_copy_tags_from_user)
> > +	mov	x3, x1
> > +1:
> > +USER(2f, ldtrb	w4, [x1])
> 
> Here we are making either of the following assumptions:
> 1. The __user pointer (here `from`) actually points to user memory, not
> kernel memory (and we have set_fs(USER_DS) in place).
> 2. CONFIG_ARM64_UAO is enabled and the hardware implements UAO.
> 
> 1. is currently true because these functions are only used for the new
> ptrace requests, which indeed pass pointers to user memory. However, future
> users of these functions may not know about this requirement.
> 2. is not necessarily true because ARM64_MTE does not depend on ARM64_UAO.
> 
> It is unlikely that future users of these functions actually need to pass
> __user pointers to kernel memory, so adding a comment spelling out the first
> assumption is probably fine.

I found it easier to add uao_user_alternative rather than writing a
comment ;).

Thanks.
Dave Martin April 29, 2020, 4:46 p.m. UTC | #4
On Tue, Apr 21, 2020 at 03:25:59PM +0100, Catalin Marinas wrote:
> Add support for bulk setting/getting of the MTE tags in a tracee's
> address space at 'addr' in the ptrace() syscall prototype. 'data' points
> to a struct iovec in the tracer's address space with iov_base
> representing the address of a tracer's buffer of length iov_len. The
> tags to be copied to/from the tracer's buffer are stored as one tag per
> byte.
> 
> On successfully copying at least one tag, ptrace() returns 0 and updates
> the tracer's iov_len with the number of tags copied. In case of error,
> either -EIO or -EFAULT is returned, trying to follow the ptrace() man
> page.
> 
> Note that the tag copying functions are not performance critical,
> therefore they lack optimisations found in typical memory copy routines.

Doesn't quite belong here, but:

Can we dump the tags and possibly the faulting mode etc. when dumping
core?

That information seems potentially valuable for debugging.
Tweaking the fault mode from a debugger may also be useful (which is
quite easy to achieve if coredump support is done by wrapping the MTE
control word in a regset).

These could probably be added later, though.


> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Alan Hayward <Alan.Hayward@arm.com>
> Cc: Luis Machado <luis.machado@linaro.org>
> Cc: Omair Javaid <omair.javaid@linaro.org>
> ---
> 
> Notes:
>     New in v3.
> 
>  arch/arm64/include/asm/mte.h         |  17 ++++
>  arch/arm64/include/uapi/asm/ptrace.h |   3 +
>  arch/arm64/kernel/mte.c              | 127 +++++++++++++++++++++++++++
>  arch/arm64/kernel/ptrace.c           |  15 +++-
>  arch/arm64/lib/mte.S                 |  50 +++++++++++
>  5 files changed, 211 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index 22eb3e06f311..0ca2aaff07a1 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -2,12 +2,21 @@
>  #ifndef __ASM_MTE_H
>  #define __ASM_MTE_H
>  
> +#define MTE_ALLOC_SIZE	UL(16)
> +#define MTE_ALLOC_MASK	(~(MTE_ALLOC_SIZE - 1))
> +#define MTE_TAG_SHIFT	(56)
> +#define MTE_TAG_SIZE	(4)
> +

Nit: pointless () on the last two #defines.

[...]

> diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> index fa4a4196b248..0cb496ed9bf9 100644
> --- a/arch/arm64/kernel/mte.c
> +++ b/arch/arm64/kernel/mte.c
> @@ -3,12 +3,17 @@
>   * Copyright (C) 2020 ARM Ltd.
>   */
>  
> +#include <linux/kernel.h>
> +#include <linux/mm.h>
>  #include <linux/prctl.h>
>  #include <linux/sched.h>
> +#include <linux/sched/mm.h>
>  #include <linux/thread_info.h>
> +#include <linux/uio.h>
>  
>  #include <asm/cpufeature.h>
>  #include <asm/mte.h>
> +#include <asm/ptrace.h>
>  #include <asm/sysreg.h>
>  
>  static void update_sctlr_el1_tcf0(u64 tcf0)
> @@ -133,3 +138,125 @@ long get_mte_ctrl(void)
>  
>  	return ret;
>  }
> +
> +/*
> + * Access MTE tags in another process' address space as given in mm. Update
> + * the number of tags copied. Return 0 if any tags copied, error otherwise.
> + * Inspired by __access_remote_vm().
> + */
> +static int __access_remote_tags(struct task_struct *tsk, struct mm_struct *mm,
> +				unsigned long addr, struct iovec *kiov,
> +				unsigned int gup_flags)
> +{
> +	struct vm_area_struct *vma;
> +	void __user *buf = kiov->iov_base;
> +	size_t len = kiov->iov_len;
> +	int ret;
> +	int write = gup_flags & FOLL_WRITE;
> +
> +	if (down_read_killable(&mm->mmap_sem))
> +		return -EIO;
> +
> +	if (!access_ok(buf, len))
> +		return -EFAULT;

Leaked down_read()?

> +
> +	while (len) {
> +		unsigned long tags, offset;
> +		void *maddr;
> +		struct page *page = NULL;
> +
> +		ret = get_user_pages_remote(tsk, mm, addr, 1, gup_flags,
> +					    &page, &vma, NULL);
> +		if (ret <= 0)
> +			break;
> +
> +		/* limit access to the end of the page */
> +		offset = offset_in_page(addr);
> +		tags = min(len, (PAGE_SIZE - offset) / MTE_ALLOC_SIZE);
> +
> +		maddr = page_address(page);
> +		if (write) {
> +			tags = mte_copy_tags_from_user(maddr + offset, buf, tags);
> +			set_page_dirty_lock(page);
> +		} else {
> +			tags = mte_copy_tags_to_user(buf, maddr + offset, tags);
> +		}
> +		put_page(page);
> +
> +		/* error accessing the tracer's buffer */
> +		if (!tags)
> +			break;
> +
> +		len -= tags;
> +		buf += tags;
> +		addr += tags * MTE_ALLOC_SIZE;
> +	}
> +	up_read(&mm->mmap_sem);
> +
> +	/* return an error if no tags copied */
> +	kiov->iov_len = buf - kiov->iov_base;
> +	if (!kiov->iov_len) {
> +		/* check for error accessing the tracee's address space */
> +		if (ret <= 0)
> +			return -EIO;
> +		else
> +			return -EFAULT;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Copy MTE tags in another process' address space at 'addr' to/from tracer's
> + * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm().
> + */
> +static int access_remote_tags(struct task_struct *tsk, unsigned long addr,
> +			      struct iovec *kiov, unsigned int gup_flags)
> +{
> +	struct mm_struct *mm;
> +	int ret;
> +
> +	mm = get_task_mm(tsk);
> +	if (!mm)
> +		return -EPERM;
> +
> +	if (!tsk->ptrace || (current != tsk->parent) ||
> +	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
> +	     !ptracer_capable(tsk, mm->user_ns))) {
> +		mmput(mm);
> +		return -EPERM;
> +	}
> +
> +	ret = __access_remote_tags(tsk, mm, addr, kiov, gup_flags);
> +	mmput(mm);
> +
> +	return ret;
> +}
> +
> +int mte_ptrace_copy_tags(struct task_struct *child, long request,
> +			 unsigned long addr, unsigned long data)
> +{
> +	int ret;
> +	struct iovec kiov;
> +	struct iovec __user *uiov = (void __user *)data;
> +	unsigned int gup_flags = FOLL_FORCE;
> +
> +	if (!system_supports_mte())
> +		return -EIO;
> +
> +	if (get_user(kiov.iov_base, &uiov->iov_base) ||
> +	    get_user(kiov.iov_len, &uiov->iov_len))
> +		return -EFAULT;
> +
> +	if (request == PTRACE_POKEMTETAGS)
> +		gup_flags |= FOLL_WRITE;
> +
> +	/* align addr to the MTE tag granule */
> +	addr &= MTE_ALLOC_MASK;
> +
> +	ret = access_remote_tags(child, addr, &kiov, gup_flags);
> +	if (!ret)
> +		ret = __put_user(kiov.iov_len, &uiov->iov_len);

Should this be put_user()?  We didn't use __get_user() above, and I
don't see what guards the access.

> +
> +	return ret;
> +}
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index 077e352495eb..1fdb841ad536 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -34,6 +34,7 @@
>  #include <asm/cpufeature.h>
>  #include <asm/debug-monitors.h>
>  #include <asm/fpsimd.h>
> +#include <asm/mte.h>
>  #include <asm/pgtable.h>
>  #include <asm/pointer_auth.h>
>  #include <asm/stacktrace.h>
> @@ -1797,7 +1798,19 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
>  long arch_ptrace(struct task_struct *child, long request,
>  		 unsigned long addr, unsigned long data)
>  {
> -	return ptrace_request(child, request, addr, data);
> +	int ret;
> +
> +	switch (request) {
> +	case PTRACE_PEEKMTETAGS:
> +	case PTRACE_POKEMTETAGS:
> +		ret = mte_ptrace_copy_tags(child, request, addr, data);
> +		break;

Nit: return mte_trace_copy_tags()?

This is a new function, so we don't need to follow the verbose style of
the core code.  Not everyone likes returning out of switches though.

> +	default:
> +		ret = ptrace_request(child, request, addr, data);
> +		break;
> +	}
> +
> +	return ret;
>  }
>  
>  enum ptrace_syscall_dir {
> diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
> index bd51ea7e2fcb..45be04a8c73c 100644
> --- a/arch/arm64/lib/mte.S
> +++ b/arch/arm64/lib/mte.S
> @@ -5,6 +5,7 @@
>  #include <linux/linkage.h>
>  
>  #include <asm/assembler.h>
> +#include <asm/mte.h>
>  
>  /*
>   * Compare tags of two pages
> @@ -44,3 +45,52 @@ SYM_FUNC_START(mte_memcmp_pages)
>  
>  	ret
>  SYM_FUNC_END(mte_memcmp_pages)
> +
> +/*
> + * Read tags from a user buffer (one tag per byte) and set the corresponding
> + * tags at the given kernel address. Used by PTRACE_POKEMTETAGS.
> + *   x0 - kernel address (to)
> + *   x1 - user buffer (from)
> + *   x2 - number of tags/bytes (n)

Is it worth checking for x2 == 0?  Currently, x2 will underflow and
we'll try to loop 2^64 times (until a fault stops us).

I don't think callers currently pass 0 here, but it feels like an
accident waiting to happen.  Things like memcpy() usually try to close
this loophole.

Similarly for _to_user().

Cheers
---Dave

> + * Returns:
> + *   x0 - number of tags read/set
> + */
> +SYM_FUNC_START(mte_copy_tags_from_user)
> +	mov	x3, x1
> +1:
> +USER(2f, ldtrb	w4, [x1])
> +	lsl	x4, x4, #MTE_TAG_SHIFT
> +	stg	x4, [x0], #MTE_ALLOC_SIZE
> +	add	x1, x1, #1
> +	subs	x2, x2, #1
> +	b.ne	1b
> +
> +	// exception handling and function return
> +2:	sub	x0, x1, x3		// update the number of tags set
> +	ret
> +SYM_FUNC_END(mte_copy_tags_from_user)
> +
> +/*
> + * Get the tags from a kernel address range and write the tag values to the
> + * given user buffer (one tag per byte). Used by PTRACE_PEEKMTETAGS.
> + *   x0 - user buffer (to)
> + *   x1 - kernel address (from)
> + *   x2 - number of tags/bytes (n)
> + * Returns:
> + *   x0 - number of tags read/set
> + */
> +SYM_FUNC_START(mte_copy_tags_to_user)
> +	mov	x3, x0
> +1:
> +	ldg	x4, [x1]
> +	ubfx	x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE
> +USER(2f, sttrb	w4, [x0])
> +	add	x0, x0, #1
> +	add	x1, x1, #MTE_ALLOC_SIZE
> +	subs	x2, x2, #1
> +	b.ne	1b
> +
> +	// exception handling and function return
> +2:	sub	x0, x0, x3		// update the number of tags copied
> +	ret
> +SYM_FUNC_END(mte_copy_tags_to_user)
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Catalin Marinas April 30, 2020, 10:21 a.m. UTC | #5
On Wed, Apr 29, 2020 at 05:46:07PM +0100, Dave P Martin wrote:
> On Tue, Apr 21, 2020 at 03:25:59PM +0100, Catalin Marinas wrote:
> > Add support for bulk setting/getting of the MTE tags in a tracee's
> > address space at 'addr' in the ptrace() syscall prototype. 'data' points
> > to a struct iovec in the tracer's address space with iov_base
> > representing the address of a tracer's buffer of length iov_len. The
> > tags to be copied to/from the tracer's buffer are stored as one tag per
> > byte.
> > 
> > On successfully copying at least one tag, ptrace() returns 0 and updates
> > the tracer's iov_len with the number of tags copied. In case of error,
> > either -EIO or -EFAULT is returned, trying to follow the ptrace() man
> > page.
> > 
> > Note that the tag copying functions are not performance critical,
> > therefore they lack optimisations found in typical memory copy routines.
> 
> Doesn't quite belong here, but:
> 
> Can we dump the tags and possible the faulting mode etc. when dumping
> core?

Yes, a regset containing GCR_EL1 and SCTLR_EL1.TCF0 bits, maybe
TFSRE_EL1 could be useful. Discussing with Luis M (cc'ed, working on gdb
support), he didn't have an immediate need for this but it can be added
as a new patch.

Also coredump containing the tags may also be useful, I just have to
figure out how.

> These could probably be added later, though.

Yes, it wouldn't be a (breaking) ABI change if we do them later, just an
addition.

> > diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> > index fa4a4196b248..0cb496ed9bf9 100644
> > --- a/arch/arm64/kernel/mte.c
> > +++ b/arch/arm64/kernel/mte.c
> > @@ -133,3 +138,125 @@ long get_mte_ctrl(void)
> >  
> >  	return ret;
> >  }
> > +
> > +/*
> > + * Access MTE tags in another process' address space as given in mm. Update
> > + * the number of tags copied. Return 0 if any tags copied, error otherwise.
> > + * Inspired by __access_remote_vm().
> > + */
> > +static int __access_remote_tags(struct task_struct *tsk, struct mm_struct *mm,
> > +				unsigned long addr, struct iovec *kiov,
> > +				unsigned int gup_flags)
> > +{
> > +	struct vm_area_struct *vma;
> > +	void __user *buf = kiov->iov_base;
> > +	size_t len = kiov->iov_len;
> > +	int ret;
> > +	int write = gup_flags & FOLL_WRITE;
> > +
> > +	if (down_read_killable(&mm->mmap_sem))
> > +		return -EIO;
> > +
> > +	if (!access_ok(buf, len))
> > +		return -EFAULT;
> 
> Leaked down_read()?

Ah, wrongly placed access_ok() check.

> > +int mte_ptrace_copy_tags(struct task_struct *child, long request,
> > +			 unsigned long addr, unsigned long data)
> > +{
> > +	int ret;
> > +	struct iovec kiov;
> > +	struct iovec __user *uiov = (void __user *)data;
> > +	unsigned int gup_flags = FOLL_FORCE;
> > +
> > +	if (!system_supports_mte())
> > +		return -EIO;
> > +
> > +	if (get_user(kiov.iov_base, &uiov->iov_base) ||
> > +	    get_user(kiov.iov_len, &uiov->iov_len))
> > +		return -EFAULT;
> > +
> > +	if (request == PTRACE_POKEMTETAGS)
> > +		gup_flags |= FOLL_WRITE;
> > +
> > +	/* align addr to the MTE tag granule */
> > +	addr &= MTE_ALLOC_MASK;
> > +
> > +	ret = access_remote_tags(child, addr, &kiov, gup_flags);
> > +	if (!ret)
> > +		ret = __put_user(kiov.iov_len, &uiov->iov_len);
> 
> Should this be put_user()?  We didn't use __get_user() above, and I
> don't see what guards the access.

It doesn't make any difference on arm64 (it's just put_user) but we had
get_user() to check the access to &uiov->iov_len already above.

> > +	default:
> > +		ret = ptrace_request(child, request, addr, data);
> > +		break;
> > +	}
> > +
> > +	return ret;
> >  }
> >  
> >  enum ptrace_syscall_dir {
> > diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
> > index bd51ea7e2fcb..45be04a8c73c 100644
> > --- a/arch/arm64/lib/mte.S
> > +++ b/arch/arm64/lib/mte.S
> > @@ -5,6 +5,7 @@
> >  #include <linux/linkage.h>
> >  
> >  #include <asm/assembler.h>
> > +#include <asm/mte.h>
> >  
> >  /*
> >   * Compare tags of two pages
> > @@ -44,3 +45,52 @@ SYM_FUNC_START(mte_memcmp_pages)
> >  
> >  	ret
> >  SYM_FUNC_END(mte_memcmp_pages)
> > +
> > +/*
> > + * Read tags from a user buffer (one tag per byte) and set the corresponding
> > + * tags at the given kernel address. Used by PTRACE_POKEMTETAGS.
> > + *   x0 - kernel address (to)
> > + *   x1 - user buffer (from)
> > + *   x2 - number of tags/bytes (n)
> 
> Is it worth checking for x2 == 0?  Currently, x2 will underflow and
> we'll try to loop 2^64 times (until a fault stops us).
> 
> I don't think callers currently pass 0 here, but it feels like an
> accident waiting to happen.  Things like memcpy() usually try to close
> this loophole.

Good point.

Thanks.
Dave Martin May 4, 2020, 4:40 p.m. UTC | #6
On Thu, Apr 30, 2020 at 11:21:32AM +0100, Catalin Marinas wrote:
> On Wed, Apr 29, 2020 at 05:46:07PM +0100, Dave P Martin wrote:
> > On Tue, Apr 21, 2020 at 03:25:59PM +0100, Catalin Marinas wrote:
> > > Add support for bulk setting/getting of the MTE tags in a tracee's
> > > address space at 'addr' in the ptrace() syscall prototype. 'data' points
> > > to a struct iovec in the tracer's address space with iov_base
> > > representing the address of a tracer's buffer of length iov_len. The
> > > tags to be copied to/from the tracer's buffer are stored as one tag per
> > > byte.
> > > 
> > > On successfully copying at least one tag, ptrace() returns 0 and updates
> > > the tracer's iov_len with the number of tags copied. In case of error,
> > > either -EIO or -EFAULT is returned, trying to follow the ptrace() man
> > > page.
> > > 
> > > Note that the tag copying functions are not performance critical,
> > > therefore they lack optimisations found in typical memory copy routines.
> > 
> > Doesn't quite belong here, but:
> > 
> > Can we dump the tags and possible the faulting mode etc. when dumping
> > core?
> 
> Yes, a regset containing GCR_EL1 and SCTLR_EL1.TCF0 bits, maybe
> TFSRE_EL1 could be useful. Discussing with Luis M (cc'ed, working on gdb
> support), he didn't have an immediate need for this but it can be added
> as a new patch.
> 
> Also coredump containing the tags may also be useful, I just have to
> figure out how.
> 
> > These could probably be added later, though.
> 
> Yes, it wouldn't be a (breaking) ABI change if we do them later, just an
> addition.

Agreed

> > > diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
> > > index fa4a4196b248..0cb496ed9bf9 100644
> > > --- a/arch/arm64/kernel/mte.c
> > > +++ b/arch/arm64/kernel/mte.c
> > > @@ -133,3 +138,125 @@ long get_mte_ctrl(void)
> > >  
> > >  	return ret;
> > >  }
> > > +
> > > +/*
> > > + * Access MTE tags in another process' address space as given in mm. Update
> > > + * the number of tags copied. Return 0 if any tags copied, error otherwise.
> > > + * Inspired by __access_remote_vm().
> > > + */
> > > +static int __access_remote_tags(struct task_struct *tsk, struct mm_struct *mm,
> > > +				unsigned long addr, struct iovec *kiov,
> > > +				unsigned int gup_flags)
> > > +{
> > > +	struct vm_area_struct *vma;
> > > +	void __user *buf = kiov->iov_base;
> > > +	size_t len = kiov->iov_len;
> > > +	int ret;
> > > +	int write = gup_flags & FOLL_WRITE;
> > > +
> > > +	if (down_read_killable(&mm->mmap_sem))
> > > +		return -EIO;
> > > +
> > > +	if (!access_ok(buf, len))
> > > +		return -EFAULT;
> > 
> > Leaked down_read()?
> 
> Ah, wrongly placed access_ok() check.
> 
> > > +int mte_ptrace_copy_tags(struct task_struct *child, long request,
> > > +			 unsigned long addr, unsigned long data)
> > > +{
> > > +	int ret;
> > > +	struct iovec kiov;
> > > +	struct iovec __user *uiov = (void __user *)data;
> > > +	unsigned int gup_flags = FOLL_FORCE;
> > > +
> > > +	if (!system_supports_mte())
> > > +		return -EIO;
> > > +
> > > +	if (get_user(kiov.iov_base, &uiov->iov_base) ||
> > > +	    get_user(kiov.iov_len, &uiov->iov_len))
> > > +		return -EFAULT;
> > > +
> > > +	if (request == PTRACE_POKEMTETAGS)
> > > +		gup_flags |= FOLL_WRITE;
> > > +
> > > +	/* align addr to the MTE tag granule */
> > > +	addr &= MTE_ALLOC_MASK;
> > > +
> > > +	ret = access_remote_tags(child, addr, &kiov, gup_flags);
> > > +	if (!ret)
> > > +		ret = __put_user(kiov.iov_len, &uiov->iov_len);
> > 
> > Should this be put_user()?  We didn't use __get_user() above, and I
> > don't see what guards the access.
> 
> It doesn't make any difference on arm64 (it's just put_user) but we had
> get_user() to check the access to &uiov->iov_len already above.

Given that this isn't a critical path, I'd opt for not relying on side-
effects, since this could lead to mismaintenance in the future -- or
badly educate people who read the code.

That's just my preference though.

[...]

Cheers
---Dave
Luis Machado May 5, 2020, 6:03 p.m. UTC | #7
On 4/21/20 11:25 AM, Catalin Marinas wrote:
> Add support for bulk setting/getting of the MTE tags in a tracee's
> address space at 'addr' in the ptrace() syscall prototype. 'data' points
> to a struct iovec in the tracer's address space with iov_base
> representing the address of a tracer's buffer of length iov_len. The
> tags to be copied to/from the tracer's buffer are stored as one tag per
> byte.
> 
> On successfully copying at least one tag, ptrace() returns 0 and updates
> the tracer's iov_len with the number of tags copied. In case of error,
> either -EIO or -EFAULT is returned, trying to follow the ptrace() man
> page.
> 
> Note that the tag copying functions are not performance critical,
> therefore they lack optimisations found in typical memory copy routines.
> 
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Alan Hayward <Alan.Hayward@arm.com>
> Cc: Luis Machado <luis.machado@linaro.org>
> Cc: Omair Javaid <omair.javaid@linaro.org>
> ---
> 
> Notes:
>      New in v3.
> 
>   arch/arm64/include/asm/mte.h         |  17 ++++
>   arch/arm64/include/uapi/asm/ptrace.h |   3 +
>   arch/arm64/kernel/mte.c              | 127 +++++++++++++++++++++++++++
>   arch/arm64/kernel/ptrace.c           |  15 +++-
>   arch/arm64/lib/mte.S                 |  50 +++++++++++
>   5 files changed, 211 insertions(+), 1 deletion(-)

I'll try to exercise the new ptrace requests with QEMU and GDB.
Luis Machado May 12, 2020, 7:05 p.m. UTC | #8
Hi Catalin,

On 4/21/20 11:25 AM, Catalin Marinas wrote:
> Add support for bulk setting/getting of the MTE tags in a tracee's
> address space at 'addr' in the ptrace() syscall prototype. 'data' points
> to a struct iovec in the tracer's address space with iov_base
> representing the address of a tracer's buffer of length iov_len. The
> tags to be copied to/from the tracer's buffer are stored as one tag per
> byte.
> 
> On successfully copying at least one tag, ptrace() returns 0 and updates
> the tracer's iov_len with the number of tags copied. In case of error,
> either -EIO or -EFAULT is returned, trying to follow the ptrace() man
> page.
> 
> Note that the tag copying functions are not performance critical,
> therefore they lack optimisations found in typical memory copy routines.
> 
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Alan Hayward <Alan.Hayward@arm.com>
> Cc: Luis Machado <luis.machado@linaro.org>
> Cc: Omair Javaid <omair.javaid@linaro.org>
> ---
> 
> Notes:
>      New in v3.
> 
>   arch/arm64/include/asm/mte.h         |  17 ++++
>   arch/arm64/include/uapi/asm/ptrace.h |   3 +
>   arch/arm64/kernel/mte.c              | 127 +++++++++++++++++++++++++++
>   arch/arm64/kernel/ptrace.c           |  15 +++-
>   arch/arm64/lib/mte.S                 |  50 +++++++++++
>   5 files changed, 211 insertions(+), 1 deletion(-)
>
I started working on MTE support for GDB and I'm wondering if we've 
already defined a way to check for runtime MTE support (as opposed to a 
HWCAP2-based check) in a traced process.

Originally we were going to do it via empty-parameter ptrace calls, but 
you had mentioned something about a proc-based method, if I'm not mistaken.

Regards,
Luis
Catalin Marinas May 13, 2020, 10:48 a.m. UTC | #9
Hi Luis,

On Tue, May 12, 2020 at 04:05:15PM -0300, Luis Machado wrote:
> On 4/21/20 11:25 AM, Catalin Marinas wrote:
> > Add support for bulk setting/getting of the MTE tags in a tracee's
> > address space at 'addr' in the ptrace() syscall prototype. 'data' points
> > to a struct iovec in the tracer's address space with iov_base
> > representing the address of a tracer's buffer of length iov_len. The
> > tags to be copied to/from the tracer's buffer are stored as one tag per
> > byte.
> > 
> > On successfully copying at least one tag, ptrace() returns 0 and updates
> > the tracer's iov_len with the number of tags copied. In case of error,
> > either -EIO or -EFAULT is returned, trying to follow the ptrace() man
> > page.
> > 
> > Note that the tag copying functions are not performance critical,
> > therefore they lack optimisations found in typical memory copy routines.
> > 
> > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> > Cc: Will Deacon <will@kernel.org>
> > Cc: Alan Hayward <Alan.Hayward@arm.com>
> > Cc: Luis Machado <luis.machado@linaro.org>
> > Cc: Omair Javaid <omair.javaid@linaro.org>
> > ---
> > 
> > Notes:
> >      New in v3.
> > 
> >   arch/arm64/include/asm/mte.h         |  17 ++++
> >   arch/arm64/include/uapi/asm/ptrace.h |   3 +
> >   arch/arm64/kernel/mte.c              | 127 +++++++++++++++++++++++++++
> >   arch/arm64/kernel/ptrace.c           |  15 +++-
> >   arch/arm64/lib/mte.S                 |  50 +++++++++++
> >   5 files changed, 211 insertions(+), 1 deletion(-)
> > 
> I started working on MTE support for GDB and I'm wondering if we've already
> defined a way to check for runtime MTE support (as opposed to a HWCAP2-based
> check) in a traced process.
> 
> Originally we were going to do it via empty-parameter ptrace calls, but you
> had mentioned something about a proc-based method, if I'm not mistaken.

We could expose more information via proc_pid_arch_status() but that
would be the tagged address ABI and tag check fault mode and intended
for human consumption mostly. We don't have any ptrace interface that
exposes HWCAPs. Since the gdbserver runs on the same machine as the
debugged process, it can check the HWCAPs itself, they are the same for
all processes.

BTW, in my pre-v4 patches (hopefully I'll post v4 this week), I changed
the ptrace tag access slightly to return an error (and no tags copied)
if the page has not been mapped with PROT_MTE. The other option would
have been read-as-zero/write-ignored as per the hardware behaviour.
Either option is fine by me but I thought the write-ignored part would
be more confusing for the debugger. If you have any preference here,
please let me know.
Luis Machado May 13, 2020, 12:52 p.m. UTC | #10
On 5/13/20 7:48 AM, Catalin Marinas wrote:
> Hi Luis,
> 
> On Tue, May 12, 2020 at 04:05:15PM -0300, Luis Machado wrote:
>> On 4/21/20 11:25 AM, Catalin Marinas wrote:
>>> Add support for bulk setting/getting of the MTE tags in a tracee's
>>> address space at 'addr' in the ptrace() syscall prototype. 'data' points
>>> to a struct iovec in the tracer's address space with iov_base
>>> representing the address of a tracer's buffer of length iov_len. The
>>> tags to be copied to/from the tracer's buffer are stored as one tag per
>>> byte.
>>>
>>> On successfully copying at least one tag, ptrace() returns 0 and updates
>>> the tracer's iov_len with the number of tags copied. In case of error,
>>> either -EIO or -EFAULT is returned, trying to follow the ptrace() man
>>> page.
>>>
>>> Note that the tag copying functions are not performance critical,
>>> therefore they lack optimisations found in typical memory copy routines.
>>>
>>> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
>>> Cc: Will Deacon <will@kernel.org>
>>> Cc: Alan Hayward <Alan.Hayward@arm.com>
>>> Cc: Luis Machado <luis.machado@linaro.org>
>>> Cc: Omair Javaid <omair.javaid@linaro.org>
>>> ---
>>>
>>> Notes:
>>>       New in v3.
>>>
>>>    arch/arm64/include/asm/mte.h         |  17 ++++
>>>    arch/arm64/include/uapi/asm/ptrace.h |   3 +
>>>    arch/arm64/kernel/mte.c              | 127 +++++++++++++++++++++++++++
>>>    arch/arm64/kernel/ptrace.c           |  15 +++-
>>>    arch/arm64/lib/mte.S                 |  50 +++++++++++
>>>    5 files changed, 211 insertions(+), 1 deletion(-)
>>>
>> I started working on MTE support for GDB and I'm wondering if we've already
>> defined a way to check for runtime MTE support (as opposed to a HWCAP2-based
>> check) in a traced process.
>>
>> Originally we were going to do it via empty-parameter ptrace calls, but you
>> had mentioned something about a proc-based method, if I'm not mistaken.
> 
> We could expose more information via proc_pid_arch_status() but that
> would be the tagged address ABI and tag check fault mode and intended
> for human consumption mostly. We don't have any ptrace interface that
> exposes HWCAPs. Since the gdbserver runs on the same machine as the
> debugged process, it can check the HWCAPs itself, they are the same for
> all processes.

Sorry, I think i haven't made it clear. I already have access to HWCAP2 
both from GDB's and gdbserver's side. But HWCAP2 only indicates the 
availability of a particular feature in a CPU, it doesn't necessarily 
means the traced process is actively using MTE, right?

So GDB/gdbserver would need runtime checks to be able to tell if a 
process is using MTE, in which case the tools will pay attention to tags 
and additional MTE-related registers (sctlr and gcr) we plan to make 
available to userspace.

This would be similar to SVE, where we have a HWCAP bit indicating the 
presence of the feature, but it may not be in use at runtime for a 
particular running process.

The original proposal was to have GDB send PTRACE_PEEKMTETAGS with a 
NULL address and check the result. Then GDB would be able to decide if 
the process is using MTE or not.

> 
> BTW, in my pre-v4 patches (hopefully I'll post v4 this week), I changed
> the ptrace tag access slightly to return an error (and no tags copied)
> if the page has not been mapped with PROT_MTE. The other option would
> have been read-as-zero/write-ignored as per the hardware behaviour.
> Either option is fine by me but I thought the write-ignored part would
> be more confusing for the debugger. If you have any preference here,
> please let me know.
> 

I think erroring out is a better alternative, as long as the debugger 
can tell what the error means, like, for example, "this particular 
address doesn't make use of tags".
Catalin Marinas May 13, 2020, 2:11 p.m. UTC | #11
On Wed, May 13, 2020 at 09:52:52AM -0300, Luis Machado wrote:
> On 5/13/20 7:48 AM, Catalin Marinas wrote:
> > On Tue, May 12, 2020 at 04:05:15PM -0300, Luis Machado wrote:
> > > On 4/21/20 11:25 AM, Catalin Marinas wrote:
> > > > Add support for bulk setting/getting of the MTE tags in a tracee's
> > > > address space at 'addr' in the ptrace() syscall prototype. 'data' points
> > > > to a struct iovec in the tracer's address space with iov_base
> > > > representing the address of a tracer's buffer of length iov_len. The
> > > > tags to be copied to/from the tracer's buffer are stored as one tag per
> > > > byte.
> > > > 
> > > > On successfully copying at least one tag, ptrace() returns 0 and updates
> > > > the tracer's iov_len with the number of tags copied. In case of error,
> > > > either -EIO or -EFAULT is returned, trying to follow the ptrace() man
> > > > page.
> > > > 
> > > > Note that the tag copying functions are not performance critical,
> > > > therefore they lack optimisations found in typical memory copy routines.
> > > > 
> > > > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> > > > Cc: Will Deacon <will@kernel.org>
> > > > Cc: Alan Hayward <Alan.Hayward@arm.com>
> > > > Cc: Luis Machado <luis.machado@linaro.org>
> > > > Cc: Omair Javaid <omair.javaid@linaro.org>
> > > 
> > > I started working on MTE support for GDB and I'm wondering if we've already
> > > defined a way to check for runtime MTE support (as opposed to a HWCAP2-based
> > > check) in a traced process.
> > > 
> > > Originally we were going to do it via empty-parameter ptrace calls, but you
> > > had mentioned something about a proc-based method, if I'm not mistaken.
> > 
> > We could expose more information via proc_pid_arch_status() but that
> > would be the tagged address ABI and tag check fault mode and intended
> > for human consumption mostly. We don't have any ptrace interface that
> > exposes HWCAPs. Since the gdbserver runs on the same machine as the
> > debugged process, it can check the HWCAPs itself, they are the same for
> > all processes.
> 
> Sorry, I think i haven't made it clear. I already have access to HWCAP2 both
> from GDB's and gdbserver's side. But HWCAP2 only indicates the availability
> of a particular feature in a CPU, it doesn't necessarily means the traced
> process is actively using MTE, right?

Right, but "actively" is not well defined either. The only way to tell
whether a process is using MTE is to look for any PROT_MTE mappings. You
can access these via /proc/<pid>/maps. In theory, one can use MTE
without enabling the tagged address ABI or even tag checking (i.e. no
prctl() call).

> So GDB/gdbserver would need runtime checks to be able to tell if a process
> is using MTE, in which case the tools will pay attention to tags and
> additional MTE-related registers (sctlr and gcr) we plan to make available
> to userspace.

I'm happy to expose GCR_EL1.Excl and the SCTLR_EL1.TCF0 bits via ptrace
as a thread state. The tags, however, are a property of the memory range
rather than a per-thread state. That's what makes it different from
other register-based features like SVE.

> The original proposal was to have GDB send PTRACE_PEEKMTETAGS with a NULL
> address and check the result. Then GDB would be able to decide if the
> process is using MTE or not.

We don't store this information in the kernel as a bool and I don't
think it would be useful either. I think gdb, when displaying memory,
should attempt to show tags as well if the corresponding range was
mapped with PROT_MTE. Just probing whether a thread ever used MTE
doesn't help since you need to be more precise on which address supports
tags.

> > BTW, in my pre-v4 patches (hopefully I'll post v4 this week), I changed
> > the ptrace tag access slightly to return an error (and no tags copied)
> > if the page has not been mapped with PROT_MTE. The other option would
> > have been read-as-zero/write-ignored as per the hardware behaviour.
> > Either option is fine by me but I thought the write-ignored part would
> > be more confusing for the debugger. If you have any preference here,
> > please let me know.
> 
> I think erroring out is a better alternative, as long as the debugger can
> tell what the error means, like, for example, "this particular address
> doesn't make use of tags".

And you could use this for probing whether the range has tags or not.
With my current patches it returns -EFAULT but happy to change this to
-EOPNOTSUPP or -EINVAL. Note that it only returns an error if no tags
copied. If gdb asks for a range of two pages and only the first one has
PROT_MTE, it will return 0 and set the number of tags copied equivalent
to the first page. A subsequent call would return an error.

In my discussion with Dave on the documentation patch, I thought retries
wouldn't be needed but in the above case it may be useful to get an
error code. That's unless we change the interface to return an error and
also update the user iovec structure.
Luis Machado May 13, 2020, 3:09 p.m. UTC | #12
On 5/13/20 11:11 AM, Catalin Marinas wrote:
> On Wed, May 13, 2020 at 09:52:52AM -0300, Luis Machado wrote:
>> On 5/13/20 7:48 AM, Catalin Marinas wrote:
>>> On Tue, May 12, 2020 at 04:05:15PM -0300, Luis Machado wrote:
>>>> On 4/21/20 11:25 AM, Catalin Marinas wrote:
>>>>> Add support for bulk setting/getting of the MTE tags in a tracee's
>>>>> address space at 'addr' in the ptrace() syscall prototype. 'data' points
>>>>> to a struct iovec in the tracer's address space with iov_base
>>>>> representing the address of a tracer's buffer of length iov_len. The
>>>>> tags to be copied to/from the tracer's buffer are stored as one tag per
>>>>> byte.
>>>>>
>>>>> On successfully copying at least one tag, ptrace() returns 0 and updates
>>>>> the tracer's iov_len with the number of tags copied. In case of error,
>>>>> either -EIO or -EFAULT is returned, trying to follow the ptrace() man
>>>>> page.
>>>>>
>>>>> Note that the tag copying functions are not performance critical,
>>>>> therefore they lack optimisations found in typical memory copy routines.
>>>>>
>>>>> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
>>>>> Cc: Will Deacon <will@kernel.org>
>>>>> Cc: Alan Hayward <Alan.Hayward@arm.com>
>>>>> Cc: Luis Machado <luis.machado@linaro.org>
>>>>> Cc: Omair Javaid <omair.javaid@linaro.org>
>>>>
>>>> I started working on MTE support for GDB and I'm wondering if we've already
>>>> defined a way to check for runtime MTE support (as opposed to a HWCAP2-based
>>>> check) in a traced process.
>>>>
>>>> Originally we were going to do it via empty-parameter ptrace calls, but you
>>>> had mentioned something about a proc-based method, if I'm not mistaken.
>>>
>>> We could expose more information via proc_pid_arch_status() but that
>>> would be the tagged address ABI and tag check fault mode and intended
>>> for human consumption mostly. We don't have any ptrace interface that
>>> exposes HWCAPs. Since the gdbserver runs on the same machine as the
>>> debugged process, it can check the HWCAPs itself, they are the same for
>>> all processes.
>>
>> Sorry, I think i haven't made it clear. I already have access to HWCAP2 both
>> from GDB's and gdbserver's side. But HWCAP2 only indicates the availability
>> of a particular feature in a CPU, it doesn't necessarily means the traced
>> process is actively using MTE, right?
> 
> Right, but "actively" is not well defined either. The only way to tell
> whether a process is using MTE is to look for any PROT_MTE mappings. You
> can access these via /proc/<pid>/maps. In theory, one can use MTE
> without enabling the tagged address ABI or even tag checking (i.e. no
> prctl() call).
> 

I see the problem. I was hoping for a more immediate form of runtime 
check. One where debuggers would validate and enable all the tag checks and 
register access at process attach/startup.

With that said, checking for PROT_MTE in /proc/<pid>/maps may still be 
useful, but a process with no immediate PROT_MTE maps doesn't mean such 
process won't attempt to use PROT_MTE later on. I'll have to factor that 
in, but I think it'll work.

I guess HWCAP2_MTE will be useful after all. We can just assume that 
whenever we have HWCAP2_MTE, we can fetch MTE registers and check for 
PROT_MTE.

>> So GDB/gdbserver would need runtime checks to be able to tell if a process
>> is using MTE, in which case the tools will pay attention to tags and
>> additional MTE-related registers (sctlr and gcr) we plan to make available
>> to userspace.
> 
> I'm happy to expose GCR_EL1.Excl and the SCTLR_EL1.TCF0 bits via ptrace
> as a thread state. The tags, however, are a property of the memory range
> rather than a per-thread state. That's what makes it different from
> other register-based features like SVE.

That's my understanding as well. I'm assuming, based on our previous 
discussion, that we'll have those couple registers under a regset (maybe 
NT_ARM_MTE).

> 
>> The original proposal was to have GDB send PTRACE_PEEKMTETAGS with a NULL
>> address and check the result. Then GDB would be able to decide if the
>> process is using MTE or not.
> 
> We don't store this information in the kernel as a bool and I don't
> think it would be useful either. I think gdb, when displaying memory,
> should attempt to show tags as well if the corresponding range was
> mapped with PROT_MTE. Just probing whether a thread ever used MTE
> doesn't help since you need to be more precise on which address supports
> tags.

Thanks for making this clear. Checking with ptrace won't work then. It 
seems like /proc/<pid>/maps is the way to go.

> 
>>> BTW, in my pre-v4 patches (hopefully I'll post v4 this week), I changed
>>> the ptrace tag access slightly to return an error (and no tags copied)
>>> if the page has not been mapped with PROT_MTE. The other option would
>>> have been read-as-zero/write-ignored as per the hardware behaviour.
>>> Either option is fine by me but I thought the write-ignored part would
>>> be more confusing for the debugger. If you have any preference here,
>>> please let me know.
>>
>> I think erroring out is a better alternative, as long as the debugger can
>> tell what the error means, like, for example, "this particular address
>> doesn't make use of tags".
> 
> And you could use this for probing whether the range has tags or not.
> With my current patches it returns -EFAULT but happy to change this to
> -EOPNOTSUPP or -EINVAL. Note that it only returns an error if no tags
> copied. If gdb asks for a range of two pages and only the first one has
> PROT_MTE, it will return 0 and set the number of tags copied equivalent
> to the first page. A subsequent call would return an error.
> 
> In my discussion with Dave on the documentation patch, I thought retries
> wouldn't be needed but in the above case it may be useful to get an
> error code. That's unless we change the interface to return an error and
> also update the user iovec structure.
> 

Let me think about this for a bit. I'm trying to factor in the 
/proc/<pid>/maps contents. If debuggers know which pages have PROT_MTE 
set, then we can teach the tools not to PEEK/POKE tags from/to those 
memory ranges, which simplifies the error handling a bit.
Luis Machado May 13, 2020, 4:45 p.m. UTC | #13
On 5/13/20 12:09 PM, Luis Machado wrote:
> On 5/13/20 11:11 AM, Catalin Marinas wrote:
>> On Wed, May 13, 2020 at 09:52:52AM -0300, Luis Machado wrote:
>>> On 5/13/20 7:48 AM, Catalin Marinas wrote:
>>>> On Tue, May 12, 2020 at 04:05:15PM -0300, Luis Machado wrote:
>>>>> On 4/21/20 11:25 AM, Catalin Marinas wrote:
>>>>>> Add support for bulk setting/getting of the MTE tags in a tracee's
>>>>>> address space at 'addr' in the ptrace() syscall prototype. 'data' 
>>>>>> points
>>>>>> to a struct iovec in the tracer's address space with iov_base
>>>>>> representing the address of a tracer's buffer of length iov_len. The
>>>>>> tags to be copied to/from the tracer's buffer are stored as one 
>>>>>> tag per
>>>>>> byte.
>>>>>>
>>>>>> On successfully copying at least one tag, ptrace() returns 0 and 
>>>>>> updates
>>>>>> the tracer's iov_len with the number of tags copied. In case of 
>>>>>> error,
>>>>>> either -EIO or -EFAULT is returned, trying to follow the ptrace() man
>>>>>> page.
>>>>>>
>>>>>> Note that the tag copying functions are not performance critical,
>>>>>> therefore they lack optimisations found in typical memory copy 
>>>>>> routines.
>>>>>>
>>>>>> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
>>>>>> Cc: Will Deacon <will@kernel.org>
>>>>>> Cc: Alan Hayward <Alan.Hayward@arm.com>
>>>>>> Cc: Luis Machado <luis.machado@linaro.org>
>>>>>> Cc: Omair Javaid <omair.javaid@linaro.org>
>>>>>
>>>>> I started working on MTE support for GDB and I'm wondering if we've 
>>>>> already
>>>>> defined a way to check for runtime MTE support (as opposed to a 
>>>>> HWCAP2-based
>>>>> check) in a traced process.
>>>>>
>>>>> Originally we were going to do it via empty-parameter ptrace calls, 
>>>>> but you
>>>>> had mentioned something about a proc-based method, if I'm not 
>>>>> mistaken.
>>>>
>>>> We could expose more information via proc_pid_arch_status() but that
>>>> would be the tagged address ABI and tag check fault mode and intended
>>>> for human consumption mostly. We don't have any ptrace interface that
>>>> exposes HWCAPs. Since the gdbserver runs on the same machine as the
>>>> debugged process, it can check the HWCAPs itself, they are the same for
>>>> all processes.
>>>
>>> Sorry, I think i haven't made it clear. I already have access to 
>>> HWCAP2 both
>>> from GDB's and gdbserver's side. But HWCAP2 only indicates the 
>>> availability
>>> of a particular feature in a CPU, it doesn't necessarily means the 
>>> traced
>>> process is actively using MTE, right?
>>
>> Right, but "actively" is not well defined either. The only way to tell
>> whether a process is using MTE is to look for any PROT_MTE mappings. You
>> can access these via /proc/<pid>/maps. In theory, one can use MTE
>> without enabling the tagged address ABI or even tag checking (i.e. no
>> prctl() call).
>>
> 
> I see the problem. I was hoping for a more immediate form of runtime 
> check. One debuggers would validate and enable all the tag checks and 
> register access at process attach/startup.
> 
> With that said, checking for PROT_MTE in /proc/<pid>/maps may still be 
> useful, but a process with no immediate PROT_MTE maps doesn't mean such 
> process won't attempt to use PROT_MTE later on. I'll have to factor that 
> in, but I think it'll work.
> 
> I guess HWCAP2_MTE will be useful after all. We can just assume that 
> whenever we have HWCAP2_MTE, we can fetch MTE registers and check for 
> PROT_MTE.
> 
>>> So GDB/gdbserver would need runtime checks to be able to tell if a 
>>> process
>>> is using MTE, in which case the tools will pay attention to tags and
>>> additional MTE-related registers (sctlr and gcr) we plan to make 
>>> available
>>> to userspace.
>>
>> I'm happy to expose GCR_EL1.Excl and the SCTLR_EL1.TCF0 bits via ptrace
>> as a thread state. The tags, however, are a property of the memory range
>> rather than a per-thread state. That's what makes it different from
>> other register-based features like SVE.
> 
> That's my understanding as well. I'm assuming, based on our previous 
> discussion, that we'll have those couple registers under a regset (maybe 
> NT_ARM_MTE).
> 
>>
>>> The original proposal was to have GDB send PTRACE_PEEKMTETAGS with a 
>>> NULL
>>> address and check the result. Then GDB would be able to decide if the
>>> process is using MTE or not.
>>
>> We don't store this information in the kernel as a bool and I don't
>> think it would be useful either. I think gdb, when displaying memory,
>> should attempt to show tags as well if the corresponding range was
>> mapped with PROT_MTE. Just probing whether a thread ever used MTE
>> doesn't help since you need to be more precise on which address supports
>> tags.
> 
> Thanks for making this clear. Checking with ptrace won't work then. It 
> seems like /proc/<pid>/maps is the way to go.
> 
>>
>>>> BTW, in my pre-v4 patches (hopefully I'll post v4 this week), I changed
>>>> the ptrace tag access slightly to return an error (and no tags copied)
>>>> if the page has not been mapped with PROT_MTE. The other option would
>>>> have been read-as-zero/write-ignored as per the hardware behaviour.
>>>> Either option is fine by me but I thought the write-ignored part would
>>>> be more confusing for the debugger. If you have any preference here,
>>>> please let me know.
>>>
>>> I think erroring out is a better alternative, as long as the debugger 
>>> can
>>> tell what the error means, like, for example, "this particular address
>>> doesn't make use of tags".
>>
>> And you could use this for probing whether the range has tags or not.
>> With my current patches it returns -EFAULT but happy to change this to
>> -EOPNOTSUPP or -EINVAL. Note that it only returns an error if no tags
>> copied. If gdb asks for a range of two pages and only the first one has
>> PROT_MTE, it will return 0 and set the number of tags copied equivalent
>> to the first page. A subsequent call would return an error.
>>
>> In my discussion with Dave on the documentation patch, I thought retries
>> wouldn't be needed but in the above case it may be useful to get an
>> error code. That's unless we change the interface to return an error and
>> also update the user iovec structure.
>>
> 
> Let me think about this for a bit. I'm trying to factor in the 
> /proc/<pid>/maps contents. If debuggers know which pages have PROT_MTE 
> set, then we can teach the tools not to PEEK/POKE tags from/to those 
> memory ranges, which simplifies the error handling a bit.

I was checking the output of /proc/<pid>/maps and it doesn't seem to 
contain flags against which i can match PROT_MTE. It seems 
/proc/<pid>/smaps is the one that contains the flags (mt) for MTE. Am i 
missing something?

Is this the only place debuggers can check for PROT_MTE? If so, that's 
unfortunate. /proc/<pid>/smaps doesn't seem to be convenient for parsing.
Catalin Marinas May 13, 2020, 5:11 p.m. UTC | #14
On Wed, May 13, 2020 at 01:45:27PM -0300, Luis Machado wrote:
> On 5/13/20 12:09 PM, Luis Machado wrote:
> > Let me think about this for a bit. I'm trying to factor in the
> > /proc/<pid>/maps contents. If debuggers know which pages have PROT_MTE
> > set, then we can teach the tools not to PEEK/POKE tags from/to those
> > memory ranges, which simplifies the error handling a bit.
> 
> I was checking the output of /proc/<pid>/maps and it doesn't seem to contain
> flags against which i can match PROT_MTE. It seems /proc/<pid>/smaps is the
> one that contains the flags (mt) for MTE. Am i missing something?

You are right, the smaps is the one with the MTE information.

> Is this the only place debuggers can check for PROT_MTE? If so, that's
> unfortunate. /proc/<pid>/smaps doesn't seem to be convenient for parsing.

We can't change 'maps' as it's a pretty standard format with rwxp
properties only.

If you don't want to check any /proc file, just attempt to read the tags
and check the ptrace return code. The downside is that you can't easily
probe if a process is using MTE or not. But is this piece of information
relevant? The gdb user should know what to look for (well, it's been a
while since I used a debugger ;)).
Dave Martin May 18, 2020, 4:47 p.m. UTC | #15
On Wed, May 13, 2020 at 01:45:27PM -0300, Luis Machado wrote:
> On 5/13/20 12:09 PM, Luis Machado wrote:
> >On 5/13/20 11:11 AM, Catalin Marinas wrote:
> >>On Wed, May 13, 2020 at 09:52:52AM -0300, Luis Machado wrote:
> >>>On 5/13/20 7:48 AM, Catalin Marinas wrote:
> >>>>On Tue, May 12, 2020 at 04:05:15PM -0300, Luis Machado wrote:
> >>>>>On 4/21/20 11:25 AM, Catalin Marinas wrote:
> >>>>>>Add support for bulk setting/getting of the MTE tags in a tracee's
> >>>>>>address space at 'addr' in the ptrace() syscall prototype.
> >>>>>>'data' points
> >>>>>>to a struct iovec in the tracer's address space with iov_base
> >>>>>>representing the address of a tracer's buffer of length iov_len. The
> >>>>>>tags to be copied to/from the tracer's buffer are stored as one
> >>>>>>tag per
> >>>>>>byte.
> >>>>>>
> >>>>>>On successfully copying at least one tag, ptrace() returns 0 and
> >>>>>>updates
> >>>>>>the tracer's iov_len with the number of tags copied. In case of
> >>>>>>error,
> >>>>>>either -EIO or -EFAULT is returned, trying to follow the ptrace() man
> >>>>>>page.
> >>>>>>
> >>>>>>Note that the tag copying functions are not performance critical,
> >>>>>>therefore they lack optimisations found in typical memory copy
> >>>>>>routines.
> >>>>>>
> >>>>>>Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> >>>>>>Cc: Will Deacon <will@kernel.org>
> >>>>>>Cc: Alan Hayward <Alan.Hayward@arm.com>
> >>>>>>Cc: Luis Machado <luis.machado@linaro.org>
> >>>>>>Cc: Omair Javaid <omair.javaid@linaro.org>
> >>>>>
> >>>>>I started working on MTE support for GDB and I'm wondering if
> >>>>>we've already
> >>>>>defined a way to check for runtime MTE support (as opposed to a
> >>>>>HWCAP2-based
> >>>>>check) in a traced process.
> >>>>>
> >>>>>Originally we were going to do it via empty-parameter ptrace
> >>>>>calls, but you
> >>>>>had mentioned something about a proc-based method, if I'm not
> >>>>>mistaken.
> >>>>
> >>>>We could expose more information via proc_pid_arch_status() but that
> >>>>would be the tagged address ABI and tag check fault mode and intended
> >>>>for human consumption mostly. We don't have any ptrace interface that
> >>>>exposes HWCAPs. Since the gdbserver runs on the same machine as the
> >>>>debugged process, it can check the HWCAPs itself, they are the same for
> >>>>all processes.
> >>>
> >>>Sorry, I think i haven't made it clear. I already have access to
> >>>HWCAP2 both
> >>>from GDB's and gdbserver's side. But HWCAP2 only indicates the
> >>>availability
> >>>of a particular feature in a CPU, it doesn't necessarily means the
> >>>traced
> >>>process is actively using MTE, right?
> >>
> >>Right, but "actively" is not well defined either. The only way to tell
> >>whether a process is using MTE is to look for any PROT_MTE mappings. You
> >>can access these via /proc/<pid>/maps. In theory, one can use MTE
> >>without enabling the tagged address ABI or even tag checking (i.e. no
> >>prctl() call).
> >>
> >
> >I see the problem. I was hoping for a more immediate form of runtime
> >check. One debuggers would validate and enable all the tag checks and
> >register access at process attach/startup.
> >
> >With that said, checking for PROT_MTE in /proc/<pid>/maps may still be
> >useful, but a process with no immediate PROT_MTE maps doesn't mean such
> >process won't attempt to use PROT_MTE later on. I'll have to factor that
> >in, but I think it'll work.
> >
> >I guess HWCAP2_MTE will be useful after all. We can just assume that
> >whenever we have HWCAP2_MTE, we can fetch MTE registers and check for
> >PROT_MTE.
> >
> >>>So GDB/gdbserver would need runtime checks to be able to tell if a
> >>>process
> >>>is using MTE, in which case the tools will pay attention to tags and
> >>>additional MTE-related registers (sctlr and gcr) we plan to make
> >>>available
> >>>to userspace.
> >>
> >>I'm happy to expose GCR_EL1.Excl and the SCTLR_EL1.TCF0 bits via ptrace
> >>as a thread state. The tags, however, are a property of the memory range
> >>rather than a per-thread state. That's what makes it different from
> >>other register-based features like SVE.
> >
> >That's my understanding as well. I'm assuming, based on our previous
> >discussion, that we'll have those couple registers under a regset (maybe
> >NT_ARM_MTE).
> >
> >>
> >>>The original proposal was to have GDB send PTRACE_PEEKMTETAGS with a
> >>>NULL
> >>>address and check the result. Then GDB would be able to decide if the
> >>>process is using MTE or not.
> >>
> >>We don't store this information in the kernel as a bool and I don't
> >>think it would be useful either. I think gdb, when displaying memory,
> >>should attempt to show tags as well if the corresponding range was
> >>mapped with PROT_MTE. Just probing whether a thread ever used MTE
> >>doesn't help since you need to be more precise on which address supports
> >>tags.
> >
> >Thanks for making this clear. Checking with ptrace won't work then. It
> >seems like /proc/<pid>/maps is the way to go.
> >
> >>
> >>>>BTW, in my pre-v4 patches (hopefully I'll post v4 this week), I changed
> >>>>the ptrace tag access slightly to return an error (and no tags copied)
> >>>>if the page has not been mapped with PROT_MTE. The other option would
> >>>>have been read-as-zero/write-ignored as per the hardware behaviour.
> >>>>Either option is fine by me but I thought the write-ignored part would
> >>>>be more confusing for the debugger. If you have any preference here,
> >>>>please let me know.
> >>>
> >>>I think erroring out is a better alternative, as long as the debugger
> >>>can
> >>>tell what the error means, like, for example, "this particular address
> >>>doesn't make use of tags".
> >>
> >>And you could use this for probing whether the range has tags or not.
> >>With my current patches it returns -EFAULT but happy to change this to
> >>-EOPNOTSUPP or -EINVAL. Note that it only returns an error if no tags
> >>copied. If gdb asks for a range of two pages and only the first one has
> >>PROT_MTE, it will return 0 and set the number of tags copied equivalent
> >>to the first page. A subsequent call would return an error.
> >>
> >>In my discussion with Dave on the documentation patch, I thought retries
> >>wouldn't be needed but in the above case it may be useful to get an
> >>error code. That's unless we change the interface to return an error and
> >>also update the user iovec structure.
> >>
> >
> >Let me think about this for a bit. I'm trying to factor in the
> >/proc/<pid>/maps contents. If debuggers know which pages have PROT_MTE
> >set, then we can teach the tools not to PEEK/POKE tags from/to those
> >memory ranges, which simplifies the error handling a bit.
> 
> I was checking the output of /proc/<pid>/maps and it doesn't seem to contain
> flags against which i can match PROT_MTE. It seems /proc/<pid>/smaps is the
> one that contains the flags (mt) for MTE. Am i missing something?
> 
> Is this the only place debuggers can check for PROT_MTE? If so, that's
> unfortunate. /proc/<pid>/smaps doesn't seem to be convenient for parsing.

Does the /proc approach work for gdbserver?

For the SVE ptrace interface we eventually went with existence of the
NT_ARM_SVE regset as being the canonical way of detecting whether SVE is
present.

As has been discussed here, I think we probably do want to expose the
current MTE config for a thread via a new regset.  Without this, I can't
see how the debugger can know for sure what's going on.


Wrinkle: just because MTE is "off", pages might still be mapped with
PROT_MTE and have arbitrary tags set on them, and the debugger perhaps
needs a way to know that.  Currently grubbing around in /proc is the
only way to discover that.  Dunno whether it matters.

Cheers
---Dave
Luis Machado May 18, 2020, 5:12 p.m. UTC | #16
On 5/18/20 1:47 PM, Dave Martin wrote:
> On Wed, May 13, 2020 at 01:45:27PM -0300, Luis Machado wrote:
>> On 5/13/20 12:09 PM, Luis Machado wrote:
>>> On 5/13/20 11:11 AM, Catalin Marinas wrote:
>>>> On Wed, May 13, 2020 at 09:52:52AM -0300, Luis Machado wrote:
>>>>> On 5/13/20 7:48 AM, Catalin Marinas wrote:
>>>>>> On Tue, May 12, 2020 at 04:05:15PM -0300, Luis Machado wrote:
>>>>>>> On 4/21/20 11:25 AM, Catalin Marinas wrote:
>>>>>>>> Add support for bulk setting/getting of the MTE tags in a tracee's
>>>>>>>> address space at 'addr' in the ptrace() syscall prototype.
>>>>>>>> 'data' points
>>>>>>>> to a struct iovec in the tracer's address space with iov_base
>>>>>>>> representing the address of a tracer's buffer of length iov_len. The
>>>>>>>> tags to be copied to/from the tracer's buffer are stored as one
>>>>>>>> tag per
>>>>>>>> byte.
>>>>>>>>
>>>>>>>> On successfully copying at least one tag, ptrace() returns 0 and
>>>>>>>> updates
>>>>>>>> the tracer's iov_len with the number of tags copied. In case of
>>>>>>>> error,
>>>>>>>> either -EIO or -EFAULT is returned, trying to follow the ptrace() man
>>>>>>>> page.
>>>>>>>>
>>>>>>>> Note that the tag copying functions are not performance critical,
>>>>>>>> therefore they lack optimisations found in typical memory copy
>>>>>>>> routines.
>>>>>>>>
>>>>>>>> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
>>>>>>>> Cc: Will Deacon <will@kernel.org>
>>>>>>>> Cc: Alan Hayward <Alan.Hayward@arm.com>
>>>>>>>> Cc: Luis Machado <luis.machado@linaro.org>
>>>>>>>> Cc: Omair Javaid <omair.javaid@linaro.org>
>>>>>>>
>>>>>>> I started working on MTE support for GDB and I'm wondering if
>>>>>>> we've already
>>>>>>> defined a way to check for runtime MTE support (as opposed to a
>>>>>>> HWCAP2-based
>>>>>>> check) in a traced process.
>>>>>>>
>>>>>>> Originally we were going to do it via empty-parameter ptrace
>>>>>>> calls, but you
>>>>>>> had mentioned something about a proc-based method, if I'm not
>>>>>>> mistaken.
>>>>>>
>>>>>> We could expose more information via proc_pid_arch_status() but that
>>>>>> would be the tagged address ABI and tag check fault mode and intended
>>>>>> for human consumption mostly. We don't have any ptrace interface that
>>>>>> exposes HWCAPs. Since the gdbserver runs on the same machine as the
>>>>>> debugged process, it can check the HWCAPs itself, they are the same for
>>>>>> all processes.
>>>>>
>>>>> Sorry, I think i haven't made it clear. I already have access to
>>>>> HWCAP2 both
>>>> >from GDB's and gdbserver's side. But HWCAP2 only indicates the
>>>>> availability
>>>>> of a particular feature in a CPU, it doesn't necessarily means the
>>>>> traced
>>>>> process is actively using MTE, right?
>>>>
>>>> Right, but "actively" is not well defined either. The only way to tell
>>>> whether a process is using MTE is to look for any PROT_MTE mappings. You
>>>> can access these via /proc/<pid>/maps. In theory, one can use MTE
>>>> without enabling the tagged address ABI or even tag checking (i.e. no
>>>> prctl() call).
>>>>
>>>
>>> I see the problem. I was hoping for a more immediate form of runtime
>>> check. One debuggers would validate and enable all the tag checks and
>>> register access at process attach/startup.
>>>
>>> With that said, checking for PROT_MTE in /proc/<pid>/maps may still be
>>> useful, but a process with no immediate PROT_MTE maps doesn't mean such
>>> process won't attempt to use PROT_MTE later on. I'll have to factor that
>>> in, but I think it'll work.
>>>
>>> I guess HWCAP2_MTE will be useful after all. We can just assume that
>>> whenever we have HWCAP2_MTE, we can fetch MTE registers and check for
>>> PROT_MTE.
>>>
>>>>> So GDB/gdbserver would need runtime checks to be able to tell if a
>>>>> process
>>>>> is using MTE, in which case the tools will pay attention to tags and
>>>>> additional MTE-related registers (sctlr and gcr) we plan to make
>>>>> available
>>>>> to userspace.
>>>>
>>>> I'm happy to expose GCR_EL1.Excl and the SCTLR_EL1.TCF0 bits via ptrace
>>>> as a thread state. The tags, however, are a property of the memory range
>>>> rather than a per-thread state. That's what makes it different from
>>>> other register-based features like SVE.
>>>
>>> That's my understanding as well. I'm assuming, based on our previous
>>> discussion, that we'll have those couple registers under a regset (maybe
>>> NT_ARM_MTE).
>>>
>>>>
>>>>> The original proposal was to have GDB send PTRACE_PEEKMTETAGS with a
>>>>> NULL
>>>>> address and check the result. Then GDB would be able to decide if the
>>>>> process is using MTE or not.
>>>>
>>>> We don't store this information in the kernel as a bool and I don't
>>>> think it would be useful either. I think gdb, when displaying memory,
>>>> should attempt to show tags as well if the corresponding range was
>>>> mapped with PROT_MTE. Just probing whether a thread ever used MTE
>>>> doesn't help since you need to be more precise on which address supports
>>>> tags.
>>>
>>> Thanks for making this clear. Checking with ptrace won't work then. It
>>> seems like /proc/<pid>/maps is the way to go.
>>>
>>>>
>>>>>> BTW, in my pre-v4 patches (hopefully I'll post v4 this week), I changed
>>>>>> the ptrace tag access slightly to return an error (and no tags copied)
>>>>>> if the page has not been mapped with PROT_MTE. The other option would
>>>>>> have been read-as-zero/write-ignored as per the hardware behaviour.
>>>>>> Either option is fine by me but I thought the write-ignored part would
>>>>>> be more confusing for the debugger. If you have any preference here,
>>>>>> please let me know.
>>>>>
>>>>> I think erroring out is a better alternative, as long as the debugger
>>>>> can
>>>>> tell what the error means, like, for example, "this particular address
>>>>> doesn't make use of tags".
>>>>
>>>> And you could use this for probing whether the range has tags or not.
>>>> With my current patches it returns -EFAULT but happy to change this to
>>>> -EOPNOTSUPP or -EINVAL. Note that it only returns an error if no tags
>>>> copied. If gdb asks for a range of two pages and only the first one has
>>>> PROT_MTE, it will return 0 and set the number of tags copied equivalent
>>>> to the first page. A subsequent call would return an error.
>>>>
>>>> In my discussion with Dave on the documentation patch, I thought retries
>>>> wouldn't be needed but in the above case it may be useful to get an
>>>> error code. That's unless we change the interface to return an error and
>>>> also update the user iovec structure.
>>>>
>>>
>>> Let me think about this for a bit. I'm trying to factor in the
>>> /proc/<pid>/maps contents. If debuggers know which pages have PROT_MTE
>>> set, then we can teach the tools not to PEEK/POKE tags from/to those
>>> memory ranges, which simplifies the error handling a bit.
>>
>> I was checking the output of /proc/<pid>/maps and it doesn't seem to contain
>> flags against which i can match PROT_MTE. It seems /proc/<pid>/smaps is the
>> one that contains the flags (mt) for MTE. Am i missing something?
>>
>> Is this the only place debuggers can check for PROT_MTE? If so, that's
>> unfortunate. /proc/<pid>/smaps doesn't seem to be convenient for parsing.
> 
> Does the /proc approach work for gdbserver?

gdbserver also has access to /proc and reads memory from there 
(/proc/<pid>/mem).

> 
> For the SVE ptrace interface we eventually went with existence of the
> NT_ARM_SVE regset as being the canonical way of detecting whether SVE is
> present.

Do you mean "present" as in "this process is actively using SVE 
registers" or do you mean the CPU and kernel support SVE, but there's no 
guarantee SVE is being used?

 From what i remember, SVE runtime usage check is based on header data 
returned by the NT_ARM_SVE regset.

Right now i have a HWCAP2_MTE check for MTE. And for GDB, having 
HWCAP2_MTE implies having the NT_ARM_MTE regset.

> 
> As has been discussed here, I think we probably do want to expose the
> current MTE config for a thread via a new regset.  Without this, I can't
> see how the debugger can know for sure what's going on.

What kind of information would the debugger be looking for in those 
registers (sctlr and gcr)? Can MTE be switched on/off via those registers?

> 
> 
> Wrinkle: just because MTE is "off", pages might still be mapped with
> PROT_MTE and have arbitrary tags set on them, and the debugger perhaps
> needs a way to know that.  Currently grubbing around in /proc is the
> only way to discover that.  Dunno whether it matters.

That is the sort of thing that may confuse the debugger.

If MTE is "off" (and thus the debugger doesn't need to validate tags), 
then the pages mapped with PROT_MTE that show up in /proc/<pid>/smaps 
should be ignored?

I'm looking for a precise way to tell if MTE is being used or not for a 
particular process/thread. This, in turn, will tell debuggers when to 
look for PROT_MTE mappings in /proc/<pid>/smaps and when to validate 
tagged addresses.

So far my assumption was that MTE will always be "on" when HWCAP2_MTE is 
present. So having HWCAP2_MTE means we have the NT_ARM_MTE regset and 
that PROT_MTE pages have to be checked.
Catalin Marinas May 19, 2020, 4:10 p.m. UTC | #17
On Mon, May 18, 2020 at 02:12:24PM -0300, Luis Machado wrote:
> On 5/18/20 1:47 PM, Dave Martin wrote:
> > Wrinkle: just because MTE is "off", pages might still be mapped with
> > PROT_MTE and have arbitrary tags set on them, and the debugger perhaps
> > needs a way to know that.  Currently grubbing around in /proc is the
> > only way to discover that.  Dunno whether it matters.
> 
> That is the sort of thing that may confused the debugger.
> 
> If MTE is "off" (and thus the debugger doesn't need to validate tags), then
> the pages mapped with PROT_MTE that show up in /proc/<pid>/smaps should be
> ignored?

There is no such thing as global MTE "off". If the HWCAP is present, a
user program can map an address with PROT_MTE and access tags. Maybe it
uses it for extra storage, you never know, doesn't have to be heap
allocation related.

> I'm looking for a precise way to tell if MTE is being used or not for a
> particular process/thread. This, in turn, will tell debuggers when to look
> for PROT_MTE mappings in /proc/<pid>/smaps and when to validate tagged
> addresses.
> 
> So far my assumption was that MTE will always be "on" when HWCAP2_MTE is
> present. So having HWCAP2_MTE means we have the NT_ARM_MTE regset and that
> PROT_MTE pages have to be checked.

Yes. I haven't figured out what to put in the regset yet, most likely
the prctl value as it has other software-only controls like the tagged
address ABI.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 22eb3e06f311..0ca2aaff07a1 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -2,12 +2,21 @@ 
 #ifndef __ASM_MTE_H
 #define __ASM_MTE_H
 
+#define MTE_ALLOC_SIZE	UL(16)
+#define MTE_ALLOC_MASK	(~(MTE_ALLOC_SIZE - 1))
+#define MTE_TAG_SHIFT	(56)
+#define MTE_TAG_SIZE	(4)
+
 #ifndef __ASSEMBLY__
 
 #include <linux/sched.h>
 
 /* Memory Tagging API */
 int mte_memcmp_pages(const void *page1_addr, const void *page2_addr);
+unsigned long mte_copy_tags_from_user(void *to, const void __user *from,
+				      unsigned long n);
+unsigned long mte_copy_tags_to_user(void __user *to, void *from,
+				    unsigned long n);
 
 #ifdef CONFIG_ARM64_MTE
 void flush_mte_state(void);
@@ -15,6 +24,8 @@  void mte_thread_switch(struct task_struct *next);
 void mte_suspend_exit(void);
 long set_mte_ctrl(unsigned long arg);
 long get_mte_ctrl(void);
+int mte_ptrace_copy_tags(struct task_struct *child, long request,
+			 unsigned long addr, unsigned long data);
 #else
 static inline void flush_mte_state(void)
 {
@@ -33,6 +44,12 @@  static inline long get_mte_ctrl(void)
 {
 	return 0;
 }
+static inline int mte_ptrace_copy_tags(struct task_struct *child,
+				       long request, unsigned long addr,
+				       unsigned long data)
+{
+	return -EIO;
+}
 #endif
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 1daf6dda8af0..cd2a4a164de3 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -67,6 +67,9 @@ 
 /* syscall emulation path in ptrace */
 #define PTRACE_SYSEMU		  31
 #define PTRACE_SYSEMU_SINGLESTEP  32
+/* MTE allocation tag access */
+#define PTRACE_PEEKMTETAGS	  33
+#define PTRACE_POKEMTETAGS	  34
 
 #ifndef __ASSEMBLY__
 
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index fa4a4196b248..0cb496ed9bf9 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -3,12 +3,17 @@ 
  * Copyright (C) 2020 ARM Ltd.
  */
 
+#include <linux/kernel.h>
+#include <linux/mm.h>
 #include <linux/prctl.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/thread_info.h>
+#include <linux/uio.h>
 
 #include <asm/cpufeature.h>
 #include <asm/mte.h>
+#include <asm/ptrace.h>
 #include <asm/sysreg.h>
 
 static void update_sctlr_el1_tcf0(u64 tcf0)
@@ -133,3 +138,125 @@  long get_mte_ctrl(void)
 
 	return ret;
 }
+
+/*
+ * Access MTE tags in another process' address space as given in mm. Update
+ * the number of tags copied. Return 0 if any tags copied, error otherwise.
+ * Inspired by __access_remote_vm().
+ */
+static int __access_remote_tags(struct task_struct *tsk, struct mm_struct *mm,
+				unsigned long addr, struct iovec *kiov,
+				unsigned int gup_flags)
+{
+	struct vm_area_struct *vma;
+	void __user *buf = kiov->iov_base;
+	size_t len = kiov->iov_len;
+	int ret = 0;	/* initialised: a zero-length iovec skips the loop */
+	int write = gup_flags & FOLL_WRITE;
+
+	/*
+	 * Validate the tracer's buffer before taking mmap_sem so that a
+	 * failure here cannot return with the semaphore still held.
+	 */
+	if (!access_ok(buf, len))
+		return -EFAULT;
+
+	if (down_read_killable(&mm->mmap_sem))
+		return -EIO;
+
+	while (len) {
+		unsigned long tags, offset;
+		void *maddr;
+		struct page *page = NULL;
+
+		/* pin one page of the tracee at a time */
+		ret = get_user_pages_remote(tsk, mm, addr, 1, gup_flags,
+					    &page, &vma, NULL);
+		if (ret <= 0)
+			break;
+
+		/* limit access to the end of the page */
+		offset = offset_in_page(addr);
+		tags = min(len, (PAGE_SIZE - offset) / MTE_ALLOC_SIZE);
+
+		maddr = page_address(page);
+		if (write) {
+			tags = mte_copy_tags_from_user(maddr + offset, buf, tags);
+			set_page_dirty_lock(page);
+		} else {
+			tags = mte_copy_tags_to_user(buf, maddr + offset, tags);
+		}
+		put_page(page);
+
+		/* error accessing the tracer's buffer */
+		if (!tags)
+			break;
+
+		len -= tags;
+		buf += tags;
+		addr += tags * MTE_ALLOC_SIZE;
+	}
+	up_read(&mm->mmap_sem);
+
+	/* return an error if no tags copied */
+	kiov->iov_len = buf - kiov->iov_base;
+	if (!kiov->iov_len) {
+		/* check for error accessing the tracee's address space */
+		if (ret <= 0)
+			return -EIO;
+		else
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * Copy MTE tags in another process' address space at 'addr' to/from tracer's
+ * iovec buffer. Return 0 on success. Inspired by ptrace_access_vm().
+ */
+static int access_remote_tags(struct task_struct *tsk, unsigned long addr,
+			      struct iovec *kiov, unsigned int gup_flags)
+{
+	struct mm_struct *mm;
+	int ret;
+
+	mm = get_task_mm(tsk);
+	if (!mm)
+		return -EPERM;
+
+	/*
+	 * Mirror ptrace_access_vm(): only the active tracer (parent after
+	 * attach) may touch the tags, and only if the tracee is dumpable
+	 * or the tracer is capable over the tracee's mm user namespace.
+	 */
+	if (!tsk->ptrace || (current != tsk->parent) ||
+	    ((get_dumpable(mm) != SUID_DUMP_USER) &&
+	     !ptracer_capable(tsk, mm->user_ns))) {
+		mmput(mm);
+		return -EPERM;
+	}
+
+	ret = __access_remote_tags(tsk, mm, addr, kiov, gup_flags);
+	mmput(mm);
+
+	return ret;
+}
+
+/*
+ * Handle PTRACE_PEEKMTETAGS/PTRACE_POKEMTETAGS: bulk copy MTE allocation
+ * tags between the tracee's memory at 'addr' and the tracer's buffer
+ * described by the struct iovec at 'data'. On success (at least one tag
+ * copied) returns 0 and updates the tracer's iov_len with the count.
+ */
+int mte_ptrace_copy_tags(struct task_struct *child, long request,
+			 unsigned long addr, unsigned long data)
+{
+	int ret;
+	struct iovec kiov;
+	struct iovec __user *uiov = (void __user *)data;
+	unsigned int gup_flags = FOLL_FORCE;
+
+	if (!system_supports_mte())
+		return -EIO;
+
+	/* fetch the tracer's iovec descriptor from user space */
+	if (get_user(kiov.iov_base, &uiov->iov_base) ||
+	    get_user(kiov.iov_len, &uiov->iov_len))
+		return -EFAULT;
+
+	/* POKE writes tags into the tracee, PEEK only reads them */
+	if (request == PTRACE_POKEMTETAGS)
+		gup_flags |= FOLL_WRITE;
+
+	/* align addr to the MTE tag granule */
+	addr &= MTE_ALLOC_MASK;
+
+	ret = access_remote_tags(child, addr, &kiov, gup_flags);
+	if (!ret)
+		/* report back the number of tags actually copied */
+		ret = __put_user(kiov.iov_len, &uiov->iov_len);
+
+	return ret;
+}
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 077e352495eb..1fdb841ad536 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -34,6 +34,7 @@ 
 #include <asm/cpufeature.h>
 #include <asm/debug-monitors.h>
 #include <asm/fpsimd.h>
+#include <asm/mte.h>
 #include <asm/pgtable.h>
 #include <asm/pointer_auth.h>
 #include <asm/stacktrace.h>
@@ -1797,7 +1798,19 @@  const struct user_regset_view *task_user_regset_view(struct task_struct *task)
 long arch_ptrace(struct task_struct *child, long request,
 		 unsigned long addr, unsigned long data)
 {
-	return ptrace_request(child, request, addr, data);
+	int ret;
+
+	switch (request) {
+	case PTRACE_PEEKMTETAGS:
+	case PTRACE_POKEMTETAGS:
+		/* arm64-specific requests: bulk MTE tag access */
+		ret = mte_ptrace_copy_tags(child, request, addr, data);
+		break;
+	default:
+		/* everything else goes through the generic handler */
+		ret = ptrace_request(child, request, addr, data);
+		break;
+	}
+
+	return ret;
 }
 
 enum ptrace_syscall_dir {
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index bd51ea7e2fcb..45be04a8c73c 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -5,6 +5,7 @@ 
 #include <linux/linkage.h>
 
 #include <asm/assembler.h>
+#include <asm/mte.h>
 
 /*
  * Compare tags of two pages
@@ -44,3 +45,52 @@  SYM_FUNC_START(mte_memcmp_pages)
 
 	ret
 SYM_FUNC_END(mte_memcmp_pages)
+
+/*
+ * Read tags from a user buffer (one tag per byte) and set the corresponding
+ * tags at the given kernel address. Used by PTRACE_POKEMTETAGS.
+ *   x0 - kernel address (to)
+ *   x1 - user buffer (from)
+ *   x2 - number of tags/bytes (n); the loop body runs before the count is
+ *        checked, so callers must guarantee n > 0
+ * Returns:
+ *   x0 - number of tags read/set
+ */
+SYM_FUNC_START(mte_copy_tags_from_user)
+	mov	x3, x1			// remember the start of the user buffer
+1:
+USER(2f, ldtrb	w4, [x1])		// unprivileged load; a fault branches to 2f
+	lsl	x4, x4, #MTE_TAG_SHIFT	// move tag value into the tag bit field
+	stg	x4, [x0], #MTE_ALLOC_SIZE	// set the tag for one granule
+	add	x1, x1, #1
+	subs	x2, x2, #1
+	b.ne	1b
+
+	// exception handling and function return
+2:	sub	x0, x1, x3		// update the number of tags set
+	ret
+SYM_FUNC_END(mte_copy_tags_from_user)
+
+/*
+ * Get the tags from a kernel address range and write the tag values to the
+ * given user buffer (one tag per byte). Used by PTRACE_PEEKMTETAGS.
+ *   x0 - user buffer (to)
+ *   x1 - kernel address (from)
+ *   x2 - number of tags/bytes (n); the loop body runs before the count is
+ *        checked, so callers must guarantee n > 0
+ * Returns:
+ *   x0 - number of tags read/set
+ */
+SYM_FUNC_START(mte_copy_tags_to_user)
+	mov	x3, x0			// remember the start of the user buffer
+1:
+	ldg	x4, [x1]		// load the allocation tag for one granule
+	ubfx	x4, x4, #MTE_TAG_SHIFT, #MTE_TAG_SIZE
+USER(2f, sttrb	w4, [x0])		// unprivileged store; a fault branches to 2f
+	add	x0, x0, #1
+	add	x1, x1, #MTE_ALLOC_SIZE
+	subs	x2, x2, #1
+	b.ne	1b
+
+	// exception handling and function return
+2:	sub	x0, x0, x3		// update the number of tags copied
+	ret
+SYM_FUNC_END(mte_copy_tags_to_user)