
[mm,v3,25/38] kasan, vmalloc, arm64: mark vmalloc mappings as pgprot_tagged

Message ID: d91e501aef74c5bb924cae90b469ff0dc1d56488.1639432170.git.andreyknvl@google.com
State: New
Series: kasan, vmalloc, arm64: add vmalloc tagging support for SW/HW_TAGS

Commit Message

andrey.konovalov@linux.dev Dec. 13, 2021, 9:54 p.m. UTC
From: Andrey Konovalov <andreyknvl@google.com>

HW_TAGS KASAN relies on ARM Memory Tagging Extension (MTE). With MTE,
a memory region must be mapped as MT_NORMAL_TAGGED to allow setting
memory tags via MTE-specific instructions.
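
On arm64, pgprot_tagged() rewrites the pte memory attribute index to
select the tagged Normal memory type. A sketch of the definition (based
on arch/arm64/include/asm/pgtable.h; the exact form may vary across
kernel versions):

/* Sketch, not verbatim kernel source: select MT_NORMAL_TAGGED for the pte. */
#define pgprot_tagged(prot) \
	__pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_TAGGED))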

Add the required protection bits to vmalloc() allocations. These
allocations are always backed by page_alloc pages, so the tags are
actually set on the corresponding physical memory.

Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Co-developed-by: Vincenzo Frascino <vincenzo.frascino@arm.com>
Signed-off-by: Vincenzo Frascino <vincenzo.frascino@arm.com>

---

Changes v2->v3:
- Update patch description.
---
 arch/arm64/include/asm/vmalloc.h | 10 ++++++++++
 include/linux/vmalloc.h          |  7 +++++++
 mm/vmalloc.c                     |  2 ++
 3 files changed, 19 insertions(+)

Comments

Catalin Marinas Dec. 14, 2021, 5:11 p.m. UTC | #1
On Mon, Dec 13, 2021 at 10:54:21PM +0100, andrey.konovalov@linux.dev wrote:
> diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
> index b9185503feae..3d35adf365bf 100644
> --- a/arch/arm64/include/asm/vmalloc.h
> +++ b/arch/arm64/include/asm/vmalloc.h
> @@ -25,4 +25,14 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
>  
>  #endif
>  
> +#define arch_vmalloc_pgprot_modify arch_vmalloc_pgprot_modify
> +static inline pgprot_t arch_vmalloc_pgprot_modify(pgprot_t prot)
> +{
> +	if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) &&
> +			(pgprot_val(prot) == pgprot_val(PAGE_KERNEL)))
> +		prot = pgprot_tagged(prot);
> +
> +	return prot;
> +}
> +
>  #endif /* _ASM_ARM64_VMALLOC_H */
> diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
> index 28becb10d013..760caeedd749 100644
> --- a/include/linux/vmalloc.h
> +++ b/include/linux/vmalloc.h
> @@ -115,6 +115,13 @@ static inline int arch_vmap_pte_supported_shift(unsigned long size)
>  }
>  #endif
>  
> +#ifndef arch_vmalloc_pgprot_modify
> +static inline pgprot_t arch_vmalloc_pgprot_modify(pgprot_t prot)
> +{
> +	return prot;
> +}
> +#endif
> +
>  /*
>   *	Highlevel APIs for driver use
>   */
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 837ed355bfc6..58bd2f7f86d7 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -3060,6 +3060,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
>  		return NULL;
>  	}
>  
> +	prot = arch_vmalloc_pgprot_modify(prot);
> +
>  	if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP)) {
>  		unsigned long size_per_node;

I wonder whether we could fix the prot bits in the caller instead and we
won't need to worry about the exec or the module_alloc() case. Something
like:

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index d2a00ad4e1dd..4e8c61255b92 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3112,7 +3112,7 @@ void *__vmalloc_node(unsigned long size, unsigned long align,
 			    gfp_t gfp_mask, int node, const void *caller)
 {
 	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
-				gfp_mask, PAGE_KERNEL, 0, node, caller);
+			gfp_mask, pgprot_hwasan(PAGE_KERNEL), 0, node, caller);
 }
 /*
  * This is only for performance analysis of vmalloc and stress purpose.
@@ -3161,7 +3161,7 @@ EXPORT_SYMBOL(vmalloc);
 void *vmalloc_no_huge(unsigned long size)
 {
 	return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
-				    GFP_KERNEL, PAGE_KERNEL, VM_NO_HUGE_VMAP,
+				    GFP_KERNEL, pgprot_hwasan(PAGE_KERNEL), VM_NO_HUGE_VMAP,
 				    NUMA_NO_NODE, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(vmalloc_no_huge);

with pgprot_hwasan() defined to pgprot_tagged() only if KASAN_HW_TAGS is
enabled.
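
A minimal sketch of such a helper (pgprot_hwasan() is a name suggested
above, not an existing kernel macro):

/* Sketch only: helper as suggested; only HW_TAGS gets tagged mappings. */
#ifdef CONFIG_KASAN_HW_TAGS
#define pgprot_hwasan(prot)	pgprot_tagged(prot)
#else
#define pgprot_hwasan(prot)	(prot)
#endif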
Andrey Konovalov Dec. 14, 2021, 6:27 p.m. UTC | #2
On Tue, Dec 14, 2021 at 6:11 PM Catalin Marinas <catalin.marinas@arm.com> wrote:
> [...]
> I wonder whether we could fix the prot bits in the caller instead and we
> won't need to worry about the exec or the module_alloc() case. Something
> like:
> [...]
> with pgprot_hwasan() defined to pgprot_tagged() only if KASAN_HW_TAGS is
> enabled.

And also change kasan_unpoison_vmalloc() to tag only if
pgprot_tagged() has been applied, I assume.

Hm. Then __vmalloc_node_range() callers will never get tagged memory
unless requested. I suppose that's OK, most of them untag the pointer
anyway.

But this won't work for SW_TAGS mode, which is also affected by the
exec issue and needs those kasan_reset_tag()s in module_alloc()/BPF.
We could invent some virtual protection bit for it and reuse
pgprot_hwasan(). Not sure if this would be acceptable.
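
For context, the exec case looks roughly like this (simplified sketch
based on arch/arm64/kernel/module.c; fallback and KASAN shadow handling
elided): executable mappings cannot be tagged, so the SW_TAGS pointer
tag has to be reset before the pointer is handed out.

void *module_alloc(unsigned long size)
{
	void *p;

	p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base,
				 module_alloc_end, GFP_KERNEL, PAGE_KERNEL,
				 0, NUMA_NO_NODE, __builtin_return_address(0));
	/* ... fallback region and KASAN shadow setup elided ... */

	/* Executable memory is untagged; drop any KASAN tag from the pointer. */
	return kasan_reset_tag(p);
}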
Catalin Marinas Dec. 14, 2021, 7:27 p.m. UTC | #3
On Tue, Dec 14, 2021 at 07:27:09PM +0100, Andrey Konovalov wrote:
> [...]
> And also change kasan_unpoison_vmalloc() to tag only if
> pgprot_tagged() has been applied, I assume.
> 
> Hm. Then __vmalloc_node_range() callers will never get tagged memory
> unless requested. I suppose that's OK, most of them untag the pointer
> anyway.
> 
> But this won't work for SW_TAGS mode, which is also affected by the
> exec issue and needs those kasan_reset_tag()s in module_alloc()/BPF.
> We could invent some virtual protection bit for it and reuse
> pgprot_hwasan(). Not sure if this would be acceptable.

Ah, a pgprot_hwasan() for the sw tags is probably not acceptable as this
requires an unnecessary pte bit. An alternative could be a GFP flag that
gets passed only from __vmalloc_node() etc.

Otherwise your original approach works as well.
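
A sketch of that alternative (__GFP_KASAN_VMALLOC_TAG is an invented
name for illustration, not a real kernel flag):

/*
 * Only __vmalloc_node() and friends would pass the hypothetical flag;
 * direct __vmalloc_node_range() callers such as module_alloc() would
 * not, so their mappings would stay untagged.
 */
void *__vmalloc_node(unsigned long size, unsigned long align,
		     gfp_t gfp_mask, int node, const void *caller)
{
	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
				    gfp_mask | __GFP_KASAN_VMALLOC_TAG,
				    PAGE_KERNEL, 0, node, caller);
}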
Andrey Konovalov Dec. 20, 2021, 9:38 p.m. UTC | #4
On Tue, Dec 14, 2021 at 8:27 PM Catalin Marinas <catalin.marinas@arm.com> wrote:
> [...]
> Ah, a pgprot_hwasan() for the sw tags is probably not acceptable as this
> requires an unnecessary pte bit. An alternative could be a GFP flag that
> gets passed only from __vmalloc_node() etc.

This will still leave the BPF JIT special case though.

So I'm leaning towards keeping my approach.

Thanks!

Patch

diff --git a/arch/arm64/include/asm/vmalloc.h b/arch/arm64/include/asm/vmalloc.h
index b9185503feae..3d35adf365bf 100644
--- a/arch/arm64/include/asm/vmalloc.h
+++ b/arch/arm64/include/asm/vmalloc.h
@@ -25,4 +25,14 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
 
 #endif
 
+#define arch_vmalloc_pgprot_modify arch_vmalloc_pgprot_modify
+static inline pgprot_t arch_vmalloc_pgprot_modify(pgprot_t prot)
+{
+	if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) &&
+			(pgprot_val(prot) == pgprot_val(PAGE_KERNEL)))
+		prot = pgprot_tagged(prot);
+
+	return prot;
+}
+
 #endif /* _ASM_ARM64_VMALLOC_H */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 28becb10d013..760caeedd749 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -115,6 +115,13 @@ static inline int arch_vmap_pte_supported_shift(unsigned long size)
 }
 #endif
 
+#ifndef arch_vmalloc_pgprot_modify
+static inline pgprot_t arch_vmalloc_pgprot_modify(pgprot_t prot)
+{
+	return prot;
+}
+#endif
+
 /*
  *	Highlevel APIs for driver use
  */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 837ed355bfc6..58bd2f7f86d7 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3060,6 +3060,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 		return NULL;
 	}
 
+	prot = arch_vmalloc_pgprot_modify(prot);
+
 	if (vmap_allow_huge && !(vm_flags & VM_NO_HUGE_VMAP)) {
 		unsigned long size_per_node;