diff mbox series

[2/3] arm64: mte: handle tags zeroing at page allocation time

Message ID 20210511073108.138837-2-pcc@google.com (mailing list archive)
State New
Headers show
Series [1/3] kasan: use separate (un)poison implementation for integrated init | expand

Commit Message

Peter Collingbourne May 11, 2021, 7:31 a.m. UTC
Currently, on an anonymous page fault, the kernel allocates a zeroed
page and maps it in user space. If the mapping is tagged (PROT_MTE),
set_pte_at() additionally clears the tags. It is, however, more
efficient to clear the tags at the same time as zeroing the data on
allocation. To avoid clearing the tags on any page (which may not be
mapped as tagged), only do this if the vma flags contain VM_MTE. This
requires introducing a new GFP flag that is used to determine whether
to clear the tags.

The DC GZVA instruction with a 0 top byte (and 0 tag) requires
top-byte-ignore. Set the TCR_EL1.{TBI1,TBID1} bits irrespective of
whether KASAN_HW is enabled.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Co-developed-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Link: https://linux-review.googlesource.com/id/Id46dc94e30fe11474f7e54f5d65e7658dbdddb26
---
 arch/arm64/include/asm/mte.h  |  4 ++++
 arch/arm64/include/asm/page.h | 11 +++++++++--
 arch/arm64/lib/mte.S          | 20 ++++++++++++++++++++
 arch/arm64/mm/fault.c         | 25 +++++++++++++++++++++++++
 arch/arm64/mm/proc.S          | 10 +++++++---
 include/linux/gfp.h           |  9 +++++++--
 include/linux/highmem.h       | 10 ++++++++++
 mm/kasan/hw_tags.c            |  9 ++++++++-
 mm/page_alloc.c               | 14 +++++++++++---
 9 files changed, 101 insertions(+), 11 deletions(-)

Comments

Catalin Marinas May 11, 2021, 12:53 p.m. UTC | #1
Hi Peter,

First of all, could you please add a cover letter to your series (in
general) explaining the rationale for the patches, e.g. optimise tag
initialisation for user pages? It makes it a lot easier to review if the
overall picture is presented in the cover.

On Tue, May 11, 2021 at 12:31:07AM -0700, Peter Collingbourne wrote:
> Currently, on an anonymous page fault, the kernel allocates a zeroed
> page and maps it in user space. If the mapping is tagged (PROT_MTE),
> set_pte_at() additionally clears the tags. It is, however, more
> efficient to clear the tags at the same time as zeroing the data on
> allocation. To avoid clearing the tags on any page (which may not be
> mapped as tagged), only do this if the vma flags contain VM_MTE. This
> requires introducing a new GFP flag that is used to determine whether
> to clear the tags.
> 
> The DC GZVA instruction with a 0 top byte (and 0 tag) requires
> top-byte-ignore. Set the TCR_EL1.{TBI1,TBID1} bits irrespective of
> whether KASAN_HW is enabled.
> 
> Signed-off-by: Peter Collingbourne <pcc@google.com>
> Co-developed-by: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Link: https://linux-review.googlesource.com/id/Id46dc94e30fe11474f7e54f5d65e7658dbdddb26

This doesn't mention that the patch adds tag clearing on free as well.
I'd actually leave this part out for a separate patch. It's not done for
tags in current mainline when kasan is disabled, AFAICT.

> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
> index 012cffc574e8..a0bcaa5f735e 100644
> --- a/arch/arm64/include/asm/page.h
> +++ b/arch/arm64/include/asm/page.h
> @@ -13,6 +13,7 @@
>  #ifndef __ASSEMBLY__
>  
>  #include <linux/personality.h> /* for READ_IMPLIES_EXEC */
> +#include <linux/types.h> /* for gfp_t */
>  #include <asm/pgtable-types.h>
>  
>  struct page;
> @@ -28,10 +29,16 @@ void copy_user_highpage(struct page *to, struct page *from,
>  void copy_highpage(struct page *to, struct page *from);
>  #define __HAVE_ARCH_COPY_HIGHPAGE
>  
> -#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
> -	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
> +struct page *__alloc_zeroed_user_highpage(gfp_t movableflags,
> +					  struct vm_area_struct *vma,
> +					  unsigned long vaddr);
>  #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
>  
> +#define want_zero_tags_on_free() system_supports_mte()

As I said above, unless essential to this patch, please move it to a
separate one.

Also, do we need this even when the kernel doesn't have kasan_hw?

> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 871c82ab0a30..8127e0c0b8fb 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -921,3 +921,28 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
>  	debug_exception_exit(regs);
>  }
>  NOKPROBE_SYMBOL(do_debug_exception);
> +
> +/*
> + * Used during anonymous page fault handling.
> + */
> +struct page *__alloc_zeroed_user_highpage(gfp_t flags,
> +					  struct vm_area_struct *vma,
> +					  unsigned long vaddr)
> +{
> +	/*
> +	 * If the page is mapped with PROT_MTE, initialise the tags at the
> +	 * point of allocation and page zeroing as this is usually faster than
> +	 * separate DC ZVA and STGM.
> +	 */
> +	if (vma->vm_flags & VM_MTE)
> +		flags |= __GFP_ZEROTAGS;
> +
> +	return alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | flags, vma, vaddr);
> +}
> +
> +void tag_clear_highpage(struct page *page)
> +{
> +	mte_zero_clear_page_tags(page_address(page));
> +	page_kasan_tag_reset(page);
> +	set_bit(PG_mte_tagged, &page->flags);
> +}

Do we need the page_kasan_tag_reset() here? Maybe we do. Is it because
kasan_alloc_pages() no longer calls kasan_unpoison_pages() below?

> diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
> index 45e552cb9172..34362c8d0955 100644
> --- a/mm/kasan/hw_tags.c
> +++ b/mm/kasan/hw_tags.c
> @@ -242,7 +242,14 @@ void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags)
>  {
>  	bool init = !want_init_on_free() && want_init_on_alloc(flags);
>  
> -	kasan_unpoison_pages(page, order, init);
> +	if (flags & __GFP_ZEROTAGS) {
> +		int i;
> +
> +		for (i = 0; i != 1 << order; ++i)
> +			tag_clear_highpage(page + i);
> +	} else {
> +		kasan_unpoison_pages(page, order, init);
> +	}
>  }
>  
>  void kasan_free_pages(struct page *page, unsigned int order)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 6e82a7f6fd6f..7ac0f0721d22 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1219,10 +1219,16 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
>  	return ret;
>  }
>  
> -static void kernel_init_free_pages(struct page *page, int numpages)
> +static void kernel_init_free_pages(struct page *page, int numpages, bool zero_tags)
>  {
>  	int i;
>  
> +	if (zero_tags) {
> +		for (i = 0; i < numpages; i++)
> +			tag_clear_highpage(page + i);
> +		return;
> +	}
> +
>  	/* s390's use of memset() could override KASAN redzones. */
>  	kasan_disable_current();
>  	for (i = 0; i < numpages; i++) {

This function has another loop calling clear_highpage(). Do we end up
zeroing the page twice?

> @@ -1314,7 +1320,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
>  		bool init = want_init_on_free();
>  
>  		if (init)
> -			kernel_init_free_pages(page, 1 << order);
> +			kernel_init_free_pages(page, 1 << order,
> +					       want_zero_tags_on_free());
>  		if (!skip_kasan_poison)
>  			kasan_poison_pages(page, order, init);
>  	}

I think passing 'false' here to kernel_init_free_pages() matches the
current mainline. You could make this dependent on kasan_hw being
enabled rather than just system_supports_mte(). With kasan_hw disabled,
the kernel accesses are not checked anyway, so it's pointless to erase
the tags on free.
Peter Collingbourne May 11, 2021, 8:33 p.m. UTC | #2
On Tue, May 11, 2021 at 5:54 AM Catalin Marinas <catalin.marinas@arm.com> wrote:
>
> Hi Peter,
>
> First of all, could you please add a cover letter to your series (in
> general) explaining the rationale for the patches, e.g. optimise tag
> initialisation for user pages? It makes it a lot easier to review if the
> overall picture is presented in the cover.

Sure. It seems appropriate in cases where the series is doing a number
of different things like this series, but maybe not in simpler cases
(e.g. one cleanup patch followed by a main patch with a
self-explanatory commit message).

> On Tue, May 11, 2021 at 12:31:07AM -0700, Peter Collingbourne wrote:
> > Currently, on an anonymous page fault, the kernel allocates a zeroed
> > page and maps it in user space. If the mapping is tagged (PROT_MTE),
> > set_pte_at() additionally clears the tags. It is, however, more
> > efficient to clear the tags at the same time as zeroing the data on
> > allocation. To avoid clearing the tags on any page (which may not be
> > mapped as tagged), only do this if the vma flags contain VM_MTE. This
> > requires introducing a new GFP flag that is used to determine whether
> > to clear the tags.
> >
> > The DC GZVA instruction with a 0 top byte (and 0 tag) requires
> > top-byte-ignore. Set the TCR_EL1.{TBI1,TBID1} bits irrespective of
> > whether KASAN_HW is enabled.
> >
> > Signed-off-by: Peter Collingbourne <pcc@google.com>
> > Co-developed-by: Catalin Marinas <catalin.marinas@arm.com>
> > Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> > Link: https://linux-review.googlesource.com/id/Id46dc94e30fe11474f7e54f5d65e7658dbdddb26
>
> This doesn't mention that the patch adds tag clearing on free as well.
> I'd actually leave this part out for a separate patch. It's not done for
> tags in current mainline when kasan is disabled, AFAICT.

The tag clearing on free was thought to be necessary (because clear on
free implies no clear on alloc) but, upon further reflection, it
isn't. This is because we clear the struct page flags, including
PG_mte_tagged, on free, which means that we will set the tags if we
later end up needing to reuse the page as a tagged page. This means
that clear on free is quite inefficient, but no less efficient than
before. As you mentioned, we can leave any improvements there to a
separate patch.

> > diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
> > index 012cffc574e8..a0bcaa5f735e 100644
> > --- a/arch/arm64/include/asm/page.h
> > +++ b/arch/arm64/include/asm/page.h
> > @@ -13,6 +13,7 @@
> >  #ifndef __ASSEMBLY__
> >
> >  #include <linux/personality.h> /* for READ_IMPLIES_EXEC */
> > +#include <linux/types.h> /* for gfp_t */
> >  #include <asm/pgtable-types.h>
> >
> >  struct page;
> > @@ -28,10 +29,16 @@ void copy_user_highpage(struct page *to, struct page *from,
> >  void copy_highpage(struct page *to, struct page *from);
> >  #define __HAVE_ARCH_COPY_HIGHPAGE
> >
> > -#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
> > -     alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
> > +struct page *__alloc_zeroed_user_highpage(gfp_t movableflags,
> > +                                       struct vm_area_struct *vma,
> > +                                       unsigned long vaddr);
> >  #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
> >
> > +#define want_zero_tags_on_free() system_supports_mte()
>
> As I said above, unless essential to this patch, please move it to a
> separate one.

Will do.

> Also, do we need this even when the kernel doesn't have kasan_hw?

Yes, if we preserved PG_mte_tagged across page free/alloc then this
would be needed even without KASAN.

> > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> > index 871c82ab0a30..8127e0c0b8fb 100644
> > --- a/arch/arm64/mm/fault.c
> > +++ b/arch/arm64/mm/fault.c
> > @@ -921,3 +921,28 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
> >       debug_exception_exit(regs);
> >  }
> >  NOKPROBE_SYMBOL(do_debug_exception);
> > +
> > +/*
> > + * Used during anonymous page fault handling.
> > + */
> > +struct page *__alloc_zeroed_user_highpage(gfp_t flags,
> > +                                       struct vm_area_struct *vma,
> > +                                       unsigned long vaddr)
> > +{
> > +     /*
> > +      * If the page is mapped with PROT_MTE, initialise the tags at the
> > +      * point of allocation and page zeroing as this is usually faster than
> > +      * separate DC ZVA and STGM.
> > +      */
> > +     if (vma->vm_flags & VM_MTE)
> > +             flags |= __GFP_ZEROTAGS;
> > +
> > +     return alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | flags, vma, vaddr);
> > +}
> > +
> > +void tag_clear_highpage(struct page *page)
> > +{
> > +     mte_zero_clear_page_tags(page_address(page));
> > +     page_kasan_tag_reset(page);
> > +     set_bit(PG_mte_tagged, &page->flags);
> > +}
>
> Do we need the page_kasan_tag_reset() here? Maybe we do. Is it because
> kasan_alloc_pages() no longer calls kasan_unpoison_pages() below?

Yes, otherwise the page tag will be left at an arbitrary (most likely
poison) value, which would mean that any kernel-side accesses to these
pages would fail (unless userspace happened to tag its memory using
the page tag). page_kasan_tag_reset() sets the page tag to the TCMA
tag which allows those accesses to succeed. We need to call
page_kasan_tag_reset() in mte_sync_page_tags() for similar reasons.
It's unrelated to no longer calling kasan_unpoison_pages() because,
even if we were still calling it, the end result would still be that
userspace's tags won't necessarily match the kernel's page tag.

> > diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
> > index 45e552cb9172..34362c8d0955 100644
> > --- a/mm/kasan/hw_tags.c
> > +++ b/mm/kasan/hw_tags.c
> > @@ -242,7 +242,14 @@ void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags)
> >  {
> >       bool init = !want_init_on_free() && want_init_on_alloc(flags);
> >
> > -     kasan_unpoison_pages(page, order, init);
> > +     if (flags & __GFP_ZEROTAGS) {
> > +             int i;
> > +
> > +             for (i = 0; i != 1 << order; ++i)
> > +                     tag_clear_highpage(page + i);
> > +     } else {
> > +             kasan_unpoison_pages(page, order, init);
> > +     }
> >  }
> >
> >  void kasan_free_pages(struct page *page, unsigned int order)
> > diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> > index 6e82a7f6fd6f..7ac0f0721d22 100644
> > --- a/mm/page_alloc.c
> > +++ b/mm/page_alloc.c
> > @@ -1219,10 +1219,16 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
> >       return ret;
> >  }
> >
> > -static void kernel_init_free_pages(struct page *page, int numpages)
> > +static void kernel_init_free_pages(struct page *page, int numpages, bool zero_tags)
> >  {
> >       int i;
> >
> > +     if (zero_tags) {
> > +             for (i = 0; i < numpages; i++)
> > +                     tag_clear_highpage(page + i);
> > +             return;
> > +     }
> > +
> >       /* s390's use of memset() could override KASAN redzones. */
> >       kasan_disable_current();
> >       for (i = 0; i < numpages; i++) {
>
> This function has another loop calling clear_highpage(). Do we end up
> zeroing the page twice?

No, because we return after the loop that calls tag_clear_highpage().

> > @@ -1314,7 +1320,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
> >               bool init = want_init_on_free();
> >
> >               if (init)
> > -                     kernel_init_free_pages(page, 1 << order);
> > +                     kernel_init_free_pages(page, 1 << order,
> > +                                            want_zero_tags_on_free());
> >               if (!skip_kasan_poison)
> >                       kasan_poison_pages(page, order, init);
> >       }
>
> I think passing 'false' here to kernel_init_free_pages() matches the
> current mainline. You could make this dependent on kasan_hw being
> enabled rather than just system_supports_mte(). With kasan_hw disabled,
> the kernel accesses are not checked anyway, so it's pointless to erase
> the tags on free.

I'll just pass false here as it's unneeded for KASAN.

Peter
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index bc88a1ced0d7..67bf259ae768 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -37,6 +37,7 @@  void mte_free_tag_storage(char *storage);
 /* track which pages have valid allocation tags */
 #define PG_mte_tagged	PG_arch_2
 
+void mte_zero_clear_page_tags(void *addr);
 void mte_sync_tags(pte_t *ptep, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
 void mte_thread_init_user(void);
@@ -53,6 +54,9 @@  int mte_ptrace_copy_tags(struct task_struct *child, long request,
 /* unused if !CONFIG_ARM64_MTE, silence the compiler */
 #define PG_mte_tagged	0
 
+static inline void mte_zero_clear_page_tags(void *addr)
+{
+}
 static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
 {
 }
diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
index 012cffc574e8..a0bcaa5f735e 100644
--- a/arch/arm64/include/asm/page.h
+++ b/arch/arm64/include/asm/page.h
@@ -13,6 +13,7 @@ 
 #ifndef __ASSEMBLY__
 
 #include <linux/personality.h> /* for READ_IMPLIES_EXEC */
+#include <linux/types.h> /* for gfp_t */
 #include <asm/pgtable-types.h>
 
 struct page;
@@ -28,10 +29,16 @@  void copy_user_highpage(struct page *to, struct page *from,
 void copy_highpage(struct page *to, struct page *from);
 #define __HAVE_ARCH_COPY_HIGHPAGE
 
-#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
-	alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
+struct page *__alloc_zeroed_user_highpage(gfp_t movableflags,
+					  struct vm_area_struct *vma,
+					  unsigned long vaddr);
 #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
 
+#define want_zero_tags_on_free() system_supports_mte()
+
+void tag_clear_highpage(struct page *to);
+#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
+
 #define clear_user_page(page, vaddr, pg)	clear_page(page)
 #define copy_user_page(to, from, vaddr, pg)	copy_page(to, from)
 
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index 351537c12f36..e83643b3995f 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -36,6 +36,26 @@  SYM_FUNC_START(mte_clear_page_tags)
 	ret
 SYM_FUNC_END(mte_clear_page_tags)
 
+/*
+ * Zero the page and tags at the same time
+ *
+ * Parameters:
+ *	x0 - address to the beginning of the page
+ */
+SYM_FUNC_START(mte_zero_clear_page_tags)
+	mrs	x1, dczid_el0
+	and	w1, w1, #0xf
+	mov	x2, #4
+	lsl	x1, x2, x1
+	and	x0, x0, #(1 << MTE_TAG_SHIFT) - 1	// clear the tag
+
+1:	dc	gzva, x0
+	add	x0, x0, x1
+	tst	x0, #(PAGE_SIZE - 1)
+	b.ne	1b
+	ret
+SYM_FUNC_END(mte_zero_clear_page_tags)
+
 /*
  * Copy the tags from the source page to the destination one
  *   x0 - address of the destination page
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 871c82ab0a30..8127e0c0b8fb 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -921,3 +921,28 @@  void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
 	debug_exception_exit(regs);
 }
 NOKPROBE_SYMBOL(do_debug_exception);
+
+/*
+ * Used during anonymous page fault handling.
+ */
+struct page *__alloc_zeroed_user_highpage(gfp_t flags,
+					  struct vm_area_struct *vma,
+					  unsigned long vaddr)
+{
+	/*
+	 * If the page is mapped with PROT_MTE, initialise the tags at the
+	 * point of allocation and page zeroing as this is usually faster than
+	 * separate DC ZVA and STGM.
+	 */
+	if (vma->vm_flags & VM_MTE)
+		flags |= __GFP_ZEROTAGS;
+
+	return alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | flags, vma, vaddr);
+}
+
+void tag_clear_highpage(struct page *page)
+{
+	mte_zero_clear_page_tags(page_address(page));
+	page_kasan_tag_reset(page);
+	set_bit(PG_mte_tagged, &page->flags);
+}
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 0a48191534ff..a27c77dbe91c 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -46,9 +46,13 @@ 
 #endif
 
 #ifdef CONFIG_KASAN_HW_TAGS
-#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
+#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
 #else
-#define TCR_KASAN_HW_FLAGS 0
+/*
+ * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
+ * TBI being enabled at EL1.
+ */
+#define TCR_MTE_FLAGS TCR_TBI1 | TCR_TBID1
 #endif
 
 /*
@@ -452,7 +456,7 @@  SYM_FUNC_START(__cpu_setup)
 	msr_s	SYS_TFSRE0_EL1, xzr
 
 	/* set the TCR_EL1 bits */
-	mov_q	x10, TCR_KASAN_HW_FLAGS
+	mov_q	x10, TCR_MTE_FLAGS
 	orr	tcr, tcr, x10
 1:
 #endif
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 11da8af06704..68ba237365dc 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -53,8 +53,9 @@  struct vm_area_struct;
 #define ___GFP_HARDWALL		0x100000u
 #define ___GFP_THISNODE		0x200000u
 #define ___GFP_ACCOUNT		0x400000u
+#define ___GFP_ZEROTAGS		0x800000u
 #ifdef CONFIG_LOCKDEP
-#define ___GFP_NOLOCKDEP	0x800000u
+#define ___GFP_NOLOCKDEP	0x1000000u
 #else
 #define ___GFP_NOLOCKDEP	0
 #endif
@@ -229,16 +230,20 @@  struct vm_area_struct;
  * %__GFP_COMP address compound page metadata.
  *
  * %__GFP_ZERO returns a zeroed page on success.
+ *
+ * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if
+ * __GFP_ZERO is set.
  */
 #define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)
 #define __GFP_COMP	((__force gfp_t)___GFP_COMP)
 #define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)
+#define __GFP_ZEROTAGS	((__force gfp_t)___GFP_ZEROTAGS)
 
 /* Disable lockdep for GFP context tracking */
 #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
 
 /* Room for N __GFP_FOO bits */
-#define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
+#define __GFP_BITS_SHIFT (24 + IS_ENABLED(CONFIG_LOCKDEP))
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 
 /**
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 832b49b50c7b..78bba4bc47aa 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -204,6 +204,16 @@  static inline void clear_highpage(struct page *page)
 	kunmap_atomic(kaddr);
 }
 
+#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
+
+#define want_zero_tags_on_free() false
+
+static inline void tag_clear_highpage(struct page *page)
+{
+}
+
+#endif
+
 /*
  * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
  * If we pass in a head page, we can zero up to the size of the compound page.
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index 45e552cb9172..34362c8d0955 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -242,7 +242,14 @@  void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags)
 {
 	bool init = !want_init_on_free() && want_init_on_alloc(flags);
 
-	kasan_unpoison_pages(page, order, init);
+	if (flags & __GFP_ZEROTAGS) {
+		int i;
+
+		for (i = 0; i != 1 << order; ++i)
+			tag_clear_highpage(page + i);
+	} else {
+		kasan_unpoison_pages(page, order, init);
+	}
 }
 
 void kasan_free_pages(struct page *page, unsigned int order)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6e82a7f6fd6f..7ac0f0721d22 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1219,10 +1219,16 @@  static int free_tail_pages_check(struct page *head_page, struct page *page)
 	return ret;
 }
 
-static void kernel_init_free_pages(struct page *page, int numpages)
+static void kernel_init_free_pages(struct page *page, int numpages, bool zero_tags)
 {
 	int i;
 
+	if (zero_tags) {
+		for (i = 0; i < numpages; i++)
+			tag_clear_highpage(page + i);
+		return;
+	}
+
 	/* s390's use of memset() could override KASAN redzones. */
 	kasan_disable_current();
 	for (i = 0; i < numpages; i++) {
@@ -1314,7 +1320,8 @@  static __always_inline bool free_pages_prepare(struct page *page,
 		bool init = want_init_on_free();
 
 		if (init)
-			kernel_init_free_pages(page, 1 << order);
+			kernel_init_free_pages(page, 1 << order,
+					       want_zero_tags_on_free());
 		if (!skip_kasan_poison)
 			kasan_poison_pages(page, order, init);
 	}
@@ -2350,7 +2357,8 @@  inline void post_alloc_hook(struct page *page, unsigned int order,
 
 		kasan_unpoison_pages(page, order, init);
 		if (init)
-			kernel_init_free_pages(page, 1 << order);
+			kernel_init_free_pages(page, 1 << order,
+					       gfp_flags & __GFP_ZEROTAGS);
 	}
 
 	set_page_owner(page, order, gfp_flags);