[v9,09/29] arm64: mte: Clear the tags when a page is mapped in user-space with PROT_MTE

Message ID: 20200904103029.32083-10-catalin.marinas@arm.com
State: New, archived
Series: arm64: Memory Tagging Extension user-space support

Commit Message

Catalin Marinas Sept. 4, 2020, 10:30 a.m. UTC
Pages allocated by the kernel are not guaranteed to have the tags
zeroed, especially as the kernel does not (yet) use MTE itself. To
ensure the user can still access such pages when mapped into its address
space, clear the tags via set_pte_at(). A new page flag - PG_mte_tagged
(PG_arch_2) - is used to track pages with valid allocation tags.

Since the zero page is mapped as pte_special(), it won't be covered by
the above set_pte_at() mechanism. Clear its tags during early MTE
initialisation.

Co-developed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Steven Price <steven.price@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
---

Notes:
    v8:
    - Introduce the cpu_enable_mte() function in this patch as it was no
      longer present in the previous ones.
    
    v5:
    - Fix the handling of compound pages. Previously, set_pte_at() could
      have erased already valid tags if the first page of a compound page
      did not have the PG_mte_tagged flag set.
    - Move the multitag_transfer_size macro from assembler.h to mte.S.
    - Ignore pte_special() mappings and clear the tags in the zero page
      separately (since it's mapped as a special pte).
    - Clearing the tags of the zero page was moved to this patch from an
      earlier one since mte_clear_page_tags() was not available.
    
    New in v4. Replaces a previous patch that zeroed the tags in clear_page().

 arch/arm64/include/asm/mte.h     | 16 +++++++++++++++
 arch/arm64/include/asm/pgtable.h |  7 +++++++
 arch/arm64/kernel/cpufeature.c   | 18 +++++++++++++++++
 arch/arm64/kernel/mte.c          | 14 +++++++++++++
 arch/arm64/lib/Makefile          |  2 ++
 arch/arm64/lib/mte.S             | 34 ++++++++++++++++++++++++++++++++
 6 files changed, 91 insertions(+)
 create mode 100644 arch/arm64/lib/mte.S

Comments

Steven Price Sept. 10, 2020, 10:23 a.m. UTC | #1
On 04/09/2020 11:30, Catalin Marinas wrote:
> Pages allocated by the kernel are not guaranteed to have the tags
> zeroed, especially as the kernel does not (yet) use MTE itself. To
> ensure the user can still access such pages when mapped into its address
> space, clear the tags via set_pte_at(). A new page flag - PG_mte_tagged
> (PG_arch_2) - is used to track pages with valid allocation tags.
> 
> Since the zero page is mapped as pte_special(), it won't be covered by
> the above set_pte_at() mechanism. Clear its tags during early MTE
> initialisation.
> 
> Co-developed-by: Steven Price <steven.price@arm.com>
> Signed-off-by: Steven Price <steven.price@arm.com>
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Will Deacon <will@kernel.org>
[...]
> diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
> new file mode 100644
> index 000000000000..a36705640086
> --- /dev/null
> +++ b/arch/arm64/lib/mte.S
> @@ -0,0 +1,34 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (C) 2020 ARM Ltd.
> + */
> +#include <linux/linkage.h>
> +
> +#include <asm/assembler.h>
> +#include <asm/sysreg.h>
> +
> +	.arch	armv8.5-a+memtag
> +
> +/*
> + * multitag_transfer_size - set \reg to the block size that is accessed by the
> + * LDGM/STGM instructions.
> + */
> +	.macro	multitag_transfer_size, reg, tmp
> +	mrs_s	\reg, SYS_GMID_EL1
> +	ubfx	\reg, \reg, #SYS_GMID_EL1_BS_SHIFT, #SYS_GMID_EL1_BS_SIZE
> +	mov	\tmp, #4
> +	lsl	\reg, \tmp, \reg
> +	.endm
> +
> +/*
> + * Clear the tags in a page
> + *   x0 - address of the page to be cleared
> + */
> +SYM_FUNC_START(mte_clear_page_tags)
> +	multitag_transfer_size x1, x2
> +1:	stgm	xzr, [x0]
> +	add	x0, x0, x1
> +	tst	x0, #(PAGE_SIZE - 1)
> +	b.ne	1b
> +	ret
> +SYM_FUNC_END(mte_clear_page_tags)
> 

Could the value of SYS_GMID_EL1 vary between CPUs and do we therefore 
need a preempt_disable() around mte_clear_page_tags() (and other 
functions in later patches)?

Steve
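
For reference, a minimal sketch of the wrapping Steve is asking about:
pinning the task to one CPU so that the GMID_EL1 read and the STGM loop
cannot migrate between CPUs (hypothetical call site, not part of this
series):

	/* stay on one CPU across the GMID_EL1 read and the tag stores */
	preempt_disable();
	mte_clear_page_tags(page_address(page));
	preempt_enable();
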
Catalin Marinas Sept. 10, 2020, 10:52 a.m. UTC | #2
On Thu, Sep 10, 2020 at 11:23:33AM +0100, Steven Price wrote:
> On 04/09/2020 11:30, Catalin Marinas wrote:
> > [...]
> 
> Could the value of SYS_GMID_EL1 vary between CPUs and do we therefore need a
> preempt_disable() around mte_clear_page_tags() (and other functions in later
> patches)?

If they differ, disabling preemption here is not sufficient. We'd have
to trap the GMID_EL1 access at EL2 as well and emulate it (we do this
for CTR_EL0 in dcache_line_size).

I don't want to proactively implement this just in case we'll have
broken hardware (I feel a bit more optimistic today ;)).
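
As background for the discussion, the macro computes the LDGM/STGM block
size as 4 << GMID_EL1.BS bytes. A C mirror of multitag_transfer_size,
assuming the SYS_GMID_EL1_BS_* definitions added earlier in this series
(the helper name is illustrative, not part of the patch):

	static inline unsigned int mte_stgm_block_size(void)
	{
		u64 gmid = read_sysreg_s(SYS_GMID_EL1);
		unsigned int bs;

		/* GMID_EL1.BS is log2 of the block size in 4-byte words */
		bs = (gmid >> SYS_GMID_EL1_BS_SHIFT) &
		     GENMASK(SYS_GMID_EL1_BS_SIZE - 1, 0);

		return 4U << bs;	/* bytes covered by one LDGM/STGM */
	}
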
Steven Price Sept. 10, 2020, 11:12 a.m. UTC | #3
On 10/09/2020 11:52, Catalin Marinas wrote:
> On Thu, Sep 10, 2020 at 11:23:33AM +0100, Steven Price wrote:
>> On 04/09/2020 11:30, Catalin Marinas wrote:
>>> [...]
>>
>> Could the value of SYS_GMID_EL1 vary between CPUs and do we therefore need a
>> preempt_disable() around mte_clear_page_tags() (and other functions in later
>> patches)?
> 
> If they differ, disabling preemption here is not sufficient. We'd have
> to trap the GMID_EL1 access at EL2 as well and emulate it (we do this
> for CTR_EL0 in dcache_line_size).

Hmm, good point. It's actually not possible to properly emulate this - 
EL2 can trap GMID_EL1 to provide a different (presumably smaller) size, 
but LDGM/STGM will still read/store the number of tags of the underlying 
hardware. While simple loops like we've got at the moment won't care 
(we'll just end up doing useless work), it won't be architecturally 
correct. The guest can always deduce the underlying value. So I think we 
can safely consider this broken hardware.

> I don't want to proactively implement this just in case we'll have
> broken hardware (I feel a bit more optimistic today ;)).

Given the above I think if we do have broken hardware the only sane 
thing to do would be to provide a way of overriding 
multitag_transfer_size to return the smallest size of all CPUs. Which 
works well enough for the uses we've currently got.

Steve
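
A sketch of the override Steve describes, folding each CPU's value into a
system-wide minimum as CPUs are brought up (hypothetical names, reusing
the mte_stgm_block_size() sketch above):

	static unsigned int mte_safe_block_size = UINT_MAX;

	/* e.g. called from cpu_enable_mte(), which runs on every CPU */
	static void mte_update_block_size(void)
	{
		mte_safe_block_size = min(mte_safe_block_size,
					  mte_stgm_block_size());
	}

Advancing by less than the hardware block size only repeats stores within
the page; it never skips tags, which is why the minimum is safe here.
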
Catalin Marinas Sept. 10, 2020, 11:55 a.m. UTC | #4
On Thu, Sep 10, 2020 at 12:12:27PM +0100, Steven Price wrote:
> On 10/09/2020 11:52, Catalin Marinas wrote:
> > On Thu, Sep 10, 2020 at 11:23:33AM +0100, Steven Price wrote:
> > > On 04/09/2020 11:30, Catalin Marinas wrote:
> > > > [...]
> > > 
> > > Could the value of SYS_GMID_EL1 vary between CPUs and do we therefore need a
> > > preempt_disable() around mte_clear_page_tags() (and other functions in later
> > > patches)?
> > 
> > If they differ, disabling preemption here is not sufficient. We'd have
> > to trap the GMID_EL1 access at EL2 as well and emulate it (we do this
> > for CTR_EL0 in dcache_line_size).
> 
> Hmm, good point. It's actually not possible to properly emulate this - EL2
> can trap GMID_EL1 to provide a different (presumably smaller) size, but
> LDGM/STGM will still read/store the number of tags of the underlying
> hardware. While simple loops like we've got at the moment won't care (we'll
> just end up doing useless work), it won't be architecturally correct. The
> guest can always deduce the underlying value. So I think we can safely
> consider this broken hardware.

I think that's similar to the DC ZVA (and DCZID_EL0.BS) case where
faking it could lead to data corruption if the software assumes it
writes a maximum number of bytes.

(I meant to raise a ticket with the architects to make this a
requirement in the ARM ARM but forgot about it)

> > I don't want to proactively implement this just in case we'll have
> > broken hardware (I feel a bit more optimistic today ;)).
> 
> Given the above I think if we do have broken hardware the only sane thing to
> do would be to provide a way of overriding multitag_transfer_size to return
> the smallest size of all CPUs. Which works well enough for the uses we've
> currently got.

If we do have such broken hardware, we should probably drop the STGM
instructions in favour of STG or ST2G. Luckily, STGM/LDGM are not
available in user space.
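
For illustration, an STG-based variant would not depend on GMID_EL1 at
all, advancing by the architecture's fixed 16-byte tag granule
(hypothetical, in the style of mte.S; ST2G would clear two granules per
store):

	SYM_FUNC_START(mte_clear_page_tags_stg)
	1:	stg	xzr, [x0], #16		// zero the tag of one granule
		tst	x0, #(PAGE_SIZE - 1)
		b.ne	1b
		ret
	SYM_FUNC_END(mte_clear_page_tags_stg)
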
Steven Price Sept. 10, 2020, 12:43 p.m. UTC | #5
On 10/09/2020 12:55, Catalin Marinas wrote:
> On Thu, Sep 10, 2020 at 12:12:27PM +0100, Steven Price wrote:
>> On 10/09/2020 11:52, Catalin Marinas wrote:
>>> On Thu, Sep 10, 2020 at 11:23:33AM +0100, Steven Price wrote:
>>>> On 04/09/2020 11:30, Catalin Marinas wrote:
>>>>> [...]
>>>>
>>>> Could the value of SYS_GMID_EL1 vary between CPUs and do we therefore need a
>>>> preempt_disable() around mte_clear_page_tags() (and other functions in later
>>>> patches)?
>>>
>>> If they differ, disabling preemption here is not sufficient. We'd have
>>> to trap the GMID_EL1 access at EL2 as well and emulate it (we do this
>>> for CTR_EL0 in dcache_line_size).
>>
>> Hmm, good point. It's actually not possible to properly emulate this - EL2
>> can trap GMID_EL1 to provide a different (presumably smaller) size, but
>> LDGM/STGM will still read/store the number of tags of the underlying
>> hardware. While simple loops like we've got at the moment won't care (we'll
>> just end up doing useless work), it won't be architecturally correct. The
>> guest can always deduce the underlying value. So I think we can safely
>> consider this broken hardware.
> 
> I think that's similar to the DC ZVA (and DCZID_EL0.BS) case where
> faking it could lead to data corruption if the software assumes it
> writes a maximum number of bytes.
> 
> (I meant to raise a ticket with the architects to make this a
> requirement in the ARM ARM but forgot about it)

Yes, that looks like exactly the same issue.

>>> I don't want to proactively implement this just in case we'll have
>>> broken hardware (I feel a bit more optimistic today ;)).
>>
>> Given the above I think if we do have broken hardware the only sane thing to
>> do would be to provide a way of overriding multitag_transfer_size to return
>> the smallest size of all CPUs. Which works well enough for the uses we've
>> currently got.
> 
> If we do have such broken hardware, we should probably drop the STGM
> instructions in favour of STG or ST2G. Luckily, STGM/LDGM are not
> available in user space.
> 

STGM should be safe the way we're doing it as long as the block size 
we're using is <= the hardware block size (it'll just store the same 
tags multiple times, and since we're clearing a whole page, which is 
larger than the maximum block size, there's no data loss). Although it 
would be worthwhile to benchmark on such hardware to see whether STGM 
is still worth it in that case or whether STG/ST2G is faster.

Steve

Patch

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index a0bf310da74b..1716b3d02489 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -7,12 +7,28 @@ 
 
 #ifndef __ASSEMBLY__
 
+#include <linux/page-flags.h>
+
+#include <asm/pgtable-types.h>
+
+void mte_clear_page_tags(void *addr);
+
 #ifdef CONFIG_ARM64_MTE
 
+/* track which pages have valid allocation tags */
+#define PG_mte_tagged	PG_arch_2
+
+void mte_sync_tags(pte_t *ptep, pte_t pte);
 void flush_mte_state(void);
 
 #else
 
+/* unused if !CONFIG_ARM64_MTE, silence the compiler */
+#define PG_mte_tagged	0
+
+static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
+{
+}
 static inline void flush_mte_state(void)
 {
 }
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index d5d3fbe73953..0a205a8e91b2 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -9,6 +9,7 @@ 
 #include <asm/proc-fns.h>
 
 #include <asm/memory.h>
+#include <asm/mte.h>
 #include <asm/pgtable-hwdef.h>
 #include <asm/pgtable-prot.h>
 #include <asm/tlbflush.h>
@@ -90,6 +91,8 @@  extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #define pte_user_exec(pte)	(!(pte_val(pte) & PTE_UXN))
 #define pte_cont(pte)		(!!(pte_val(pte) & PTE_CONT))
 #define pte_devmap(pte)		(!!(pte_val(pte) & PTE_DEVMAP))
+#define pte_tagged(pte)		((pte_val(pte) & PTE_ATTRINDX_MASK) == \
+				 PTE_ATTRINDX(MT_NORMAL_TAGGED))
 
 #define pte_cont_addr_end(addr, end)						\
 ({	unsigned long __boundary = ((addr) + CONT_PTE_SIZE) & CONT_PTE_MASK;	\
@@ -284,6 +287,10 @@  static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 	if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte))
 		__sync_icache_dcache(pte);
 
+	if (system_supports_mte() &&
+	    pte_present(pte) && pte_tagged(pte) && !pte_special(pte))
+		mte_sync_tags(ptep, pte);
+
 	__check_racy_pte_update(mm, ptep, pte);
 
 	set_pte(ptep, pte);
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index fabc8a237223..add9da5d8ea3 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -75,6 +75,7 @@ 
 #include <asm/cpu_ops.h>
 #include <asm/fpsimd.h>
 #include <asm/mmu_context.h>
+#include <asm/mte.h>
 #include <asm/processor.h>
 #include <asm/sysreg.h>
 #include <asm/traps.h>
@@ -1704,6 +1705,22 @@  static void bti_enable(const struct arm64_cpu_capabilities *__unused)
 }
 #endif /* CONFIG_ARM64_BTI */
 
+#ifdef CONFIG_ARM64_MTE
+static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
+{
+	static bool cleared_zero_page = false;
+
+	/*
+	 * Clear the tags in the zero page. This needs to be done via the
+	 * linear map which has the Tagged attribute.
+	 */
+	if (!cleared_zero_page) {
+		cleared_zero_page = true;
+		mte_clear_page_tags(lm_alias(empty_zero_page));
+	}
+}
+#endif /* CONFIG_ARM64_MTE */
+
 /* Internal helper functions to match cpu capability type */
 static bool
 cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap)
@@ -2133,6 +2150,7 @@  static const struct arm64_cpu_capabilities arm64_features[] = {
 		.field_pos = ID_AA64PFR1_MTE_SHIFT,
 		.min_field_value = ID_AA64PFR1_MTE,
 		.sign = FTR_UNSIGNED,
+		.cpu_enable = cpu_enable_mte,
 	},
 #endif /* CONFIG_ARM64_MTE */
 	{},
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 032016823957..5bf9bbed5a25 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -3,12 +3,26 @@ 
  * Copyright (C) 2020 ARM Ltd.
  */
 
+#include <linux/bitops.h>
+#include <linux/mm.h>
 #include <linux/thread_info.h>
 
 #include <asm/cpufeature.h>
 #include <asm/mte.h>
 #include <asm/sysreg.h>
 
+void mte_sync_tags(pte_t *ptep, pte_t pte)
+{
+	struct page *page = pte_page(pte);
+	long i, nr_pages = compound_nr(page);
+
+	/* if PG_mte_tagged is set, tags have already been initialised */
+	for (i = 0; i < nr_pages; i++, page++) {
+		if (!test_and_set_bit(PG_mte_tagged, &page->flags))
+			mte_clear_page_tags(page_address(page));
+	}
+}
+
 void flush_mte_state(void)
 {
 	if (!system_supports_mte())
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 2fc253466dbf..d31e1169d9b8 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -16,3 +16,5 @@  lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) += uaccess_flushcache.o
 obj-$(CONFIG_CRC32) += crc32.o
 
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
+obj-$(CONFIG_ARM64_MTE) += mte.o
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
new file mode 100644
index 000000000000..a36705640086
--- /dev/null
+++ b/arch/arm64/lib/mte.S
@@ -0,0 +1,34 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#include <linux/linkage.h>
+
+#include <asm/assembler.h>
+#include <asm/sysreg.h>
+
+	.arch	armv8.5-a+memtag
+
+/*
+ * multitag_transfer_size - set \reg to the block size that is accessed by the
+ * LDGM/STGM instructions.
+ */
+	.macro	multitag_transfer_size, reg, tmp
+	mrs_s	\reg, SYS_GMID_EL1
+	ubfx	\reg, \reg, #SYS_GMID_EL1_BS_SHIFT, #SYS_GMID_EL1_BS_SIZE
+	mov	\tmp, #4
+	lsl	\reg, \tmp, \reg
+	.endm
+
+/*
+ * Clear the tags in a page
+ *   x0 - address of the page to be cleared
+ */
+SYM_FUNC_START(mte_clear_page_tags)
+	multitag_transfer_size x1, x2
+1:	stgm	xzr, [x0]
+	add	x0, x0, x1
+	tst	x0, #(PAGE_SIZE - 1)
+	b.ne	1b
+	ret
+SYM_FUNC_END(mte_clear_page_tags)