diff mbox series

[v12,5/8] arm64: kvm: Save/restore MTE registers

Message ID 20210517123239.8025-6-steven.price@arm.com (mailing list archive)
State New, archived
Headers show
Series MTE support for KVM guest | expand

Commit Message

Steven Price May 17, 2021, 12:32 p.m. UTC
Define the new system registers that MTE introduces and context switch
them. The MTE feature is still hidden from the ID register as it isn't
supported in a VM yet.

Signed-off-by: Steven Price <steven.price@arm.com>
---
 arch/arm64/include/asm/kvm_host.h          |  6 ++
 arch/arm64/include/asm/kvm_mte.h           | 66 ++++++++++++++++++++++
 arch/arm64/include/asm/sysreg.h            |  3 +-
 arch/arm64/kernel/asm-offsets.c            |  3 +
 arch/arm64/kvm/hyp/entry.S                 |  7 +++
 arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 21 +++++++
 arch/arm64/kvm/sys_regs.c                  | 22 ++++++--
 7 files changed, 123 insertions(+), 5 deletions(-)
 create mode 100644 arch/arm64/include/asm/kvm_mte.h

Comments

Marc Zyngier May 17, 2021, 5:17 p.m. UTC | #1
On Mon, 17 May 2021 13:32:36 +0100,
Steven Price <steven.price@arm.com> wrote:
> 
> Define the new system registers that MTE introduces and context switch
> them. The MTE feature is still hidden from the ID register as it isn't
> supported in a VM yet.
> 
> Signed-off-by: Steven Price <steven.price@arm.com>
> ---
>  arch/arm64/include/asm/kvm_host.h          |  6 ++
>  arch/arm64/include/asm/kvm_mte.h           | 66 ++++++++++++++++++++++
>  arch/arm64/include/asm/sysreg.h            |  3 +-
>  arch/arm64/kernel/asm-offsets.c            |  3 +
>  arch/arm64/kvm/hyp/entry.S                 |  7 +++
>  arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 21 +++++++
>  arch/arm64/kvm/sys_regs.c                  | 22 ++++++--
>  7 files changed, 123 insertions(+), 5 deletions(-)
>  create mode 100644 arch/arm64/include/asm/kvm_mte.h
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index afaa5333f0e4..309e36cc1b42 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -208,6 +208,12 @@ enum vcpu_sysreg {
>  	CNTP_CVAL_EL0,
>  	CNTP_CTL_EL0,
>  
> +	/* Memory Tagging Extension registers */
> +	RGSR_EL1,	/* Random Allocation Tag Seed Register */
> +	GCR_EL1,	/* Tag Control Register */
> +	TFSR_EL1,	/* Tag Fault Status Register (EL1) */
> +	TFSRE0_EL1,	/* Tag Fault Status Register (EL0) */
> +
>  	/* 32bit specific registers. Keep them at the end of the range */
>  	DACR32_EL2,	/* Domain Access Control Register */
>  	IFSR32_EL2,	/* Instruction Fault Status Register */
> diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
> new file mode 100644
> index 000000000000..6541c7d6ce06
> --- /dev/null
> +++ b/arch/arm64/include/asm/kvm_mte.h
> @@ -0,0 +1,66 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2020 ARM Ltd.
> + */
> +#ifndef __ASM_KVM_MTE_H
> +#define __ASM_KVM_MTE_H
> +
> +#ifdef __ASSEMBLY__
> +
> +#include <asm/sysreg.h>
> +
> +#ifdef CONFIG_ARM64_MTE
> +
> +.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
> +alternative_if_not ARM64_MTE
> +	b	.L__skip_switch\@
> +alternative_else_nop_endif
> +	mrs	\reg1, hcr_el2
> +	and	\reg1, \reg1, #(HCR_ATA)
> +	cbz	\reg1, .L__skip_switch\@
> +
> +	mrs_s	\reg1, SYS_RGSR_EL1
> +	str	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
> +	mrs_s	\reg1, SYS_GCR_EL1
> +	str	\reg1, [\h_ctxt, #CPU_GCR_EL1]
> +
> +	ldr	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
> +	msr_s	SYS_RGSR_EL1, \reg1
> +	ldr	\reg1, [\g_ctxt, #CPU_GCR_EL1]
> +	msr_s	SYS_GCR_EL1, \reg1
> +
> +.L__skip_switch\@:
> +.endm
> +
> +.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
> +alternative_if_not ARM64_MTE
> +	b	.L__skip_switch\@
> +alternative_else_nop_endif
> +	mrs	\reg1, hcr_el2
> +	and	\reg1, \reg1, #(HCR_ATA)
> +	cbz	\reg1, .L__skip_switch\@
> +
> +	mrs_s	\reg1, SYS_RGSR_EL1
> +	str	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
> +	mrs_s	\reg1, SYS_GCR_EL1
> +	str	\reg1, [\g_ctxt, #CPU_GCR_EL1]
> +
> +	ldr	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
> +	msr_s	SYS_RGSR_EL1, \reg1
> +	ldr	\reg1, [\h_ctxt, #CPU_GCR_EL1]
> +	msr_s	SYS_GCR_EL1, \reg1

What is the rationale for not having any synchronisation here? It is
quite uncommon to allocate memory at EL2, but VHE can perform all kinds
of tricks.

> +
> +.L__skip_switch\@:
> +.endm
> +
> +#else /* CONFIG_ARM64_MTE */
> +
> +.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
> +.endm
> +
> +.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
> +.endm
> +
> +#endif /* CONFIG_ARM64_MTE */
> +#endif /* __ASSEMBLY__ */
> +#endif /* __ASM_KVM_MTE_H */
> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> index 65d15700a168..347ccac2341e 100644
> --- a/arch/arm64/include/asm/sysreg.h
> +++ b/arch/arm64/include/asm/sysreg.h
> @@ -651,7 +651,8 @@
>  
>  #define INIT_SCTLR_EL2_MMU_ON						\
>  	(SCTLR_ELx_M  | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I |	\
> -	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
> +	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 |		\
> +	 SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
>  
>  #define INIT_SCTLR_EL2_MMU_OFF \
>  	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index 0cb34ccb6e73..6b489a8462f0 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -111,6 +111,9 @@ int main(void)
>    DEFINE(VCPU_WORKAROUND_FLAGS,	offsetof(struct kvm_vcpu, arch.workaround_flags));
>    DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
>    DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_cpu_context, regs));
> +  DEFINE(CPU_RGSR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
> +  DEFINE(CPU_GCR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
> +  DEFINE(CPU_TFSRE0_EL1,	offsetof(struct kvm_cpu_context, sys_regs[TFSRE0_EL1]));

TFSRE0_EL1 is never accessed from assembly code. Leftover from a
previous version?

>    DEFINE(CPU_APIAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
>    DEFINE(CPU_APIBKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
>    DEFINE(CPU_APDAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
> diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> index e831d3dfd50d..435346ea1504 100644
> --- a/arch/arm64/kvm/hyp/entry.S
> +++ b/arch/arm64/kvm/hyp/entry.S
> @@ -13,6 +13,7 @@
>  #include <asm/kvm_arm.h>
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_mmu.h>
> +#include <asm/kvm_mte.h>
>  #include <asm/kvm_ptrauth.h>
>  
>  	.text
> @@ -51,6 +52,9 @@ alternative_else_nop_endif
>  
>  	add	x29, x0, #VCPU_CONTEXT
>  
> +	// mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
> +	mte_switch_to_guest x29, x1, x2
> +
>  	// Macro ptrauth_switch_to_guest format:
>  	// 	ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
>  	// The below macro to restore guest keys is not implemented in C code
> @@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
>  	// when this feature is enabled for kernel code.
>  	ptrauth_switch_to_hyp x1, x2, x3, x4, x5
>  
> +	// mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
> +	mte_switch_to_hyp x1, x2, x3
> +
>  	// Restore hyp's sp_el0
>  	restore_sp_el0 x2, x3
>  
> diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> index cce43bfe158f..de7e14c862e6 100644
> --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> @@ -14,6 +14,7 @@
>  #include <asm/kvm_asm.h>
>  #include <asm/kvm_emulate.h>
>  #include <asm/kvm_hyp.h>
> +#include <asm/kvm_mmu.h>
>  
>  static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
>  {
> @@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
>  	ctxt_sys_reg(ctxt, TPIDRRO_EL0)	= read_sysreg(tpidrro_el0);
>  }
>  
> +static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
> +{
> +	struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
> +
> +	if (!vcpu)
> +		vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
> +
> +	return kvm_has_mte(kern_hyp_va(vcpu->kvm));
> +}
> +
>  static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
>  {
>  	ctxt_sys_reg(ctxt, CSSELR_EL1)	= read_sysreg(csselr_el1);
> @@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
>  	ctxt_sys_reg(ctxt, PAR_EL1)	= read_sysreg_par();
>  	ctxt_sys_reg(ctxt, TPIDR_EL1)	= read_sysreg(tpidr_el1);
>  
> +	if (ctxt_has_mte(ctxt)) {
> +		ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
> +		ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
> +	}

I remember suggesting that this is slightly heavier than necessary.

On nVHE, TFSRE0_EL1 could be moved to load/put, as we never run
userspace with a vcpu loaded. The same holds of course for VHE, but we
also can move TFSR_EL1 to load/put, as the host uses TFSR_EL2.

Do you see any issue with that?

> +
>  	ctxt_sys_reg(ctxt, SP_EL1)	= read_sysreg(sp_el1);
>  	ctxt_sys_reg(ctxt, ELR_EL1)	= read_sysreg_el1(SYS_ELR);
>  	ctxt_sys_reg(ctxt, SPSR_EL1)	= read_sysreg_el1(SYS_SPSR);
> @@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
>  	write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1),	par_el1);
>  	write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1),	tpidr_el1);
>  
> +	if (ctxt_has_mte(ctxt)) {
> +		write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
> +		write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
> +	}
> +
>  	if (!has_vhe() &&
>  	    cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
>  	    ctxt->__hyp_running_vcpu) {
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index 24a844cb79ca..88adbc2286f2 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -1305,6 +1305,20 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
>  	return true;
>  }
>  
> +static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
> +				   const struct sys_reg_desc *rd)
> +{
> +	return REG_HIDDEN;
> +}
> +
> +#define MTE_REG(name) {				\
> +	SYS_DESC(SYS_##name),			\
> +	.access = undef_access,			\
> +	.reset = reset_unknown,			\
> +	.reg = name,				\
> +	.visibility = mte_visibility,		\
> +}
> +
>  /* sys_reg_desc initialiser for known cpufeature ID registers */
>  #define ID_SANITISED(name) {			\
>  	SYS_DESC(SYS_##name),			\
> @@ -1473,8 +1487,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>  	{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
>  	{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
>  
> -	{ SYS_DESC(SYS_RGSR_EL1), undef_access },
> -	{ SYS_DESC(SYS_GCR_EL1), undef_access },
> +	MTE_REG(RGSR_EL1),
> +	MTE_REG(GCR_EL1),
>  
>  	{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
>  	{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
> @@ -1501,8 +1515,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>  	{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
>  	{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
>  
> -	{ SYS_DESC(SYS_TFSR_EL1), undef_access },
> -	{ SYS_DESC(SYS_TFSRE0_EL1), undef_access },
> +	MTE_REG(TFSR_EL1),
> +	MTE_REG(TFSRE0_EL1),
>  
>  	{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
>  	{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },

Thanks,

	M.
Steven Price May 19, 2021, 1:04 p.m. UTC | #2
On 17/05/2021 18:17, Marc Zyngier wrote:
> On Mon, 17 May 2021 13:32:36 +0100,
> Steven Price <steven.price@arm.com> wrote:
>>
>> Define the new system registers that MTE introduces and context switch
>> them. The MTE feature is still hidden from the ID register as it isn't
>> supported in a VM yet.
>>
>> Signed-off-by: Steven Price <steven.price@arm.com>
>> ---
>>  arch/arm64/include/asm/kvm_host.h          |  6 ++
>>  arch/arm64/include/asm/kvm_mte.h           | 66 ++++++++++++++++++++++
>>  arch/arm64/include/asm/sysreg.h            |  3 +-
>>  arch/arm64/kernel/asm-offsets.c            |  3 +
>>  arch/arm64/kvm/hyp/entry.S                 |  7 +++
>>  arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 21 +++++++
>>  arch/arm64/kvm/sys_regs.c                  | 22 ++++++--
>>  7 files changed, 123 insertions(+), 5 deletions(-)
>>  create mode 100644 arch/arm64/include/asm/kvm_mte.h
>>
>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>> index afaa5333f0e4..309e36cc1b42 100644
>> --- a/arch/arm64/include/asm/kvm_host.h
>> +++ b/arch/arm64/include/asm/kvm_host.h
>> @@ -208,6 +208,12 @@ enum vcpu_sysreg {
>>  	CNTP_CVAL_EL0,
>>  	CNTP_CTL_EL0,
>>  
>> +	/* Memory Tagging Extension registers */
>> +	RGSR_EL1,	/* Random Allocation Tag Seed Register */
>> +	GCR_EL1,	/* Tag Control Register */
>> +	TFSR_EL1,	/* Tag Fault Status Register (EL1) */
>> +	TFSRE0_EL1,	/* Tag Fault Status Register (EL0) */
>> +
>>  	/* 32bit specific registers. Keep them at the end of the range */
>>  	DACR32_EL2,	/* Domain Access Control Register */
>>  	IFSR32_EL2,	/* Instruction Fault Status Register */
>> diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
>> new file mode 100644
>> index 000000000000..6541c7d6ce06
>> --- /dev/null
>> +++ b/arch/arm64/include/asm/kvm_mte.h
>> @@ -0,0 +1,66 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * Copyright (C) 2020 ARM Ltd.
>> + */
>> +#ifndef __ASM_KVM_MTE_H
>> +#define __ASM_KVM_MTE_H
>> +
>> +#ifdef __ASSEMBLY__
>> +
>> +#include <asm/sysreg.h>
>> +
>> +#ifdef CONFIG_ARM64_MTE
>> +
>> +.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
>> +alternative_if_not ARM64_MTE
>> +	b	.L__skip_switch\@
>> +alternative_else_nop_endif
>> +	mrs	\reg1, hcr_el2
>> +	and	\reg1, \reg1, #(HCR_ATA)
>> +	cbz	\reg1, .L__skip_switch\@
>> +
>> +	mrs_s	\reg1, SYS_RGSR_EL1
>> +	str	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
>> +	mrs_s	\reg1, SYS_GCR_EL1
>> +	str	\reg1, [\h_ctxt, #CPU_GCR_EL1]
>> +
>> +	ldr	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
>> +	msr_s	SYS_RGSR_EL1, \reg1
>> +	ldr	\reg1, [\g_ctxt, #CPU_GCR_EL1]
>> +	msr_s	SYS_GCR_EL1, \reg1
>> +
>> +.L__skip_switch\@:
>> +.endm
>> +
>> +.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
>> +alternative_if_not ARM64_MTE
>> +	b	.L__skip_switch\@
>> +alternative_else_nop_endif
>> +	mrs	\reg1, hcr_el2
>> +	and	\reg1, \reg1, #(HCR_ATA)
>> +	cbz	\reg1, .L__skip_switch\@
>> +
>> +	mrs_s	\reg1, SYS_RGSR_EL1
>> +	str	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
>> +	mrs_s	\reg1, SYS_GCR_EL1
>> +	str	\reg1, [\g_ctxt, #CPU_GCR_EL1]
>> +
>> +	ldr	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
>> +	msr_s	SYS_RGSR_EL1, \reg1
>> +	ldr	\reg1, [\h_ctxt, #CPU_GCR_EL1]
>> +	msr_s	SYS_GCR_EL1, \reg1
> 
> What is the rational for not having any synchronisation here? It is
> quite uncommon to allocate memory at EL2, but VHE can perform all kind
> of tricks.

I don't follow. This is part of the __guest_exit path and there's an ISB
at the end of that - is that not sufficient? I don't see any possibility
for allocating memory before that. What am I missing?

>> +
>> +.L__skip_switch\@:
>> +.endm
>> +
>> +#else /* CONFIG_ARM64_MTE */
>> +
>> +.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
>> +.endm
>> +
>> +.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
>> +.endm
>> +
>> +#endif /* CONFIG_ARM64_MTE */
>> +#endif /* __ASSEMBLY__ */
>> +#endif /* __ASM_KVM_MTE_H */
>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>> index 65d15700a168..347ccac2341e 100644
>> --- a/arch/arm64/include/asm/sysreg.h
>> +++ b/arch/arm64/include/asm/sysreg.h
>> @@ -651,7 +651,8 @@
>>  
>>  #define INIT_SCTLR_EL2_MMU_ON						\
>>  	(SCTLR_ELx_M  | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I |	\
>> -	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
>> +	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 |		\
>> +	 SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
>>  
>>  #define INIT_SCTLR_EL2_MMU_OFF \
>>  	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
>> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
>> index 0cb34ccb6e73..6b489a8462f0 100644
>> --- a/arch/arm64/kernel/asm-offsets.c
>> +++ b/arch/arm64/kernel/asm-offsets.c
>> @@ -111,6 +111,9 @@ int main(void)
>>    DEFINE(VCPU_WORKAROUND_FLAGS,	offsetof(struct kvm_vcpu, arch.workaround_flags));
>>    DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
>>    DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_cpu_context, regs));
>> +  DEFINE(CPU_RGSR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
>> +  DEFINE(CPU_GCR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
>> +  DEFINE(CPU_TFSRE0_EL1,	offsetof(struct kvm_cpu_context, sys_regs[TFSRE0_EL1]));
> 
> TFSRE0_EL1 is never accessed from assembly code. Leftover from a
> previous version?

Indeed, I will drop it.

>>    DEFINE(CPU_APIAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
>>    DEFINE(CPU_APIBKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
>>    DEFINE(CPU_APDAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
>> diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
>> index e831d3dfd50d..435346ea1504 100644
>> --- a/arch/arm64/kvm/hyp/entry.S
>> +++ b/arch/arm64/kvm/hyp/entry.S
>> @@ -13,6 +13,7 @@
>>  #include <asm/kvm_arm.h>
>>  #include <asm/kvm_asm.h>
>>  #include <asm/kvm_mmu.h>
>> +#include <asm/kvm_mte.h>
>>  #include <asm/kvm_ptrauth.h>
>>  
>>  	.text
>> @@ -51,6 +52,9 @@ alternative_else_nop_endif
>>  
>>  	add	x29, x0, #VCPU_CONTEXT
>>  
>> +	// mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
>> +	mte_switch_to_guest x29, x1, x2
>> +
>>  	// Macro ptrauth_switch_to_guest format:
>>  	// 	ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
>>  	// The below macro to restore guest keys is not implemented in C code
>> @@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
>>  	// when this feature is enabled for kernel code.
>>  	ptrauth_switch_to_hyp x1, x2, x3, x4, x5
>>  
>> +	// mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
>> +	mte_switch_to_hyp x1, x2, x3
>> +
>>  	// Restore hyp's sp_el0
>>  	restore_sp_el0 x2, x3
>>  
>> diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
>> index cce43bfe158f..de7e14c862e6 100644
>> --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
>> +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
>> @@ -14,6 +14,7 @@
>>  #include <asm/kvm_asm.h>
>>  #include <asm/kvm_emulate.h>
>>  #include <asm/kvm_hyp.h>
>> +#include <asm/kvm_mmu.h>
>>  
>>  static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
>>  {
>> @@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
>>  	ctxt_sys_reg(ctxt, TPIDRRO_EL0)	= read_sysreg(tpidrro_el0);
>>  }
>>  
>> +static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
>> +{
>> +	struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
>> +
>> +	if (!vcpu)
>> +		vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
>> +
>> +	return kvm_has_mte(kern_hyp_va(vcpu->kvm));
>> +}
>> +
>>  static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
>>  {
>>  	ctxt_sys_reg(ctxt, CSSELR_EL1)	= read_sysreg(csselr_el1);
>> @@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
>>  	ctxt_sys_reg(ctxt, PAR_EL1)	= read_sysreg_par();
>>  	ctxt_sys_reg(ctxt, TPIDR_EL1)	= read_sysreg(tpidr_el1);
>>  
>> +	if (ctxt_has_mte(ctxt)) {
>> +		ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
>> +		ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
>> +	}
> 
> I remember suggesting that this is slightly heavier than necessary.
> 
> On nVHE, TFSRE0_EL1 could be moved to load/put, as we never run
> userspace with a vcpu loaded. The same holds of course for VHE, but we
> also can move TFSR_EL1 to load/put, as the host uses TFSR_EL2.
> 
> Do you see any issue with that?

The comment[1] I made before was:

  For TFSR_EL1 + VHE I believe it is synchronised only on vcpu_load/put -
  __sysreg_save_el1_state() is called from kvm_vcpu_load_sysregs_vhe().

  TFSRE0_EL1 potentially could be improved. I have to admit I was unsure
  if it should be in __sysreg_save_user_state() instead. However AFAICT
  that is called at the same time as __sysreg_save_el1_state() and there's
  no optimisation for nVHE. And given it's an _EL1 register this seemed
  like the logical place.

  Am I missing something here? Potentially there are other registers to be
  optimised (TPIDRRO_EL0 looks like a possibility), but IMHO that doesn't
  belong in this series.

For VHE TFSR_EL1 is already only saved/restored on load/put
(__sysreg_save_el1_state() is called from kvm_vcpu_put_sysregs_vhe()).

TFSRE0_EL1 could be moved, but I'm not sure where it should live as I
mentioned above.

[1] https://lore.kernel.org/kvmarm/b16b65b5-d27f-7f86-fe0c-38a951e7d3ae@arm.com/

Thanks,

Steve

>> +
>>  	ctxt_sys_reg(ctxt, SP_EL1)	= read_sysreg(sp_el1);
>>  	ctxt_sys_reg(ctxt, ELR_EL1)	= read_sysreg_el1(SYS_ELR);
>>  	ctxt_sys_reg(ctxt, SPSR_EL1)	= read_sysreg_el1(SYS_SPSR);
>> @@ -107,6 +123,11 @@ static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
>>  	write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1),	par_el1);
>>  	write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1),	tpidr_el1);
>>  
>> +	if (ctxt_has_mte(ctxt)) {
>> +		write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
>> +		write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
>> +	}
>> +
>>  	if (!has_vhe() &&
>>  	    cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
>>  	    ctxt->__hyp_running_vcpu) {
>> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
>> index 24a844cb79ca..88adbc2286f2 100644
>> --- a/arch/arm64/kvm/sys_regs.c
>> +++ b/arch/arm64/kvm/sys_regs.c
>> @@ -1305,6 +1305,20 @@ static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
>>  	return true;
>>  }
>>  
>> +static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
>> +				   const struct sys_reg_desc *rd)
>> +{
>> +	return REG_HIDDEN;
>> +}
>> +
>> +#define MTE_REG(name) {				\
>> +	SYS_DESC(SYS_##name),			\
>> +	.access = undef_access,			\
>> +	.reset = reset_unknown,			\
>> +	.reg = name,				\
>> +	.visibility = mte_visibility,		\
>> +}
>> +
>>  /* sys_reg_desc initialiser for known cpufeature ID registers */
>>  #define ID_SANITISED(name) {			\
>>  	SYS_DESC(SYS_##name),			\
>> @@ -1473,8 +1487,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>>  	{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
>>  	{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
>>  
>> -	{ SYS_DESC(SYS_RGSR_EL1), undef_access },
>> -	{ SYS_DESC(SYS_GCR_EL1), undef_access },
>> +	MTE_REG(RGSR_EL1),
>> +	MTE_REG(GCR_EL1),
>>  
>>  	{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
>>  	{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
>> @@ -1501,8 +1515,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
>>  	{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
>>  	{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
>>  
>> -	{ SYS_DESC(SYS_TFSR_EL1), undef_access },
>> -	{ SYS_DESC(SYS_TFSRE0_EL1), undef_access },
>> +	MTE_REG(TFSR_EL1),
>> +	MTE_REG(TFSRE0_EL1),
>>  
>>  	{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
>>  	{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },
> 
> Thanks,
> 
> 	M.
>
Marc Zyngier May 20, 2021, 9:46 a.m. UTC | #3
On Wed, 19 May 2021 14:04:20 +0100,
Steven Price <steven.price@arm.com> wrote:
> 
> On 17/05/2021 18:17, Marc Zyngier wrote:
> > On Mon, 17 May 2021 13:32:36 +0100,
> > Steven Price <steven.price@arm.com> wrote:
> >>
> >> Define the new system registers that MTE introduces and context switch
> >> them. The MTE feature is still hidden from the ID register as it isn't
> >> supported in a VM yet.
> >>
> >> Signed-off-by: Steven Price <steven.price@arm.com>
> >> ---
> >>  arch/arm64/include/asm/kvm_host.h          |  6 ++
> >>  arch/arm64/include/asm/kvm_mte.h           | 66 ++++++++++++++++++++++
> >>  arch/arm64/include/asm/sysreg.h            |  3 +-
> >>  arch/arm64/kernel/asm-offsets.c            |  3 +
> >>  arch/arm64/kvm/hyp/entry.S                 |  7 +++
> >>  arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 21 +++++++
> >>  arch/arm64/kvm/sys_regs.c                  | 22 ++++++--
> >>  7 files changed, 123 insertions(+), 5 deletions(-)
> >>  create mode 100644 arch/arm64/include/asm/kvm_mte.h
> >>
> >> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> >> index afaa5333f0e4..309e36cc1b42 100644
> >> --- a/arch/arm64/include/asm/kvm_host.h
> >> +++ b/arch/arm64/include/asm/kvm_host.h
> >> @@ -208,6 +208,12 @@ enum vcpu_sysreg {
> >>  	CNTP_CVAL_EL0,
> >>  	CNTP_CTL_EL0,
> >>  
> >> +	/* Memory Tagging Extension registers */
> >> +	RGSR_EL1,	/* Random Allocation Tag Seed Register */
> >> +	GCR_EL1,	/* Tag Control Register */
> >> +	TFSR_EL1,	/* Tag Fault Status Register (EL1) */
> >> +	TFSRE0_EL1,	/* Tag Fault Status Register (EL0) */
> >> +
> >>  	/* 32bit specific registers. Keep them at the end of the range */
> >>  	DACR32_EL2,	/* Domain Access Control Register */
> >>  	IFSR32_EL2,	/* Instruction Fault Status Register */
> >> diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
> >> new file mode 100644
> >> index 000000000000..6541c7d6ce06
> >> --- /dev/null
> >> +++ b/arch/arm64/include/asm/kvm_mte.h
> >> @@ -0,0 +1,66 @@
> >> +/* SPDX-License-Identifier: GPL-2.0 */
> >> +/*
> >> + * Copyright (C) 2020 ARM Ltd.
> >> + */
> >> +#ifndef __ASM_KVM_MTE_H
> >> +#define __ASM_KVM_MTE_H
> >> +
> >> +#ifdef __ASSEMBLY__
> >> +
> >> +#include <asm/sysreg.h>
> >> +
> >> +#ifdef CONFIG_ARM64_MTE
> >> +
> >> +.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
> >> +alternative_if_not ARM64_MTE
> >> +	b	.L__skip_switch\@
> >> +alternative_else_nop_endif
> >> +	mrs	\reg1, hcr_el2
> >> +	and	\reg1, \reg1, #(HCR_ATA)
> >> +	cbz	\reg1, .L__skip_switch\@
> >> +
> >> +	mrs_s	\reg1, SYS_RGSR_EL1
> >> +	str	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
> >> +	mrs_s	\reg1, SYS_GCR_EL1
> >> +	str	\reg1, [\h_ctxt, #CPU_GCR_EL1]
> >> +
> >> +	ldr	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
> >> +	msr_s	SYS_RGSR_EL1, \reg1
> >> +	ldr	\reg1, [\g_ctxt, #CPU_GCR_EL1]
> >> +	msr_s	SYS_GCR_EL1, \reg1
> >> +
> >> +.L__skip_switch\@:
> >> +.endm
> >> +
> >> +.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
> >> +alternative_if_not ARM64_MTE
> >> +	b	.L__skip_switch\@
> >> +alternative_else_nop_endif
> >> +	mrs	\reg1, hcr_el2
> >> +	and	\reg1, \reg1, #(HCR_ATA)
> >> +	cbz	\reg1, .L__skip_switch\@
> >> +
> >> +	mrs_s	\reg1, SYS_RGSR_EL1
> >> +	str	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
> >> +	mrs_s	\reg1, SYS_GCR_EL1
> >> +	str	\reg1, [\g_ctxt, #CPU_GCR_EL1]
> >> +
> >> +	ldr	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
> >> +	msr_s	SYS_RGSR_EL1, \reg1
> >> +	ldr	\reg1, [\h_ctxt, #CPU_GCR_EL1]
> >> +	msr_s	SYS_GCR_EL1, \reg1
> > 
> > What is the rational for not having any synchronisation here? It is
> > quite uncommon to allocate memory at EL2, but VHE can perform all kind
> > of tricks.
> 
> I don't follow. This is part of the __guest_exit path and there's an ISB
> at the end of that - is that not sufficient? I don't see any possibility
> for allocating memory before that. What am I missing?

Which ISB?  We have a few in the SError handling code, but that's
conditioned on not having RAS. With any RAS-enabled CPU, we return to
C code early, since we don't need any extra synchronisation (see the
comment about the absence of ISB on this path).

I would really like to ensure that we return to C code in the exact
state we left it.

> 
> >> +
> >> +.L__skip_switch\@:
> >> +.endm
> >> +
> >> +#else /* CONFIG_ARM64_MTE */
> >> +
> >> +.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
> >> +.endm
> >> +
> >> +.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
> >> +.endm
> >> +
> >> +#endif /* CONFIG_ARM64_MTE */
> >> +#endif /* __ASSEMBLY__ */
> >> +#endif /* __ASM_KVM_MTE_H */
> >> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
> >> index 65d15700a168..347ccac2341e 100644
> >> --- a/arch/arm64/include/asm/sysreg.h
> >> +++ b/arch/arm64/include/asm/sysreg.h
> >> @@ -651,7 +651,8 @@
> >>  
> >>  #define INIT_SCTLR_EL2_MMU_ON						\
> >>  	(SCTLR_ELx_M  | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I |	\
> >> -	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
> >> +	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 |		\
> >> +	 SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
> >>  
> >>  #define INIT_SCTLR_EL2_MMU_OFF \
> >>  	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
> >> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> >> index 0cb34ccb6e73..6b489a8462f0 100644
> >> --- a/arch/arm64/kernel/asm-offsets.c
> >> +++ b/arch/arm64/kernel/asm-offsets.c
> >> @@ -111,6 +111,9 @@ int main(void)
> >>    DEFINE(VCPU_WORKAROUND_FLAGS,	offsetof(struct kvm_vcpu, arch.workaround_flags));
> >>    DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
> >>    DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_cpu_context, regs));
> >> +  DEFINE(CPU_RGSR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
> >> +  DEFINE(CPU_GCR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
> >> +  DEFINE(CPU_TFSRE0_EL1,	offsetof(struct kvm_cpu_context, sys_regs[TFSRE0_EL1]));
> > 
> > TFSRE0_EL1 is never accessed from assembly code. Leftover from a
> > previous version?
> 
> Indeed, I will drop it.
> 
> >>    DEFINE(CPU_APIAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
> >>    DEFINE(CPU_APIBKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
> >>    DEFINE(CPU_APDAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
> >> diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> >> index e831d3dfd50d..435346ea1504 100644
> >> --- a/arch/arm64/kvm/hyp/entry.S
> >> +++ b/arch/arm64/kvm/hyp/entry.S
> >> @@ -13,6 +13,7 @@
> >>  #include <asm/kvm_arm.h>
> >>  #include <asm/kvm_asm.h>
> >>  #include <asm/kvm_mmu.h>
> >> +#include <asm/kvm_mte.h>
> >>  #include <asm/kvm_ptrauth.h>
> >>  
> >>  	.text
> >> @@ -51,6 +52,9 @@ alternative_else_nop_endif
> >>  
> >>  	add	x29, x0, #VCPU_CONTEXT
> >>  
> >> +	// mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
> >> +	mte_switch_to_guest x29, x1, x2
> >> +
> >>  	// Macro ptrauth_switch_to_guest format:
> >>  	// 	ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
> >>  	// The below macro to restore guest keys is not implemented in C code
> >> @@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
> >>  	// when this feature is enabled for kernel code.
> >>  	ptrauth_switch_to_hyp x1, x2, x3, x4, x5
> >>  
> >> +	// mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
> >> +	mte_switch_to_hyp x1, x2, x3
> >> +
> >>  	// Restore hyp's sp_el0
> >>  	restore_sp_el0 x2, x3
> >>  
> >> diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> >> index cce43bfe158f..de7e14c862e6 100644
> >> --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> >> +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
> >> @@ -14,6 +14,7 @@
> >>  #include <asm/kvm_asm.h>
> >>  #include <asm/kvm_emulate.h>
> >>  #include <asm/kvm_hyp.h>
> >> +#include <asm/kvm_mmu.h>
> >>  
> >>  static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
> >>  {
> >> @@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
> >>  	ctxt_sys_reg(ctxt, TPIDRRO_EL0)	= read_sysreg(tpidrro_el0);
> >>  }
> >>  
> >> +static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
> >> +{
> >> +	struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
> >> +
> >> +	if (!vcpu)
> >> +		vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
> >> +
> >> +	return kvm_has_mte(kern_hyp_va(vcpu->kvm));
> >> +}
> >> +
> >>  static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
> >>  {
> >>  	ctxt_sys_reg(ctxt, CSSELR_EL1)	= read_sysreg(csselr_el1);
> >> @@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
> >>  	ctxt_sys_reg(ctxt, PAR_EL1)	= read_sysreg_par();
> >>  	ctxt_sys_reg(ctxt, TPIDR_EL1)	= read_sysreg(tpidr_el1);
> >>  
> >> +	if (ctxt_has_mte(ctxt)) {
> >> +		ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
> >> +		ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
> >> +	}
> > 
> > I remember suggesting that this is slightly heavier than necessary.
> > 
> > On nVHE, TFSRE0_EL1 could be moved to load/put, as we never run
> > userspace with a vcpu loaded. The same holds of course for VHE, but we
> > also can move TFSR_EL1 to load/put, as the host uses TFSR_EL2.
> > 
> > Do you see any issue with that?
> 
> The comment[1] I made before was:

Ah, I totally missed this email (or can't remember reading it, which
amounts to the same thing). Apologies for that.

>   For TFSR_EL1 + VHE I believe it is synchronised only on vcpu_load/put -
>   __sysreg_save_el1_state() is called from kvm_vcpu_load_sysregs_vhe().
> 
>   TFSRE0_EL1 potentially could be improved. I have to admit I was unsure
>   if it should be in __sysreg_save_user_state() instead. However AFAICT
>   that is called at the same time as __sysreg_save_el1_state() and there's
>   no optimisation for nVHE. And given it's an _EL1 register this seemed
>   like the logical place.
>
>   Am I missing something here? Potentially there are other registers to be
>   optimised (TPIDRRO_EL0 looks like a possibility), but IMHO that doesn't
>   belong in this series.
> 
> For VHE TFSR_EL1 is already only saved/restored on load/put
> (__sysreg_save_el1_state() is called from kvm_vcpu_put_sysregs_vhe()).
> 
> TFSRE0_EL1 could be moved, but I'm not sure where it should live as I
> mentioned above.

Yeah, this looks fine, please ignore my rambling.

Thanks,

	M.
Steven Price May 20, 2021, 3:21 p.m. UTC | #4
On 20/05/2021 10:46, Marc Zyngier wrote:
> On Wed, 19 May 2021 14:04:20 +0100,
> Steven Price <steven.price@arm.com> wrote:
>>
>> On 17/05/2021 18:17, Marc Zyngier wrote:
>>> On Mon, 17 May 2021 13:32:36 +0100,
>>> Steven Price <steven.price@arm.com> wrote:
>>>>
>>>> Define the new system registers that MTE introduces and context switch
>>>> them. The MTE feature is still hidden from the ID register as it isn't
>>>> supported in a VM yet.
>>>>
>>>> Signed-off-by: Steven Price <steven.price@arm.com>
>>>> ---
>>>>  arch/arm64/include/asm/kvm_host.h          |  6 ++
>>>>  arch/arm64/include/asm/kvm_mte.h           | 66 ++++++++++++++++++++++
>>>>  arch/arm64/include/asm/sysreg.h            |  3 +-
>>>>  arch/arm64/kernel/asm-offsets.c            |  3 +
>>>>  arch/arm64/kvm/hyp/entry.S                 |  7 +++
>>>>  arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h | 21 +++++++
>>>>  arch/arm64/kvm/sys_regs.c                  | 22 ++++++--
>>>>  7 files changed, 123 insertions(+), 5 deletions(-)
>>>>  create mode 100644 arch/arm64/include/asm/kvm_mte.h
>>>>
>>>> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
>>>> index afaa5333f0e4..309e36cc1b42 100644
>>>> --- a/arch/arm64/include/asm/kvm_host.h
>>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>>> @@ -208,6 +208,12 @@ enum vcpu_sysreg {
>>>>  	CNTP_CVAL_EL0,
>>>>  	CNTP_CTL_EL0,
>>>>  
>>>> +	/* Memory Tagging Extension registers */
>>>> +	RGSR_EL1,	/* Random Allocation Tag Seed Register */
>>>> +	GCR_EL1,	/* Tag Control Register */
>>>> +	TFSR_EL1,	/* Tag Fault Status Register (EL1) */
>>>> +	TFSRE0_EL1,	/* Tag Fault Status Register (EL0) */
>>>> +
>>>>  	/* 32bit specific registers. Keep them at the end of the range */
>>>>  	DACR32_EL2,	/* Domain Access Control Register */
>>>>  	IFSR32_EL2,	/* Instruction Fault Status Register */
>>>> diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
>>>> new file mode 100644
>>>> index 000000000000..6541c7d6ce06
>>>> --- /dev/null
>>>> +++ b/arch/arm64/include/asm/kvm_mte.h
>>>> @@ -0,0 +1,66 @@
>>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>>> +/*
>>>> + * Copyright (C) 2020 ARM Ltd.
>>>> + */
>>>> +#ifndef __ASM_KVM_MTE_H
>>>> +#define __ASM_KVM_MTE_H
>>>> +
>>>> +#ifdef __ASSEMBLY__
>>>> +
>>>> +#include <asm/sysreg.h>
>>>> +
>>>> +#ifdef CONFIG_ARM64_MTE
>>>> +
>>>> +.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
>>>> +alternative_if_not ARM64_MTE
>>>> +	b	.L__skip_switch\@
>>>> +alternative_else_nop_endif
>>>> +	mrs	\reg1, hcr_el2
>>>> +	and	\reg1, \reg1, #(HCR_ATA)
>>>> +	cbz	\reg1, .L__skip_switch\@
>>>> +
>>>> +	mrs_s	\reg1, SYS_RGSR_EL1
>>>> +	str	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
>>>> +	mrs_s	\reg1, SYS_GCR_EL1
>>>> +	str	\reg1, [\h_ctxt, #CPU_GCR_EL1]
>>>> +
>>>> +	ldr	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
>>>> +	msr_s	SYS_RGSR_EL1, \reg1
>>>> +	ldr	\reg1, [\g_ctxt, #CPU_GCR_EL1]
>>>> +	msr_s	SYS_GCR_EL1, \reg1
>>>> +
>>>> +.L__skip_switch\@:
>>>> +.endm
>>>> +
>>>> +.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
>>>> +alternative_if_not ARM64_MTE
>>>> +	b	.L__skip_switch\@
>>>> +alternative_else_nop_endif
>>>> +	mrs	\reg1, hcr_el2
>>>> +	and	\reg1, \reg1, #(HCR_ATA)
>>>> +	cbz	\reg1, .L__skip_switch\@
>>>> +
>>>> +	mrs_s	\reg1, SYS_RGSR_EL1
>>>> +	str	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
>>>> +	mrs_s	\reg1, SYS_GCR_EL1
>>>> +	str	\reg1, [\g_ctxt, #CPU_GCR_EL1]
>>>> +
>>>> +	ldr	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
>>>> +	msr_s	SYS_RGSR_EL1, \reg1
>>>> +	ldr	\reg1, [\h_ctxt, #CPU_GCR_EL1]
>>>> +	msr_s	SYS_GCR_EL1, \reg1
>>>
>>> What is the rationale for not having any synchronisation here? It is
>>> quite uncommon to allocate memory at EL2, but VHE can perform all kinds
>>> of tricks.
>>
>> I don't follow. This is part of the __guest_exit path and there's an ISB
>> at the end of that - is that not sufficient? I don't see any possibility
>> for allocating memory before that. What am I missing?
> 
> Which ISB?  We have a few in the SError handling code, but that's
> conditioned on not having RAS. With any RAS-enabled CPU, we return to
> C code early, since we don't need any extra synchronisation (see the
> comment about the absence of ISB on this path).

Ah, I clearly didn't read the code (or comment) carefully enough -
indeed with RAS we're potentially skipping the ISB.

> I would really like to ensure that we return to C code in the exact
> state we left it.

Agreed, I'll stick an ISB at the end of mte_switch_to_hyp. Although
there's clearly room for optimisation here as ptrauth_switch_to_hyp has
a similar ISB.

>>
>>>> +
>>>> +.L__skip_switch\@:
>>>> +.endm
>>>> +
>>>> +#else /* CONFIG_ARM64_MTE */
>>>> +
>>>> +.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
>>>> +.endm
>>>> +
>>>> +.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
>>>> +.endm
>>>> +
>>>> +#endif /* CONFIG_ARM64_MTE */
>>>> +#endif /* __ASSEMBLY__ */
>>>> +#endif /* __ASM_KVM_MTE_H */
>>>> diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
>>>> index 65d15700a168..347ccac2341e 100644
>>>> --- a/arch/arm64/include/asm/sysreg.h
>>>> +++ b/arch/arm64/include/asm/sysreg.h
>>>> @@ -651,7 +651,8 @@
>>>>  
>>>>  #define INIT_SCTLR_EL2_MMU_ON						\
>>>>  	(SCTLR_ELx_M  | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I |	\
>>>> -	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
>>>> +	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 |		\
>>>> +	 SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
>>>>  
>>>>  #define INIT_SCTLR_EL2_MMU_OFF \
>>>>  	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
>>>> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
>>>> index 0cb34ccb6e73..6b489a8462f0 100644
>>>> --- a/arch/arm64/kernel/asm-offsets.c
>>>> +++ b/arch/arm64/kernel/asm-offsets.c
>>>> @@ -111,6 +111,9 @@ int main(void)
>>>>    DEFINE(VCPU_WORKAROUND_FLAGS,	offsetof(struct kvm_vcpu, arch.workaround_flags));
>>>>    DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
>>>>    DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_cpu_context, regs));
>>>> +  DEFINE(CPU_RGSR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
>>>> +  DEFINE(CPU_GCR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
>>>> +  DEFINE(CPU_TFSRE0_EL1,	offsetof(struct kvm_cpu_context, sys_regs[TFSRE0_EL1]));
>>>
>>> TFSRE0_EL1 is never accessed from assembly code. Leftover from a
>>> previous version?
>>
>> Indeed, I will drop it.
>>
>>>>    DEFINE(CPU_APIAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
>>>>    DEFINE(CPU_APIBKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
>>>>    DEFINE(CPU_APDAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
>>>> diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
>>>> index e831d3dfd50d..435346ea1504 100644
>>>> --- a/arch/arm64/kvm/hyp/entry.S
>>>> +++ b/arch/arm64/kvm/hyp/entry.S
>>>> @@ -13,6 +13,7 @@
>>>>  #include <asm/kvm_arm.h>
>>>>  #include <asm/kvm_asm.h>
>>>>  #include <asm/kvm_mmu.h>
>>>> +#include <asm/kvm_mte.h>
>>>>  #include <asm/kvm_ptrauth.h>
>>>>  
>>>>  	.text
>>>> @@ -51,6 +52,9 @@ alternative_else_nop_endif
>>>>  
>>>>  	add	x29, x0, #VCPU_CONTEXT
>>>>  
>>>> +	// mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
>>>> +	mte_switch_to_guest x29, x1, x2
>>>> +
>>>>  	// Macro ptrauth_switch_to_guest format:
>>>>  	// 	ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
>>>>  	// The below macro to restore guest keys is not implemented in C code
>>>> @@ -142,6 +146,9 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
>>>>  	// when this feature is enabled for kernel code.
>>>>  	ptrauth_switch_to_hyp x1, x2, x3, x4, x5
>>>>  
>>>> +	// mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
>>>> +	mte_switch_to_hyp x1, x2, x3
>>>> +
>>>>  	// Restore hyp's sp_el0
>>>>  	restore_sp_el0 x2, x3
>>>>  
>>>> diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
>>>> index cce43bfe158f..de7e14c862e6 100644
>>>> --- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
>>>> +++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
>>>> @@ -14,6 +14,7 @@
>>>>  #include <asm/kvm_asm.h>
>>>>  #include <asm/kvm_emulate.h>
>>>>  #include <asm/kvm_hyp.h>
>>>> +#include <asm/kvm_mmu.h>
>>>>  
>>>>  static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
>>>>  {
>>>> @@ -26,6 +27,16 @@ static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
>>>>  	ctxt_sys_reg(ctxt, TPIDRRO_EL0)	= read_sysreg(tpidrro_el0);
>>>>  }
>>>>  
>>>> +static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
>>>> +{
>>>> +	struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
>>>> +
>>>> +	if (!vcpu)
>>>> +		vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
>>>> +
>>>> +	return kvm_has_mte(kern_hyp_va(vcpu->kvm));
>>>> +}
>>>> +
>>>>  static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
>>>>  {
>>>>  	ctxt_sys_reg(ctxt, CSSELR_EL1)	= read_sysreg(csselr_el1);
>>>> @@ -46,6 +57,11 @@ static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
>>>>  	ctxt_sys_reg(ctxt, PAR_EL1)	= read_sysreg_par();
>>>>  	ctxt_sys_reg(ctxt, TPIDR_EL1)	= read_sysreg(tpidr_el1);
>>>>  
>>>> +	if (ctxt_has_mte(ctxt)) {
>>>> +		ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
>>>> +		ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
>>>> +	}
>>>
>>> I remember suggesting that this is slightly heavier than necessary.
>>>
>>> On nVHE, TFSRE0_EL1 could be moved to load/put, as we never run
>>> userspace with a vcpu loaded. The same holds of course for VHE, but we
>>> also can move TFSR_EL1 to load/put, as the host uses TFSR_EL2.
>>>
>>> Do you see any issue with that?
>>
>> The comment[1] I made before was:
> 
> Ah, I totally missed this email (or can't remember reading it, which
> amounts to the same thing). Apologies for that.
> 
>>   For TFSR_EL1 + VHE I believe it is synchronised only on vcpu_load/put -
>>   __sysreg_save_el1_state() is called from kvm_vcpu_load_sysregs_vhe().
>>
>>   TFSRE0_EL1 potentially could be improved. I have to admit I was unsure
>>   if it should be in __sysreg_save_user_state() instead. However AFAICT
>>   that is called at the same time as __sysreg_save_el1_state() and there's
>>   no optimisation for nVHE. And given it's an _EL1 register this seemed
>>   like the logical place.
>>
>>   Am I missing something here? Potentially there are other registers to be
>>   optimised (TPIDRRO_EL0 looks like a possibility), but IMHO that doesn't
>>   belong in this series.
>>
>> For VHE TFSR_EL1 is already only saved/restored on load/put
>> (__sysreg_save_el1_state() is called from kvm_vcpu_put_sysregs_vhe()).
>>
>> TFSRE0_EL1 could be moved, but I'm not sure where it should live as I
>> mentioned above.
> 
> Yeah, this looks fine, please ignore my rambling.

No problem!

Thanks,

Steve
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index afaa5333f0e4..309e36cc1b42 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -208,6 +208,12 @@  enum vcpu_sysreg {
 	CNTP_CVAL_EL0,
 	CNTP_CTL_EL0,
 
+	/* Memory Tagging Extension registers */
+	RGSR_EL1,	/* Random Allocation Tag Seed Register */
+	GCR_EL1,	/* Tag Control Register */
+	TFSR_EL1,	/* Tag Fault Status Register (EL1) */
+	TFSRE0_EL1,	/* Tag Fault Status Register (EL0) */
+
 	/* 32bit specific registers. Keep them at the end of the range */
 	DACR32_EL2,	/* Domain Access Control Register */
 	IFSR32_EL2,	/* Instruction Fault Status Register */
diff --git a/arch/arm64/include/asm/kvm_mte.h b/arch/arm64/include/asm/kvm_mte.h
new file mode 100644
index 000000000000..6541c7d6ce06
--- /dev/null
+++ b/arch/arm64/include/asm/kvm_mte.h
@@ -0,0 +1,66 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 ARM Ltd.
+ */
+#ifndef __ASM_KVM_MTE_H
+#define __ASM_KVM_MTE_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/sysreg.h>
+
+#ifdef CONFIG_ARM64_MTE
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+	b	.L__skip_switch\@
+alternative_else_nop_endif
+	mrs	\reg1, hcr_el2
+	and	\reg1, \reg1, #(HCR_ATA)
+	cbz	\reg1, .L__skip_switch\@
+
+	mrs_s	\reg1, SYS_RGSR_EL1
+	str	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
+	mrs_s	\reg1, SYS_GCR_EL1
+	str	\reg1, [\h_ctxt, #CPU_GCR_EL1]
+
+	ldr	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
+	msr_s	SYS_RGSR_EL1, \reg1
+	ldr	\reg1, [\g_ctxt, #CPU_GCR_EL1]
+	msr_s	SYS_GCR_EL1, \reg1
+
+.L__skip_switch\@:
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+alternative_if_not ARM64_MTE
+	b	.L__skip_switch\@
+alternative_else_nop_endif
+	mrs	\reg1, hcr_el2
+	and	\reg1, \reg1, #(HCR_ATA)
+	cbz	\reg1, .L__skip_switch\@
+
+	mrs_s	\reg1, SYS_RGSR_EL1
+	str	\reg1, [\g_ctxt, #CPU_RGSR_EL1]
+	mrs_s	\reg1, SYS_GCR_EL1
+	str	\reg1, [\g_ctxt, #CPU_GCR_EL1]
+
+	ldr	\reg1, [\h_ctxt, #CPU_RGSR_EL1]
+	msr_s	SYS_RGSR_EL1, \reg1
+	ldr	\reg1, [\h_ctxt, #CPU_GCR_EL1]
+	msr_s	SYS_GCR_EL1, \reg1
+
+.L__skip_switch\@:
+.endm
+
+#else /* CONFIG_ARM64_MTE */
+
+.macro mte_switch_to_guest g_ctxt, h_ctxt, reg1
+.endm
+
+.macro mte_switch_to_hyp g_ctxt, h_ctxt, reg1
+.endm
+
+#endif /* CONFIG_ARM64_MTE */
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_KVM_MTE_H */
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 65d15700a168..347ccac2341e 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -651,7 +651,8 @@ 
 
 #define INIT_SCTLR_EL2_MMU_ON						\
 	(SCTLR_ELx_M  | SCTLR_ELx_C | SCTLR_ELx_SA | SCTLR_ELx_I |	\
-	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 | SCTLR_EL2_RES1)
+	 SCTLR_ELx_IESB | SCTLR_ELx_WXN | ENDIAN_SET_EL2 |		\
+	 SCTLR_ELx_ITFSB | SCTLR_EL2_RES1)
 
 #define INIT_SCTLR_EL2_MMU_OFF \
 	(SCTLR_EL2_RES1 | ENDIAN_SET_EL2)
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 0cb34ccb6e73..6b489a8462f0 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -111,6 +111,9 @@  int main(void)
   DEFINE(VCPU_WORKAROUND_FLAGS,	offsetof(struct kvm_vcpu, arch.workaround_flags));
   DEFINE(VCPU_HCR_EL2,		offsetof(struct kvm_vcpu, arch.hcr_el2));
   DEFINE(CPU_USER_PT_REGS,	offsetof(struct kvm_cpu_context, regs));
+  DEFINE(CPU_RGSR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[RGSR_EL1]));
+  DEFINE(CPU_GCR_EL1,		offsetof(struct kvm_cpu_context, sys_regs[GCR_EL1]));
+  DEFINE(CPU_TFSRE0_EL1,	offsetof(struct kvm_cpu_context, sys_regs[TFSRE0_EL1]));
   DEFINE(CPU_APIAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIAKEYLO_EL1]));
   DEFINE(CPU_APIBKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APIBKEYLO_EL1]));
   DEFINE(CPU_APDAKEYLO_EL1,	offsetof(struct kvm_cpu_context, sys_regs[APDAKEYLO_EL1]));
diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e831d3dfd50d..435346ea1504 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -13,6 +13,7 @@ 
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_mte.h>
 #include <asm/kvm_ptrauth.h>
 
 	.text
@@ -51,6 +52,9 @@  alternative_else_nop_endif
 
 	add	x29, x0, #VCPU_CONTEXT
 
+	// mte_switch_to_guest(g_ctxt, h_ctxt, tmp1)
+	mte_switch_to_guest x29, x1, x2
+
 	// Macro ptrauth_switch_to_guest format:
 	// 	ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
 	// The below macro to restore guest keys is not implemented in C code
@@ -142,6 +146,9 @@  SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
 	// when this feature is enabled for kernel code.
 	ptrauth_switch_to_hyp x1, x2, x3, x4, x5
 
+	// mte_switch_to_hyp(g_ctxt, h_ctxt, reg1)
+	mte_switch_to_hyp x1, x2, x3
+
 	// Restore hyp's sp_el0
 	restore_sp_el0 x2, x3
 
diff --git a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
index cce43bfe158f..de7e14c862e6 100644
--- a/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
+++ b/arch/arm64/kvm/hyp/include/hyp/sysreg-sr.h
@@ -14,6 +14,7 @@ 
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 
 static inline void __sysreg_save_common_state(struct kvm_cpu_context *ctxt)
 {
@@ -26,6 +27,16 @@  static inline void __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
 	ctxt_sys_reg(ctxt, TPIDRRO_EL0)	= read_sysreg(tpidrro_el0);
 }
 
+static inline bool ctxt_has_mte(struct kvm_cpu_context *ctxt)
+{
+	struct kvm_vcpu *vcpu = ctxt->__hyp_running_vcpu;
+
+	if (!vcpu)
+		vcpu = container_of(ctxt, struct kvm_vcpu, arch.ctxt);
+
+	return kvm_has_mte(kern_hyp_va(vcpu->kvm));
+}
+
 static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 {
 	ctxt_sys_reg(ctxt, CSSELR_EL1)	= read_sysreg(csselr_el1);
@@ -46,6 +57,11 @@  static inline void __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
 	ctxt_sys_reg(ctxt, PAR_EL1)	= read_sysreg_par();
 	ctxt_sys_reg(ctxt, TPIDR_EL1)	= read_sysreg(tpidr_el1);
 
+	if (ctxt_has_mte(ctxt)) {
+		ctxt_sys_reg(ctxt, TFSR_EL1) = read_sysreg_el1(SYS_TFSR);
+		ctxt_sys_reg(ctxt, TFSRE0_EL1) = read_sysreg_s(SYS_TFSRE0_EL1);
+	}
+
 	ctxt_sys_reg(ctxt, SP_EL1)	= read_sysreg(sp_el1);
 	ctxt_sys_reg(ctxt, ELR_EL1)	= read_sysreg_el1(SYS_ELR);
 	ctxt_sys_reg(ctxt, SPSR_EL1)	= read_sysreg_el1(SYS_SPSR);
@@ -107,6 +123,11 @@  static inline void __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
 	write_sysreg(ctxt_sys_reg(ctxt, PAR_EL1),	par_el1);
 	write_sysreg(ctxt_sys_reg(ctxt, TPIDR_EL1),	tpidr_el1);
 
+	if (ctxt_has_mte(ctxt)) {
+		write_sysreg_el1(ctxt_sys_reg(ctxt, TFSR_EL1), SYS_TFSR);
+		write_sysreg_s(ctxt_sys_reg(ctxt, TFSRE0_EL1), SYS_TFSRE0_EL1);
+	}
+
 	if (!has_vhe() &&
 	    cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) &&
 	    ctxt->__hyp_running_vcpu) {
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 24a844cb79ca..88adbc2286f2 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1305,6 +1305,20 @@  static bool access_ccsidr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 	return true;
 }
 
+static unsigned int mte_visibility(const struct kvm_vcpu *vcpu,
+				   const struct sys_reg_desc *rd)
+{
+	return REG_HIDDEN;
+}
+
+#define MTE_REG(name) {				\
+	SYS_DESC(SYS_##name),			\
+	.access = undef_access,			\
+	.reset = reset_unknown,			\
+	.reg = name,				\
+	.visibility = mte_visibility,		\
+}
+
 /* sys_reg_desc initialiser for known cpufeature ID registers */
 #define ID_SANITISED(name) {			\
 	SYS_DESC(SYS_##name),			\
@@ -1473,8 +1487,8 @@  static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_ACTLR_EL1), access_actlr, reset_actlr, ACTLR_EL1 },
 	{ SYS_DESC(SYS_CPACR_EL1), NULL, reset_val, CPACR_EL1, 0 },
 
-	{ SYS_DESC(SYS_RGSR_EL1), undef_access },
-	{ SYS_DESC(SYS_GCR_EL1), undef_access },
+	MTE_REG(RGSR_EL1),
+	MTE_REG(GCR_EL1),
 
 	{ SYS_DESC(SYS_ZCR_EL1), NULL, reset_val, ZCR_EL1, 0, .visibility = sve_visibility },
 	{ SYS_DESC(SYS_TRFCR_EL1), undef_access },
@@ -1501,8 +1515,8 @@  static const struct sys_reg_desc sys_reg_descs[] = {
 	{ SYS_DESC(SYS_ERXMISC0_EL1), trap_raz_wi },
 	{ SYS_DESC(SYS_ERXMISC1_EL1), trap_raz_wi },
 
-	{ SYS_DESC(SYS_TFSR_EL1), undef_access },
-	{ SYS_DESC(SYS_TFSRE0_EL1), undef_access },
+	MTE_REG(TFSR_EL1),
+	MTE_REG(TFSRE0_EL1),
 
 	{ SYS_DESC(SYS_FAR_EL1), access_vm_reg, reset_unknown, FAR_EL1 },
 	{ SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 },