diff mbox series

[v2,11/13] KVM: arm64: nv: Add emulation for ERETAx instructions

Message ID 20240226100601.2379693-12-maz@kernel.org (mailing list archive)
State New, archived
Headers show
Series KVM/arm64: Add NV support for ERET and PAuth | expand

Commit Message

Marc Zyngier Feb. 26, 2024, 10:05 a.m. UTC
FEAT_NV has the interesting property of relying on ERET being
trapped. An added complexity is that it also traps ERETAA and
ERETAB, meaning that the Pointer Authentication aspect of these
instruction must be emulated.

Add an emulation of Pointer Authentication, limited to ERETAx
(always using SP_EL2 as the modifier and ELR_EL2 as the pointer),
using the Generic Authentication instructions.

The emulation, however small, is placed in its own compilation
unit so that it can be avoided if the configuration doesn't
include it (or the toolchan in not up to the task).

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/include/asm/kvm_nested.h    |  12 ++
 arch/arm64/include/asm/pgtable-hwdef.h |   1 +
 arch/arm64/kvm/Makefile                |   1 +
 arch/arm64/kvm/pauth.c                 | 196 +++++++++++++++++++++++++
 4 files changed, 210 insertions(+)
 create mode 100644 arch/arm64/kvm/pauth.c

Comments

Joey Gouly March 7, 2024, 1:39 p.m. UTC | #1
Note that this is my first time looking at PAuth.

On Mon, Feb 26, 2024 at 10:05:59AM +0000, Marc Zyngier wrote:
> FEAT_NV has the interesting property of relying on ERET being
> trapped. An added complexity is that it also traps ERETAA and
> ERETAB, meaning that the Pointer Authentication aspect of these
> instruction must be emulated.
> 
> Add an emulation of Pointer Authentication, limited to ERETAx
> (always using SP_EL2 as the modifier and ELR_EL2 as the pointer),
> using the Generic Authentication instructions.
> 
> The emulation, however small, is placed in its own compilation
> unit so that it can be avoided if the configuration doesn't
> include it (or the toolchan in not up to the task).
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>  arch/arm64/include/asm/kvm_nested.h    |  12 ++
>  arch/arm64/include/asm/pgtable-hwdef.h |   1 +
>  arch/arm64/kvm/Makefile                |   1 +
>  arch/arm64/kvm/pauth.c                 | 196 +++++++++++++++++++++++++
>  4 files changed, 210 insertions(+)
>  create mode 100644 arch/arm64/kvm/pauth.c
> 
> diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
> index dbc4e3a67356..5e0ab0596246 100644
> --- a/arch/arm64/include/asm/kvm_nested.h
> +++ b/arch/arm64/include/asm/kvm_nested.h
> @@ -64,4 +64,16 @@ extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
>  
>  int kvm_init_nv_sysregs(struct kvm *kvm);
>  
> +#ifdef CONFIG_ARM64_PTR_AUTH
> +bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr);
> +#else
> +static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
> +{
> +	/* We really should never execute this... */
> +	WARN_ON_ONCE(1);
> +	*elr = 0xbad9acc0debadbad;
> +	return false;
> +}
> +#endif
> +
>  #endif /* __ARM64_KVM_NESTED_H */
> diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
> index e4944d517c99..bb88e9ef6296 100644
> --- a/arch/arm64/include/asm/pgtable-hwdef.h
> +++ b/arch/arm64/include/asm/pgtable-hwdef.h
> @@ -277,6 +277,7 @@
>  #define TCR_TBI1		(UL(1) << 38)
>  #define TCR_HA			(UL(1) << 39)
>  #define TCR_HD			(UL(1) << 40)
> +#define TCR_TBID0		(UL(1) << 51)
>  #define TCR_TBID1		(UL(1) << 52)
>  #define TCR_NFD0		(UL(1) << 53)
>  #define TCR_NFD1		(UL(1) << 54)
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index c0c050e53157..04882b577575 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -23,6 +23,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
>  	 vgic/vgic-its.o vgic/vgic-debug.o
>  
>  kvm-$(CONFIG_HW_PERF_EVENTS)  += pmu-emul.o pmu.o
> +kvm-$(CONFIG_ARM64_PTR_AUTH)  += pauth.o
>  
>  always-y := hyp_constants.h hyp-constants.s
>  
> diff --git a/arch/arm64/kvm/pauth.c b/arch/arm64/kvm/pauth.c
> new file mode 100644
> index 000000000000..a3a5c404375b
> --- /dev/null
> +++ b/arch/arm64/kvm/pauth.c
> @@ -0,0 +1,196 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2024 - Google LLC
> + * Author: Marc Zyngier <maz@kernel.org>
> + *
> + * Primitive PAuth emulation for ERETAA/ERETAB.
> + *
> + * This code assumes that is is run from EL2, and that it is part of
> + * the emulation of ERETAx for a guest hypervisor. That's a lot of
> + * baked-in assumptions and shortcuts.
> + *
> + * Do no reuse for anything else!
> + */
> +
> +#include <linux/kvm_host.h>
> +
> +#include <asm/kvm_emulate.h>
> +#include <asm/pointer_auth.h>
> +
> +static u64 compute_pac(struct kvm_vcpu *vcpu, u64 ptr,
> +		       struct ptrauth_key ikey)
> +{
> +	struct ptrauth_key gkey;
> +	u64 mod, pac = 0;
> +
> +	preempt_disable();
> +
> +	if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
> +		mod = __vcpu_sys_reg(vcpu, SP_EL2);
> +	else
> +		mod = read_sysreg(sp_el1);
> +
> +	gkey.lo = read_sysreg_s(SYS_APGAKEYLO_EL1);
> +	gkey.hi = read_sysreg_s(SYS_APGAKEYHI_EL1);
> +
> +	__ptrauth_key_install_nosync(APGA, ikey);
> +	isb();
> +
> +	asm volatile(ARM64_ASM_PREAMBLE ".arch_extension pauth\n"
> +		     "pacga %0, %1, %2" : "=r" (pac) : "r" (ptr), "r" (mod));

To use `pacga`, we require that the Address authentication and Generic
authentication use the same algorithm, right?

There doesn't seem to be a check for that up front. There is kinda a check for
that if the PAC doesn't match, (by kvm_has_pauth()).

I'm just pointing out that this looks a little odd to me, to get your input,
not sure if there's actually anything wrong here.

> +	isb();
> +
> +	__ptrauth_key_install_nosync(APGA, gkey);
> +
> +	preempt_enable();
> +
> +	/* PAC in the top 32bits */
> +	return pac;
> +}
> +
> +static bool effective_tbi(struct kvm_vcpu *vcpu, bool bit55)
> +{
> +	u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
> +	bool tbi, tbid;
> +
> +	/*
> +	 * Since we are authenticating an instruction address, we have
> +	 * to take TBID into account. If E2H==0, ignore VA[55], as
> +	 * TCR_EL2 only has a single TBI/TBID. If VA[55] was set in
> +	 * this case, this is likely a guest bug...
> +	 */
> +	if (!vcpu_el2_e2h_is_set(vcpu)) {
> +		tbi = tcr & BIT(20);
> +		tbid = tcr & BIT(29);
> +	} else if (bit55) {
> +		tbi = tcr & TCR_TBI1;
> +		tbid = tcr & TCR_TBID1;
> +	} else {
> +		tbi = tcr & TCR_TBI0;
> +		tbid = tcr & TCR_TBID0;
> +	}
> +
> +	return tbi && !tbid;
> +}
> +
> +static int compute_bottom_pac(struct kvm_vcpu *vcpu, bool bit55)
> +{
> +	static const int maxtxsz = 39; // Revisit these two values once
> +	static const int mintxsz = 16; // (if) we support TTST/LVA/LVA2
> +	u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
> +	int txsz;
> +
> +	if (!vcpu_el2_e2h_is_set(vcpu) || !bit55)
> +		txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
> +	else
> +		txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
> +
> +	return 64 - clamp(txsz, mintxsz, maxtxsz);
> +}
> +
> +static u64 compute_pac_mask(struct kvm_vcpu *vcpu, bool bit55)
> +{
> +	int bottom_pac;
> +	u64 mask;
> +
> +	bottom_pac = compute_bottom_pac(vcpu, bit55);
> +
> +	mask = GENMASK(54, bottom_pac);
> +	if (!effective_tbi(vcpu, bit55))
> +		mask |= GENMASK(63, 56);
> +
> +	return mask;
> +}
> +
> +static u64 to_canonical_addr(struct kvm_vcpu *vcpu, u64 ptr, u64 mask)
> +{
> +	bool bit55 = !!(ptr & BIT(55));
> +
> +	if (bit55)
> +		return ptr | mask;
> +
> +	return ptr & ~mask;
> +}
> +
> +static u64 corrupt_addr(struct kvm_vcpu *vcpu, u64 ptr)
> +{
> +	bool bit55 = !!(ptr & BIT(55));
> +	u64 mask, error_code;
> +	int shift;
> +
> +	if (effective_tbi(vcpu, bit55)) {
> +		mask = GENMASK(54, 53);
> +		shift = 53;
> +	} else {
> +		mask = GENMASK(62, 61);
> +		shift = 61;
> +	}
> +
> +	if (esr_iss_is_eretab(kvm_vcpu_get_esr(vcpu)))
> +		error_code = 2 << shift;
> +	else
> +		error_code = 1 << shift;
> +
> +	ptr &= ~mask;
> +	ptr |= error_code;
> +
> +	return ptr;
> +}
> +
> +/*
> + * Authenticate an ERETAA/ERETAB instruction, returning true if the
> + * authentication succeeded and false otherwise. In all cases, *elr
> + * contains the VA to ERET to. Potential exception injection is left
> + * to the caller.
> + */
> +bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
> +{
> +	u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
> +	u64 esr = kvm_vcpu_get_esr(vcpu);
> +	u64 ptr, cptr, pac, mask;
> +	struct ptrauth_key ikey;
> +
> +	*elr = ptr = vcpu_read_sys_reg(vcpu, ELR_EL2);
> +
> +	/* We assume we're already in the context of an ERETAx */
> +	if (esr_iss_is_eretab(esr)) {
> +		if (!(sctlr & SCTLR_EL1_EnIB))
> +			return true;
> +
> +		ikey.lo = __vcpu_sys_reg(vcpu, APIBKEYLO_EL1);
> +		ikey.hi = __vcpu_sys_reg(vcpu, APIBKEYHI_EL1);
> +	} else {
> +		if (!(sctlr & SCTLR_EL1_EnIA))
> +			return true;
> +
> +		ikey.lo = __vcpu_sys_reg(vcpu, APIAKEYLO_EL1);
> +		ikey.hi = __vcpu_sys_reg(vcpu, APIAKEYHI_EL1);
> +	}
> +
> +	mask = compute_pac_mask(vcpu, !!(ptr & BIT(55)));
> +	cptr = to_canonical_addr(vcpu, ptr, mask);
> +
> +	pac = compute_pac(vcpu, cptr, ikey);
> +
> +	/*
> +	 * Slightly deviate from the pseudocode: if we have a PAC
> +	 * match with the signed pointer, then it must be good.
> +	 * Anything after this point is pure error handling.
> +	 */
> +	if ((pac & mask) == (ptr & mask)) {
> +		*elr = cptr;
> +		return true;
> +	}
> +
> +	/*
> +	 * Authentication failed, corrupt the canonical address if
> +	 * PAuth2 isn't implemented, or some XORing if it is.
> +	 */
> +	if (!kvm_has_pauth(vcpu->kvm, PAuth2))
> +		cptr = corrupt_addr(vcpu, cptr);
> +	else
> +		cptr = ptr ^ (pac & mask);
> +
> +	*elr = cptr;
> +	return false;
> +}

Thanks,
Joey
Marc Zyngier March 7, 2024, 2:24 p.m. UTC | #2
On Thu, 07 Mar 2024 13:39:12 +0000,
Joey Gouly <joey.gouly@arm.com> wrote:
> 
> Note that this is my first time looking at PAuth.

I'm sorry! ;-)

> 
> On Mon, Feb 26, 2024 at 10:05:59AM +0000, Marc Zyngier wrote:
> > FEAT_NV has the interesting property of relying on ERET being
> > trapped. An added complexity is that it also traps ERETAA and
> > ERETAB, meaning that the Pointer Authentication aspect of these
> > instruction must be emulated.
> > 
> > Add an emulation of Pointer Authentication, limited to ERETAx
> > (always using SP_EL2 as the modifier and ELR_EL2 as the pointer),
> > using the Generic Authentication instructions.
> > 
> > The emulation, however small, is placed in its own compilation
> > unit so that it can be avoided if the configuration doesn't
> > include it (or the toolchan in not up to the task).
> > 
> > Signed-off-by: Marc Zyngier <maz@kernel.org>
> > ---
> >  arch/arm64/include/asm/kvm_nested.h    |  12 ++
> >  arch/arm64/include/asm/pgtable-hwdef.h |   1 +
> >  arch/arm64/kvm/Makefile                |   1 +
> >  arch/arm64/kvm/pauth.c                 | 196 +++++++++++++++++++++++++
> >  4 files changed, 210 insertions(+)
> >  create mode 100644 arch/arm64/kvm/pauth.c
> > 
> > diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
> > index dbc4e3a67356..5e0ab0596246 100644
> > --- a/arch/arm64/include/asm/kvm_nested.h
> > +++ b/arch/arm64/include/asm/kvm_nested.h
> > @@ -64,4 +64,16 @@ extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
> >  
> >  int kvm_init_nv_sysregs(struct kvm *kvm);
> >  
> > +#ifdef CONFIG_ARM64_PTR_AUTH
> > +bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr);
> > +#else
> > +static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
> > +{
> > +	/* We really should never execute this... */
> > +	WARN_ON_ONCE(1);
> > +	*elr = 0xbad9acc0debadbad;
> > +	return false;
> > +}
> > +#endif
> > +
> >  #endif /* __ARM64_KVM_NESTED_H */
> > diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
> > index e4944d517c99..bb88e9ef6296 100644
> > --- a/arch/arm64/include/asm/pgtable-hwdef.h
> > +++ b/arch/arm64/include/asm/pgtable-hwdef.h
> > @@ -277,6 +277,7 @@
> >  #define TCR_TBI1		(UL(1) << 38)
> >  #define TCR_HA			(UL(1) << 39)
> >  #define TCR_HD			(UL(1) << 40)
> > +#define TCR_TBID0		(UL(1) << 51)
> >  #define TCR_TBID1		(UL(1) << 52)
> >  #define TCR_NFD0		(UL(1) << 53)
> >  #define TCR_NFD1		(UL(1) << 54)
> > diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> > index c0c050e53157..04882b577575 100644
> > --- a/arch/arm64/kvm/Makefile
> > +++ b/arch/arm64/kvm/Makefile
> > @@ -23,6 +23,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
> >  	 vgic/vgic-its.o vgic/vgic-debug.o
> >  
> >  kvm-$(CONFIG_HW_PERF_EVENTS)  += pmu-emul.o pmu.o
> > +kvm-$(CONFIG_ARM64_PTR_AUTH)  += pauth.o
> >  
> >  always-y := hyp_constants.h hyp-constants.s
> >  
> > diff --git a/arch/arm64/kvm/pauth.c b/arch/arm64/kvm/pauth.c
> > new file mode 100644
> > index 000000000000..a3a5c404375b
> > --- /dev/null
> > +++ b/arch/arm64/kvm/pauth.c
> > @@ -0,0 +1,196 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2024 - Google LLC
> > + * Author: Marc Zyngier <maz@kernel.org>
> > + *
> > + * Primitive PAuth emulation for ERETAA/ERETAB.
> > + *
> > + * This code assumes that is is run from EL2, and that it is part of
> > + * the emulation of ERETAx for a guest hypervisor. That's a lot of
> > + * baked-in assumptions and shortcuts.
> > + *
> > + * Do no reuse for anything else!
> > + */
> > +
> > +#include <linux/kvm_host.h>
> > +
> > +#include <asm/kvm_emulate.h>
> > +#include <asm/pointer_auth.h>
> > +
> > +static u64 compute_pac(struct kvm_vcpu *vcpu, u64 ptr,
> > +		       struct ptrauth_key ikey)
> > +{
> > +	struct ptrauth_key gkey;
> > +	u64 mod, pac = 0;
> > +
> > +	preempt_disable();
> > +
> > +	if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
> > +		mod = __vcpu_sys_reg(vcpu, SP_EL2);
> > +	else
> > +		mod = read_sysreg(sp_el1);
> > +
> > +	gkey.lo = read_sysreg_s(SYS_APGAKEYLO_EL1);
> > +	gkey.hi = read_sysreg_s(SYS_APGAKEYHI_EL1);
> > +
> > +	__ptrauth_key_install_nosync(APGA, ikey);
> > +	isb();
> > +
> > +	asm volatile(ARM64_ASM_PREAMBLE ".arch_extension pauth\n"
> > +		     "pacga %0, %1, %2" : "=r" (pac) : "r" (ptr), "r" (mod));
> 
> To use `pacga`, we require that the Address authentication and Generic
> authentication use the same algorithm, right?

Indeed. It is a strong requirement, and if we don't have that, nothing
works.

Or rather, emulating ERETAx becomes so bloody complicated it isn't
funny: you need to somehow to replay the auth in the context of a
guest so that all of TCR_EL2, SP_EL2, SCTLR_EL2, HCR_EL2, ELR_EL2 are
correctly set, get the value back, and compare it to the one you are
trying to authenticate.

Not happening! Mark and I talked about it ages ago and concluded it
was absolutely insane. PACGA allows us to sidestep the whole thing and
reconstruct the PAC like the pseudocode does using the ComputePAC()
function.

> There doesn't seem to be a check for that up front. There is kinda a check for
> that if the PAC doesn't match, (by kvm_has_pauth()).

Indeed. The main issue is that it is really hard to prevent that
unless we forbid it for all KVM guests, not just NV guests. It is also
that I don't know of any HW that would implement two different auth
methods (which would be really bizarre from an implementation
perspective).

I'm happy to harden system_has_full_ptr_auth() in that case, which
would do the trick.

Thanks,

	M.
Joey Gouly March 8, 2024, 5:20 p.m. UTC | #3
Phew..

On Mon, Feb 26, 2024 at 10:05:59AM +0000, Marc Zyngier wrote:
> FEAT_NV has the interesting property of relying on ERET being
> trapped. An added complexity is that it also traps ERETAA and
> ERETAB, meaning that the Pointer Authentication aspect of these
> instruction must be emulated.
> 
> Add an emulation of Pointer Authentication, limited to ERETAx
> (always using SP_EL2 as the modifier and ELR_EL2 as the pointer),
> using the Generic Authentication instructions.
> 
> The emulation, however small, is placed in its own compilation
> unit so that it can be avoided if the configuration doesn't
> include it (or the toolchan in not up to the task).
> 
> Signed-off-by: Marc Zyngier <maz@kernel.org>
> ---
>  arch/arm64/include/asm/kvm_nested.h    |  12 ++
>  arch/arm64/include/asm/pgtable-hwdef.h |   1 +
>  arch/arm64/kvm/Makefile                |   1 +
>  arch/arm64/kvm/pauth.c                 | 196 +++++++++++++++++++++++++
>  4 files changed, 210 insertions(+)
>  create mode 100644 arch/arm64/kvm/pauth.c
> 
> diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
> index dbc4e3a67356..5e0ab0596246 100644
> --- a/arch/arm64/include/asm/kvm_nested.h
> +++ b/arch/arm64/include/asm/kvm_nested.h
> @@ -64,4 +64,16 @@ extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
>  
>  int kvm_init_nv_sysregs(struct kvm *kvm);
>  
> +#ifdef CONFIG_ARM64_PTR_AUTH
> +bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr);
> +#else
> +static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
> +{
> +	/* We really should never execute this... */
> +	WARN_ON_ONCE(1);
> +	*elr = 0xbad9acc0debadbad;
> +	return false;
> +}
> +#endif
> +
>  #endif /* __ARM64_KVM_NESTED_H */
> diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
> index e4944d517c99..bb88e9ef6296 100644
> --- a/arch/arm64/include/asm/pgtable-hwdef.h
> +++ b/arch/arm64/include/asm/pgtable-hwdef.h
> @@ -277,6 +277,7 @@
>  #define TCR_TBI1		(UL(1) << 38)
>  #define TCR_HA			(UL(1) << 39)
>  #define TCR_HD			(UL(1) << 40)
> +#define TCR_TBID0		(UL(1) << 51)
>  #define TCR_TBID1		(UL(1) << 52)
>  #define TCR_NFD0		(UL(1) << 53)
>  #define TCR_NFD1		(UL(1) << 54)
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index c0c050e53157..04882b577575 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -23,6 +23,7 @@ kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
>  	 vgic/vgic-its.o vgic/vgic-debug.o
>  
>  kvm-$(CONFIG_HW_PERF_EVENTS)  += pmu-emul.o pmu.o
> +kvm-$(CONFIG_ARM64_PTR_AUTH)  += pauth.o
>  
>  always-y := hyp_constants.h hyp-constants.s
>  
> diff --git a/arch/arm64/kvm/pauth.c b/arch/arm64/kvm/pauth.c
> new file mode 100644
> index 000000000000..a3a5c404375b
> --- /dev/null
> +++ b/arch/arm64/kvm/pauth.c
> @@ -0,0 +1,196 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2024 - Google LLC
> + * Author: Marc Zyngier <maz@kernel.org>
> + *
> + * Primitive PAuth emulation for ERETAA/ERETAB.
> + *
> + * This code assumes that is is run from EL2, and that it is part of
> + * the emulation of ERETAx for a guest hypervisor. That's a lot of
> + * baked-in assumptions and shortcuts.
> + *
> + * Do no reuse for anything else!
> + */
> +
> +#include <linux/kvm_host.h>
> +
> +#include <asm/kvm_emulate.h>
> +#include <asm/pointer_auth.h>
> +
> +static u64 compute_pac(struct kvm_vcpu *vcpu, u64 ptr,
> +		       struct ptrauth_key ikey)
> +{
> +	struct ptrauth_key gkey;
> +	u64 mod, pac = 0;
> +
> +	preempt_disable();
> +
> +	if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
> +		mod = __vcpu_sys_reg(vcpu, SP_EL2);
> +	else
> +		mod = read_sysreg(sp_el1);
> +
> +	gkey.lo = read_sysreg_s(SYS_APGAKEYLO_EL1);
> +	gkey.hi = read_sysreg_s(SYS_APGAKEYHI_EL1);
> +
> +	__ptrauth_key_install_nosync(APGA, ikey);
> +	isb();
> +
> +	asm volatile(ARM64_ASM_PREAMBLE ".arch_extension pauth\n"
> +		     "pacga %0, %1, %2" : "=r" (pac) : "r" (ptr), "r" (mod));
> +	isb();
> +
> +	__ptrauth_key_install_nosync(APGA, gkey);
> +
> +	preempt_enable();
> +
> +	/* PAC in the top 32bits */
> +	return pac;
> +}
> +
> +static bool effective_tbi(struct kvm_vcpu *vcpu, bool bit55)
> +{
> +	u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
> +	bool tbi, tbid;
> +
> +	/*
> +	 * Since we are authenticating an instruction address, we have
> +	 * to take TBID into account. If E2H==0, ignore VA[55], as
> +	 * TCR_EL2 only has a single TBI/TBID. If VA[55] was set in
> +	 * this case, this is likely a guest bug...
> +	 */
> +	if (!vcpu_el2_e2h_is_set(vcpu)) {
> +		tbi = tcr & BIT(20);
> +		tbid = tcr & BIT(29);
> +	} else if (bit55) {
> +		tbi = tcr & TCR_TBI1;
> +		tbid = tcr & TCR_TBID1;
> +	} else {
> +		tbi = tcr & TCR_TBI0;
> +		tbid = tcr & TCR_TBID0;
> +	}
> +
> +	return tbi && !tbid;
> +}
> +
> +static int compute_bottom_pac(struct kvm_vcpu *vcpu, bool bit55)
> +{
> +	static const int maxtxsz = 39; // Revisit these two values once
> +	static const int mintxsz = 16; // (if) we support TTST/LVA/LVA2
> +	u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
> +	int txsz;
> +
> +	if (!vcpu_el2_e2h_is_set(vcpu) || !bit55)
> +		txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
> +	else
> +		txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
> +
> +	return 64 - clamp(txsz, mintxsz, maxtxsz);
> +}
> +
> +static u64 compute_pac_mask(struct kvm_vcpu *vcpu, bool bit55)
> +{
> +	int bottom_pac;
> +	u64 mask;
> +
> +	bottom_pac = compute_bottom_pac(vcpu, bit55);
> +
> +	mask = GENMASK(54, bottom_pac);
> +	if (!effective_tbi(vcpu, bit55))
> +		mask |= GENMASK(63, 56);
> +
> +	return mask;
> +}
> +
> +static u64 to_canonical_addr(struct kvm_vcpu *vcpu, u64 ptr, u64 mask)
> +{
> +	bool bit55 = !!(ptr & BIT(55));
> +
> +	if (bit55)
> +		return ptr | mask;
> +
> +	return ptr & ~mask;
> +}
> +
> +static u64 corrupt_addr(struct kvm_vcpu *vcpu, u64 ptr)
> +{
> +	bool bit55 = !!(ptr & BIT(55));
> +	u64 mask, error_code;
> +	int shift;
> +
> +	if (effective_tbi(vcpu, bit55)) {
> +		mask = GENMASK(54, 53);
> +		shift = 53;
> +	} else {
> +		mask = GENMASK(62, 61);
> +		shift = 61;
> +	}
> +
> +	if (esr_iss_is_eretab(kvm_vcpu_get_esr(vcpu)))
> +		error_code = 2 << shift;
> +	else
> +		error_code = 1 << shift;
> +
> +	ptr &= ~mask;
> +	ptr |= error_code;
> +
> +	return ptr;
> +}
> +
> +/*
> + * Authenticate an ERETAA/ERETAB instruction, returning true if the
> + * authentication succeeded and false otherwise. In all cases, *elr
> + * contains the VA to ERET to. Potential exception injection is left
> + * to the caller.
> + */
> +bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
> +{
> +	u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
> +	u64 esr = kvm_vcpu_get_esr(vcpu);
> +	u64 ptr, cptr, pac, mask;
> +	struct ptrauth_key ikey;
> +
> +	*elr = ptr = vcpu_read_sys_reg(vcpu, ELR_EL2);
> +
> +	/* We assume we're already in the context of an ERETAx */
> +	if (esr_iss_is_eretab(esr)) {
> +		if (!(sctlr & SCTLR_EL1_EnIB))
> +			return true;
> +
> +		ikey.lo = __vcpu_sys_reg(vcpu, APIBKEYLO_EL1);
> +		ikey.hi = __vcpu_sys_reg(vcpu, APIBKEYHI_EL1);
> +	} else {
> +		if (!(sctlr & SCTLR_EL1_EnIA))
> +			return true;
> +
> +		ikey.lo = __vcpu_sys_reg(vcpu, APIAKEYLO_EL1);
> +		ikey.hi = __vcpu_sys_reg(vcpu, APIAKEYHI_EL1);
> +	}
> +
> +	mask = compute_pac_mask(vcpu, !!(ptr & BIT(55)));
> +	cptr = to_canonical_addr(vcpu, ptr, mask);
> +
> +	pac = compute_pac(vcpu, cptr, ikey);
> +
> +	/*
> +	 * Slightly deviate from the pseudocode: if we have a PAC
> +	 * match with the signed pointer, then it must be good.
> +	 * Anything after this point is pure error handling.
> +	 */
> +	if ((pac & mask) == (ptr & mask)) {
> +		*elr = cptr;
> +		return true;
> +	}
> +
> +	/*
> +	 * Authentication failed, corrupt the canonical address if
> +	 * PAuth2 isn't implemented, or some XORing if it is.
> +	 */
> +	if (!kvm_has_pauth(vcpu->kvm, PAuth2))
> +		cptr = corrupt_addr(vcpu, cptr);
> +	else
> +		cptr = ptr ^ (pac & mask);
> +
> +	*elr = cptr;
> +	return false;
> +}

Each function in this file is quite small, but there's certainly a lot of
complexity and background knowledge required to understand them!

I spent quite some time on each part to see if it matches what I understood
from the Arm ARM.

Reviewed-by: Joey Gouly <joey.gouly@arm.com>


A side note / thing I considered. KVM doesn't currently handle ERET exceptions
from EL1.

1. If an ERETA{A,B} were executed from a nested EL1 guest, that would be
trapped up to Host KVM at EL2.

2. kvm_hyp_handle_eret() returns false since it's not from vEL2.  Inside
kvm_handle_eret(), is_hyp_ctxt() is false so the exception is injected into
vEL2 (via kvm_inject_nested_sync()).

3. vEL2 gets the exception, kvm_hyp_handle_eret() returns false as before.
Inside kvm_handle_eret(), is_hyp_ctxt() is also false, so
kvm_inject_nested_sync() is called but now errors out since vcpu_has_nv() is
false.

Is that flow right? Am I missing something?

Thanks,
Joey
Marc Zyngier March 8, 2024, 5:54 p.m. UTC | #4
On Fri, 08 Mar 2024 17:20:59 +0000,
Joey Gouly <joey.gouly@arm.com> wrote:
> 
> Phew..

[...]

> Each function in this file is quite small, but there's certainly a lot of
> complexity and background knowledge required to understand them!
> 
> I spent quite some time on each part to see if it matches what I understood
> from the Arm ARM.
> 
> Reviewed-by: Joey Gouly <joey.gouly@arm.com>

Thanks a lot for putting up with it, much appreciated.

> A side note / thing I considered. KVM doesn't currently handle ERET exceptions
> from EL1.

EL1 is ambiguous here. Is that EL1 from the PoV of the guest?

>
> 1. If an ERETA{A,B} were executed from a nested EL1 guest, that would be
> trapped up to Host KVM at EL2.

There are two possibilities for that (assuming EL1 from the PoV of a
L1 guest):

(1) this EL1 guest is itself a guest hypervisor (i.e. we are running
    an L1 guest which itself is using NV and running an L2 which
    itself is a hypervisor). In that case, ERET* would have to be
    trapped to EL2 and re-injected. Note that we do not support NV
    under NV. Yet...

(2) the L2 guest is not a hypervisor (no recursive NV), but the L1
    hypervisor has set HFGITR_EL2.ERET==1. We'd have to re-inject the
    exception into L1, just like in the precedent case.

If neither HCR_EL2.NV nor HFGITR_EL2.ERET are set, then no ERET* gets
trapped at all. Crucially, when running an L2 guest that doesn't isn't
itself a hypervisor (no nested NV), we do not trap ERET* at all.

In a way, the NV overhead is mostly when running L1. Once you run L2,
the overhead "vanishes", to some extent (as long as you don't exit,
because that's where the cost is).

> 2. kvm_hyp_handle_eret() returns false since it's not from vEL2.  Inside
> kvm_handle_eret(), is_hyp_ctxt() is false so the exception is injected into
> vEL2 (via kvm_inject_nested_sync()).
> 
> 3. vEL2 gets the exception, kvm_hyp_handle_eret() returns false as before.
> Inside kvm_handle_eret(), is_hyp_ctxt() is also false, so
> kvm_inject_nested_sync() is called but now errors out since vcpu_has_nv() is
> false.
> 
> Is that flow right? Am I missing something?

I'm not sure. The cases where ERET gets trapped are really limited to
the above two cases.

Thanks,

	M.
Joey Gouly March 12, 2024, 10:46 a.m. UTC | #5
On Fri, Mar 08, 2024 at 05:54:20PM +0000, Marc Zyngier wrote:
> On Fri, 08 Mar 2024 17:20:59 +0000,
> Joey Gouly <joey.gouly@arm.com> wrote:
> > 
> > Phew..
> 
> [...]
> 
> > Each function in this file is quite small, but there's certainly a lot of
> > complexity and background knowledge required to understand them!
> > 
> > I spent quite some time on each part to see if it matches what I understood
> > from the Arm ARM.
> > 
> > Reviewed-by: Joey Gouly <joey.gouly@arm.com>
> 
> Thanks a lot for putting up with it, much appreciated.
> 
> > A side note / thing I considered. KVM doesn't currently handle ERET exceptions
> > from EL1.
> 
> EL1 is ambiguous here. Is that EL1 from the PoV of the guest?

Yes I meant an EL1 guest (not vEL2).

> 
> >
> > 1. If an ERETA{A,B} were executed from a nested EL1 guest, that would be
> > trapped up to Host KVM at EL2.
> 
> There are two possibilities for that (assuming EL1 from the PoV of a
> L1 guest):
> 
> (1) this EL1 guest is itself a guest hypervisor (i.e. we are running
>     an L1 guest which itself is using NV and running an L2 which
>     itself is a hypervisor). In that case, ERET* would have to be
>     trapped to EL2 and re-injected. Note that we do not support NV
>     under NV. Yet...
> 
> (2) the L2 guest is not a hypervisor (no recursive NV), but the L1
>     hypervisor has set HFGITR_EL2.ERET==1. We'd have to re-inject the
>     exception into L1, just like in the precedent case.
> 
> If neither HCR_EL2.NV nor HFGITR_EL2.ERET are set, then no ERET* gets
> trapped at all. Crucially, when running an L2 guest that doesn't isn't
> itself a hypervisor (no nested NV), we do not trap ERET* at all.

That was the missing part. __compute_hcr() only adds HCR_EL2.NV when
is_hyp_ctxt() is true. When I conjured up this scenario, I had HCR_EL2.NV set
(in my head) for the L2 EL1 guest, which is not the case.

> 
> In a way, the NV overhead is mostly when running L1. Once you run L2,
> the overhead "vanishes", to some extent (as long as you don't exit,
> because that's where the cost is).
> 
> > 2. kvm_hyp_handle_eret() returns false since it's not from vEL2.  Inside
> > kvm_handle_eret(), is_hyp_ctxt() is false so the exception is injected into
> > vEL2 (via kvm_inject_nested_sync()).
> > 
> > 3. vEL2 gets the exception, kvm_hyp_handle_eret() returns false as before.
> > Inside kvm_handle_eret(), is_hyp_ctxt() is also false, so
> > kvm_inject_nested_sync() is called but now errors out since vcpu_has_nv() is
> > false.
> > 
> > Is that flow right? Am I missing something?
> 
> I'm not sure. The cases where ERET gets trapped are really limited to
> the above two cases.
> 

Thanks for the explanation,

Joey
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/kvm_nested.h b/arch/arm64/include/asm/kvm_nested.h
index dbc4e3a67356..5e0ab0596246 100644
--- a/arch/arm64/include/asm/kvm_nested.h
+++ b/arch/arm64/include/asm/kvm_nested.h
@@ -64,4 +64,16 @@  extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
 
 int kvm_init_nv_sysregs(struct kvm *kvm);
 
+#ifdef CONFIG_ARM64_PTR_AUTH
+bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr);
+#else
+static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
+{
+	/* We really should never execute this... */
+	WARN_ON_ONCE(1);
+	*elr = 0xbad9acc0debadbad;
+	return false;
+}
+#endif
+
 #endif /* __ARM64_KVM_NESTED_H */
diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h
index e4944d517c99..bb88e9ef6296 100644
--- a/arch/arm64/include/asm/pgtable-hwdef.h
+++ b/arch/arm64/include/asm/pgtable-hwdef.h
@@ -277,6 +277,7 @@ 
 #define TCR_TBI1		(UL(1) << 38)
 #define TCR_HA			(UL(1) << 39)
 #define TCR_HD			(UL(1) << 40)
+#define TCR_TBID0		(UL(1) << 51)
 #define TCR_TBID1		(UL(1) << 52)
 #define TCR_NFD0		(UL(1) << 53)
 #define TCR_NFD1		(UL(1) << 54)
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index c0c050e53157..04882b577575 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -23,6 +23,7 @@  kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \
 	 vgic/vgic-its.o vgic/vgic-debug.o
 
 kvm-$(CONFIG_HW_PERF_EVENTS)  += pmu-emul.o pmu.o
+kvm-$(CONFIG_ARM64_PTR_AUTH)  += pauth.o
 
 always-y := hyp_constants.h hyp-constants.s
 
diff --git a/arch/arm64/kvm/pauth.c b/arch/arm64/kvm/pauth.c
new file mode 100644
index 000000000000..a3a5c404375b
--- /dev/null
+++ b/arch/arm64/kvm/pauth.c
@@ -0,0 +1,196 @@ 
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 - Google LLC
+ * Author: Marc Zyngier <maz@kernel.org>
+ *
+ * Primitive PAuth emulation for ERETAA/ERETAB.
+ *
+ * This code assumes that is is run from EL2, and that it is part of
+ * the emulation of ERETAx for a guest hypervisor. That's a lot of
+ * baked-in assumptions and shortcuts.
+ *
+ * Do no reuse for anything else!
+ */
+
+#include <linux/kvm_host.h>
+
+#include <asm/kvm_emulate.h>
+#include <asm/pointer_auth.h>
+
+static u64 compute_pac(struct kvm_vcpu *vcpu, u64 ptr,
+		       struct ptrauth_key ikey)
+{
+	struct ptrauth_key gkey;
+	u64 mod, pac = 0;
+
+	preempt_disable();
+
+	if (!vcpu_get_flag(vcpu, SYSREGS_ON_CPU))
+		mod = __vcpu_sys_reg(vcpu, SP_EL2);
+	else
+		mod = read_sysreg(sp_el1);
+
+	gkey.lo = read_sysreg_s(SYS_APGAKEYLO_EL1);
+	gkey.hi = read_sysreg_s(SYS_APGAKEYHI_EL1);
+
+	__ptrauth_key_install_nosync(APGA, ikey);
+	isb();
+
+	asm volatile(ARM64_ASM_PREAMBLE ".arch_extension pauth\n"
+		     "pacga %0, %1, %2" : "=r" (pac) : "r" (ptr), "r" (mod));
+	isb();
+
+	__ptrauth_key_install_nosync(APGA, gkey);
+
+	preempt_enable();
+
+	/* PAC in the top 32bits */
+	return pac;
+}
+
+static bool effective_tbi(struct kvm_vcpu *vcpu, bool bit55)
+{
+	u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+	bool tbi, tbid;
+
+	/*
+	 * Since we are authenticating an instruction address, we have
+	 * to take TBID into account. If E2H==0, ignore VA[55], as
+	 * TCR_EL2 only has a single TBI/TBID. If VA[55] was set in
+	 * this case, this is likely a guest bug...
+	 */
+	if (!vcpu_el2_e2h_is_set(vcpu)) {
+		tbi = tcr & BIT(20);
+		tbid = tcr & BIT(29);
+	} else if (bit55) {
+		tbi = tcr & TCR_TBI1;
+		tbid = tcr & TCR_TBID1;
+	} else {
+		tbi = tcr & TCR_TBI0;
+		tbid = tcr & TCR_TBID0;
+	}
+
+	return tbi && !tbid;
+}
+
+static int compute_bottom_pac(struct kvm_vcpu *vcpu, bool bit55)
+{
+	static const int maxtxsz = 39; // Revisit these two values once
+	static const int mintxsz = 16; // (if) we support TTST/LVA/LVA2
+	u64 tcr = vcpu_read_sys_reg(vcpu, TCR_EL2);
+	int txsz;
+
+	if (!vcpu_el2_e2h_is_set(vcpu) || !bit55)
+		txsz = FIELD_GET(TCR_T0SZ_MASK, tcr);
+	else
+		txsz = FIELD_GET(TCR_T1SZ_MASK, tcr);
+
+	return 64 - clamp(txsz, mintxsz, maxtxsz);
+}
+
+static u64 compute_pac_mask(struct kvm_vcpu *vcpu, bool bit55)
+{
+	int bottom_pac;
+	u64 mask;
+
+	bottom_pac = compute_bottom_pac(vcpu, bit55);
+
+	mask = GENMASK(54, bottom_pac);
+	if (!effective_tbi(vcpu, bit55))
+		mask |= GENMASK(63, 56);
+
+	return mask;
+}
+
+static u64 to_canonical_addr(struct kvm_vcpu *vcpu, u64 ptr, u64 mask)
+{
+	bool bit55 = !!(ptr & BIT(55));
+
+	if (bit55)
+		return ptr | mask;
+
+	return ptr & ~mask;
+}
+
+static u64 corrupt_addr(struct kvm_vcpu *vcpu, u64 ptr)
+{
+	bool bit55 = !!(ptr & BIT(55));
+	u64 mask, error_code;
+	int shift;
+
+	if (effective_tbi(vcpu, bit55)) {
+		mask = GENMASK(54, 53);
+		shift = 53;
+	} else {
+		mask = GENMASK(62, 61);
+		shift = 61;
+	}
+
+	if (esr_iss_is_eretab(kvm_vcpu_get_esr(vcpu)))
+		error_code = 2 << shift;
+	else
+		error_code = 1 << shift;
+
+	ptr &= ~mask;
+	ptr |= error_code;
+
+	return ptr;
+}
+
+/*
+ * Authenticate an ERETAA/ERETAB instruction, returning true if the
+ * authentication succeeded and false otherwise. In all cases, *elr
+ * contains the VA to ERET to. Potential exception injection is left
+ * to the caller.
+ */
+bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
+{
+	u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL2);
+	u64 esr = kvm_vcpu_get_esr(vcpu);
+	u64 ptr, cptr, pac, mask;
+	struct ptrauth_key ikey;
+
+	*elr = ptr = vcpu_read_sys_reg(vcpu, ELR_EL2);
+
+	/* We assume we're already in the context of an ERETAx */
+	if (esr_iss_is_eretab(esr)) {
+		if (!(sctlr & SCTLR_EL1_EnIB))
+			return true;
+
+		ikey.lo = __vcpu_sys_reg(vcpu, APIBKEYLO_EL1);
+		ikey.hi = __vcpu_sys_reg(vcpu, APIBKEYHI_EL1);
+	} else {
+		if (!(sctlr & SCTLR_EL1_EnIA))
+			return true;
+
+		ikey.lo = __vcpu_sys_reg(vcpu, APIAKEYLO_EL1);
+		ikey.hi = __vcpu_sys_reg(vcpu, APIAKEYHI_EL1);
+	}
+
+	mask = compute_pac_mask(vcpu, !!(ptr & BIT(55)));
+	cptr = to_canonical_addr(vcpu, ptr, mask);
+
+	pac = compute_pac(vcpu, cptr, ikey);
+
+	/*
+	 * Slightly deviate from the pseudocode: if we have a PAC
+	 * match with the signed pointer, then it must be good.
+	 * Anything after this point is pure error handling.
+	 */
+	if ((pac & mask) == (ptr & mask)) {
+		*elr = cptr;
+		return true;
+	}
+
+	/*
+	 * Authentication failed, corrupt the canonical address if
+	 * PAuth2 isn't implemented, or some XORing if it is.
+	 */
+	if (!kvm_has_pauth(vcpu->kvm, PAuth2))
+		cptr = corrupt_addr(vcpu, cptr);
+	else
+		cptr = ptr ^ (pac & mask);
+
+	*elr = cptr;
+	return false;
+}