
[v2,06/13] KVM: arm64: nv: Fast-track 'InHost' exception returns

Message ID 20240226100601.2379693-7-maz@kernel.org (mailing list archive)
State New, archived
Series KVM/arm64: Add NV support for ERET and PAuth

Commit Message

Marc Zyngier Feb. 26, 2024, 10:05 a.m. UTC
A significant part of the FEAT_NV extension is to trap ERET
instructions so that the hypervisor gets a chance to switch
from a vEL2 L1 guest to an EL1 L2 guest.

But this also has the unfortunate consequence of trapping ERET
in unexpected circumstances, such as staying at vEL2 (interrupt
handling while running in the guest hypervisor), or returning to
host userspace in the case of a VHE guest.

Although we already make some effort to handle these ERETs more
quickly by not doing the put/load dance, that handling still sits
way too far down the line to be efficient enough.

For these cases, it would be ideal to ERET directly, no questions asked.
Of course, we can't do that. But the next best thing is to do it as
early as possible, in fixup_guest_exit(), much as we would handle
FPSIMD exceptions.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/emulate-nested.c | 29 +++-------------------
 arch/arm64/kvm/hyp/vhe/switch.c | 44 +++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 26 deletions(-)
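
As a reminder of where this hook sits, here is a simplified sketch of the
dispatch logic (reconstructed, details elided; see the real code in
arch/arm64/kvm/hyp/include/hyp/switch.h) showing how fixup_guest_exit()
reaches per-EC handlers such as the new kvm_hyp_handle_eret():

static bool fixup_guest_exit_sketch(struct kvm_vcpu *vcpu, u64 *exit_code)
{
	if (ARM_EXCEPTION_CODE(*exit_code) == ARM_EXCEPTION_TRAP) {
		exit_handler_fn fn;

		/* index the per-mode handler table by ESR_EL2.EC */
		fn = hyp_exit_handlers[kvm_vcpu_trap_get_class(vcpu)];
		if (fn && fn(vcpu, exit_code))
			return true;	/* handled at EL2, ERET straight back */
	}

	return false;		/* fall back to the full exit path */
}

A handler returning true sends the vcpu straight back into the guest, so an
ERET handled here never reaches the run loop, let alone the put/load dance.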

Comments

Joey Gouly Feb. 28, 2024, 4:08 p.m. UTC | #1
On Mon, Feb 26, 2024 at 10:05:54AM +0000, Marc Zyngier wrote:
> [...]
> +static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
> +{
> +	u64 spsr, mode;
> +
> +	/*
> +	 * Going through the whole put/load motions is a waste of time
> +	 * if this is a VHE guest hypervisor returning to its own
> +	 * userspace, or the hypervisor performing a local exception
> +	 * return. No need to save/restore registers, no need to
> +	 * switch S2 MMU. Just do the canonical ERET.
> +	 *
> +	 * Unless the trap has to be forwarded further down the line,
> +	 * of course...
> +	 */
> +	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV)
> +		return false;
> +
> +	spsr = read_sysreg_el1(SYS_SPSR);
> +	mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
> +
> +	switch (mode) {
> +	case PSR_MODE_EL0t:
> +		if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
> +			return false;
> +		break;
> +	case PSR_MODE_EL2t:
> +		mode = PSR_MODE_EL1t;
> +		break;
> +	case PSR_MODE_EL2h:
> +		mode = PSR_MODE_EL1h;
> +		break;
> +	default:
> +		return false;
> +	}

Thanks for pointing out to_hw_pstate() (off-list); I spent far too long trying
to understand how the original code converted PSTATE.M from (v)EL2 to EL1, and
missed that function while browsing.

Seems hard to re-use to_hw_pstate() here, since we want the early returns.
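
For reference, to_hw_pstate() in emulate-nested.c looks roughly like this
(reconstructed from memory, the exact code may differ). It always produces a
translated PSTATE and has no way to say "leave this ERET to the slow path",
which is why the handler open-codes the switch with early returns:

static u64 to_hw_pstate(const struct kvm_vcpu *vcpu)
{
	u64 mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT);

	switch (mode) {
	case PSR_MODE_EL2h:
		mode = PSR_MODE_EL1h;
		break;
	case PSR_MODE_EL2t:
		mode = PSR_MODE_EL1t;
		break;
	}

	/* unconditionally rebuild PSTATE with the (possibly) demoted mode */
	return (*vcpu_cpsr(vcpu) & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode;
}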

> +
> +	spsr = (spsr & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode;

I don't think we need to mask out PSR_MODE32_BIT here again, since if it was
set in `mode`, it wouldn't have matched in the switch statement. It's possibly
out of 'defensiveness' though. And I'm being nitpicky.
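
The PSTATE.M encodings (from arch/arm64/include/uapi/asm/ptrace.h) make that
concrete:

#define PSR_MODE_EL0t	0x00000000
#define PSR_MODE_EL1t	0x00000004
#define PSR_MODE_EL1h	0x00000005
#define PSR_MODE_EL2t	0x00000008
#define PSR_MODE_EL2h	0x00000009
#define PSR_MODE_MASK	0x0000000f
#define PSR_MODE32_BIT	0x00000010

With PSR_MODE32_BIT set, `mode` is >= 0x10 and can't match the EL0t/EL2t/EL2h
cases, so the default case has already returned false by the time the masking
happens.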

> [...]

Otherwise,

Reviewed-by: Joey Gouly <joey.gouly@arm.com>

Thanks,
Joey
Marc Zyngier Feb. 29, 2024, 1:44 p.m. UTC | #2
On Wed, 28 Feb 2024 16:08:00 +0000,
Joey Gouly <joey.gouly@arm.com> wrote:
> 
> On Mon, Feb 26, 2024 at 10:05:54AM +0000, Marc Zyngier wrote:
> > [...]
> > +	spsr = read_sysreg_el1(SYS_SPSR);
> > +	mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
> > +
> > +	switch (mode) {
> > +	case PSR_MODE_EL0t:
> > +		if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
> > +			return false;
> > +		break;
> > +	case PSR_MODE_EL2t:
> > +		mode = PSR_MODE_EL1t;
> > +		break;
> > +	case PSR_MODE_EL2h:
> > +		mode = PSR_MODE_EL1h;
> > +		break;
> > +	default:
> > +		return false;
> > +	}
> 
> Thanks for pointing out to_hw_pstate() (off-list); I spent far too long trying
> to understand how the original code converted PSTATE.M from (v)EL2 to EL1, and
> missed that function while browsing.
> 
> Seems hard to re-use to_hw_pstate() here, since we want the early
> returns.

Indeed. I tried to fit it in, but ended up checking for things twice,
which isn't great either.

> 
> > +
> > +	spsr = (spsr & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode;
> 
> I don't think we need to mask out PSR_MODE32_BIT here again, since if it was
> set in `mode`, it wouldn't have matched in the switch statement. It's possibly
> out of 'defensiveness' though. And I'm being nitpicky.

It's a sanity thing. We want to make sure all of M[4:0] are cleared
before or'ing in the new mode. I agree that we wouldn't be there if
PSR_MODE32_BIT was set, but this matches the usage in most other
places in the code.

> 
> > [...]
> 
> Otherwise,
> 
> Reviewed-by: Joey Gouly <joey.gouly@arm.com>

Thanks!

	M.

Patch

diff --git a/arch/arm64/kvm/emulate-nested.c b/arch/arm64/kvm/emulate-nested.c
index 2d80e81ae650..63a74c0330f1 100644
--- a/arch/arm64/kvm/emulate-nested.c
+++ b/arch/arm64/kvm/emulate-nested.c
@@ -2172,8 +2172,7 @@ static u64 kvm_check_illegal_exception_return(struct kvm_vcpu *vcpu, u64 spsr)
 
 void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
 {
-	u64 spsr, elr, mode;
-	bool direct_eret;
+	u64 spsr, elr;
 
 	/*
 	 * Forward this trap to the virtual EL2 if the virtual
@@ -2182,33 +2181,11 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu)
 	if (forward_traps(vcpu, HCR_NV))
 		return;
 
-	/*
-	 * Going through the whole put/load motions is a waste of time
-	 * if this is a VHE guest hypervisor returning to its own
-	 * userspace, or the hypervisor performing a local exception
-	 * return. No need to save/restore registers, no need to
-	 * switch S2 MMU. Just do the canonical ERET.
-	 */
-	spsr = vcpu_read_sys_reg(vcpu, SPSR_EL2);
-	spsr = kvm_check_illegal_exception_return(vcpu, spsr);
-
-	mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
-
-	direct_eret  = (mode == PSR_MODE_EL0t &&
-			vcpu_el2_e2h_is_set(vcpu) &&
-			vcpu_el2_tge_is_set(vcpu));
-	direct_eret |= (mode == PSR_MODE_EL2h || mode == PSR_MODE_EL2t);
-
-	if (direct_eret) {
-		*vcpu_pc(vcpu) = vcpu_read_sys_reg(vcpu, ELR_EL2);
-		*vcpu_cpsr(vcpu) = spsr;
-		trace_kvm_nested_eret(vcpu, *vcpu_pc(vcpu), spsr);
-		return;
-	}
-
 	preempt_disable();
 	kvm_arch_vcpu_put(vcpu);
 
+	spsr = __vcpu_sys_reg(vcpu, SPSR_EL2);
+	spsr = kvm_check_illegal_exception_return(vcpu, spsr);
 	elr = __vcpu_sys_reg(vcpu, ELR_EL2);
 
 	trace_kvm_nested_eret(vcpu, elr, spsr);
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index d5fdcea2b366..eaf242b8e0cf 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -206,6 +206,49 @@ void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu)
 	__vcpu_put_switch_sysregs(vcpu);
 }
 
+static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+	u64 spsr, mode;
+
+	/*
+	 * Going through the whole put/load motions is a waste of time
+	 * if this is a VHE guest hypervisor returning to its own
+	 * userspace, or the hypervisor performing a local exception
+	 * return. No need to save/restore registers, no need to
+	 * switch S2 MMU. Just do the canonical ERET.
+	 *
+	 * Unless the trap has to be forwarded further down the line,
+	 * of course...
+	 */
+	if (__vcpu_sys_reg(vcpu, HCR_EL2) & HCR_NV)
+		return false;
+
+	spsr = read_sysreg_el1(SYS_SPSR);
+	mode = spsr & (PSR_MODE_MASK | PSR_MODE32_BIT);
+
+	switch (mode) {
+	case PSR_MODE_EL0t:
+		if (!(vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)))
+			return false;
+		break;
+	case PSR_MODE_EL2t:
+		mode = PSR_MODE_EL1t;
+		break;
+	case PSR_MODE_EL2h:
+		mode = PSR_MODE_EL1h;
+		break;
+	default:
+		return false;
+	}
+
+	spsr = (spsr & ~(PSR_MODE_MASK | PSR_MODE32_BIT)) | mode;
+
+	write_sysreg_el2(spsr, SYS_SPSR);
+	write_sysreg_el2(read_sysreg_el1(SYS_ELR), SYS_ELR);
+
+	return true;
+}
+
 static const exit_handler_fn hyp_exit_handlers[] = {
 	[0 ... ESR_ELx_EC_MAX]		= NULL,
 	[ESR_ELx_EC_CP15_32]		= kvm_hyp_handle_cp15_32,
@@ -216,6 +259,7 @@ static const exit_handler_fn hyp_exit_handlers[] = {
 	[ESR_ELx_EC_DABT_LOW]		= kvm_hyp_handle_dabt_low,
 	[ESR_ELx_EC_WATCHPT_LOW]	= kvm_hyp_handle_watchpt_low,
 	[ESR_ELx_EC_PAC]		= kvm_hyp_handle_ptrauth,
+	[ESR_ELx_EC_ERET]		= kvm_hyp_handle_eret,
 	[ESR_ELx_EC_MOPS]		= kvm_hyp_handle_mops,
 };