[RFC,v2,13/23] KVM: arm64/sve: Context switch the SVE registers

Message ID 1538141967-15375-14-git-send-email-Dave.Martin@arm.com (mailing list archive)
State New, archived
Series KVM: arm64: Initial support for SVE guests

Commit Message

Dave Martin Sept. 28, 2018, 1:39 p.m. UTC
In order to give each vcpu its own view of the SVE registers, this
patch adds context storage via a new sve_state pointer in struct
vcpu_arch.  An additional member sve_max_vl is also added for each
vcpu, to determine the maximum vector length visible to the guest
and thus the value to be configured in ZCR_EL2.LEN while the vcpu is
active.  This also determines the layout and size of the storage in
sve_state, which is read and written by the same backend functions
that are used for context-switching the SVE state for host tasks.

On SVE-enabled vcpus, SVE access traps are now handled by switching
in the vcpu's SVE context and disabling the trap before returning
to the guest.  On other vcpus, the trap is not handled and an exit
back to the host occurs, where the handle_sve() fallback path
reflects an undefined instruction exception back to the guest,
consistently with the behaviour of non-SVE-capable hardware (as was
done unconditionally prior to this patch).

No SVE handling is added on non-VHE-only paths, since VHE is an
architectural and Kconfig prerequisite of SVE.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---

Changes since RFCv1:

 * Add an if_sve() helper macro to efficiently skip or optimise out
   SVE conditional support code for the SVE-unsupported case.  This
   reduces the verbose boilerplate at the affected sites.

 * In the style of sve_pffr(), a vcpu_sve_pffr() helper is added to
   provide the FFR anchor pointer for sve_load_state() in the hyp switch
   code.  This helps avoid some open-coded pointer munging, which is not
   very readable.

 * The condition for calling __hyp_switch_fpsimd() is abstracted for
   better readability.
---
 arch/arm64/include/asm/kvm_host.h |  6 ++++
 arch/arm64/kvm/fpsimd.c           |  5 +--
 arch/arm64/kvm/hyp/switch.c       | 71 ++++++++++++++++++++++++++++++---------
 3 files changed, 65 insertions(+), 17 deletions(-)

Comments

Alex Bennée Nov. 19, 2018, 4:36 p.m. UTC | #1
Dave Martin <Dave.Martin@arm.com> writes:

> In order to give each vcpu its own view of the SVE registers, this
> patch adds context storage via a new sve_state pointer in struct
> vcpu_arch.  An additional member sve_max_vl is also added for each
> vcpu, to determine the maximum vector length visible to the guest
> and thus the value to be configured in ZCR_EL2.LEN while the is
> active.  This also determines the layout and size of the storage in
> sve_state, which is read and written by the same backend functions
> that are used for context-switching the SVE state for host tasks.
>
> On SVE-enabled vcpus, SVE access traps are now handled by switching
> in the vcpu's SVE context and disabling the trap before returning
> to the guest.  On other vcpus, the trap is not handled and an exit
> back to the host occurs, where the handle_sve() fallback path
> reflects an undefined instruction exception back to the guest,
> consistently with the behaviour of non-SVE-capable hardware (as was
> done unconditionally prior to this patch).
>
> No SVE handling is added on non-VHE-only paths, since VHE is an
> architectural and Kconfig prerequisite of SVE.
>
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> ---
>
> Changes since RFCv1:
>
>  * Add a if_sve () helper macro to efficiently skip or optimise out
>    SVE conditional support code for the SVE-unsupported case.  This
>    reduces the verbose boilerplate at the affected sites.
>
>  * In the style of sve_pffr(), a vcpu_sve_pffr() helper is added to
>    provide the FFR anchor pointer for sve_load_state() in the hyp switch
>    code.   This help avoid some open-coded pointer mungeing which is not
>    very readable.
>
>  * The condition for calling __hyp_switch_fpsimd() is abstracted for
>    better readability.
> ---
>  arch/arm64/include/asm/kvm_host.h |  6 ++++
>  arch/arm64/kvm/fpsimd.c           |  5 +--
>  arch/arm64/kvm/hyp/switch.c       | 71 ++++++++++++++++++++++++++++++---------
>  3 files changed, 65 insertions(+), 17 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 76cbb95e..8e9cd43 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -210,6 +210,8 @@ typedef struct kvm_cpu_context kvm_cpu_context_t;
>
>  struct kvm_vcpu_arch {
>  	struct kvm_cpu_context ctxt;
> +	void *sve_state;
> +	unsigned int sve_max_vl;
>
>  	/* HYP configuration */
>  	u64 hcr_el2;
> @@ -302,6 +304,10 @@ struct kvm_vcpu_arch {
>  	bool sysregs_loaded_on_cpu;
>  };
>
> +/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
> +#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> +				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> +
>  /* vcpu_arch flags field values: */
>  #define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
>  #define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index 29e5585..3474388 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -86,10 +86,11 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
>
>  	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
>  		fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs,
> -					 NULL, sve_max_vl);
> +					 vcpu->arch.sve_state,
> +					 vcpu->arch.sve_max_vl);
>
>  		clear_thread_flag(TIF_FOREIGN_FPSTATE);
> -		clear_thread_flag(TIF_SVE);
> +		update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
>  	}
>  }
>
> diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> index 085ed06..9941349 100644
> --- a/arch/arm64/kvm/hyp/switch.c
> +++ b/arch/arm64/kvm/hyp/switch.c
> @@ -98,7 +98,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
>  	val = read_sysreg(cpacr_el1);
>  	val |= CPACR_EL1_TTA;
>  	val &= ~CPACR_EL1_ZEN;
> -	if (!update_fp_enabled(vcpu)) {
> +	if (update_fp_enabled(vcpu)) {
> +		if (vcpu_has_sve(vcpu))
> +			val |= CPACR_EL1_ZEN;
> +	} else {
>  		val &= ~CPACR_EL1_FPEN;
>  		__activate_traps_fpsimd32(vcpu);
>  	}
> @@ -332,16 +335,29 @@ static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
>  	}
>  }
>
> -static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
> +/*
> + * if () with a gating check for SVE support to minimise branch
> + * mispredictions in non-SVE systems.
> + * (system_supports_sve() is resolved at build time or via a static key.)
> + */
> +#define if_sve(cond) if (system_supports_sve() && (cond))
> +
> +static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu,
> +					   bool guest_has_sve)
>  {
>  	struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
>
> -	if (has_vhe())
> -		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
> -			     cpacr_el1);
> -	else
> +	if (has_vhe()) {
> +		u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
> +
> +		if_sve (guest_has_sve)
> +			reg |= CPACR_EL1_ZEN;
> +
> +		write_sysreg(reg, cpacr_el1);
> +	} else {
>  		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
>  			     cptr_el2);
> +	}
>
>  	isb();
>
> @@ -350,8 +366,7 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
>  		 * In the SVE case, VHE is assumed: it is enforced by
>  		 * Kconfig and kvm_arch_init().
>  		 */
> -		if (system_supports_sve() &&
> -		    (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE)) {
> +		if_sve (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE) {
>  			struct thread_struct *thread = container_of(
>  				host_fpsimd,
>  				struct thread_struct, uw.fpsimd_state);
> @@ -364,11 +379,14 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
>  		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
>  	}
>
> -	__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
> -
> -	if (system_supports_sve() &&
> -	    vcpu->arch.flags & KVM_ARM64_GUEST_HAS_SVE)
> +	if_sve (guest_has_sve) {
> +		sve_load_state(vcpu_sve_pffr(vcpu),
> +			       &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr,
> +			       sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
>  		write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12);
> +	} else {
> +		__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
> +	}
>
>  	/* Skip restoring fpexc32 for AArch64 guests */
>  	if (!(read_sysreg(hcr_el2) & HCR_RW))
> @@ -380,6 +398,26 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
>  	return true;
>  }
>
> +static inline bool __hyp_text __hyp_trap_is_fpsimd(struct kvm_vcpu *vcpu,
> +						   bool guest_has_sve)
> +{
> +
> +	u8 trap_class;
> +
> +	if (!system_supports_fpsimd())
> +		return false;
> +
> +	trap_class = kvm_vcpu_trap_get_class(vcpu);
> +
> +	if (trap_class == ESR_ELx_EC_FP_ASIMD)
> +		return true;
> +
> +	if_sve (guest_has_sve && trap_class == ESR_ELx_EC_SVE)
> +		return true;

Do we really need to check the guest has SVE before believing what the
hardware is telling us? According to the ARM ARM:

For ESR_ELx_EC_FP_ASIMD

  Excludes exceptions resulting from CPACR_EL1 when the value of HCR_EL2.TGE is
  1, or because SVE or Advanced SIMD and floating-point are not implemented. These
  are reported with EC value 0b000000

But also for ESR_ELx_EC_SVE

  Access to SVE functionality trapped as a result of CPACR_EL1.ZEN,
  CPTR_EL2.ZEN, CPTR_EL2.TZ, or CPTR_EL3.EZ, that is not reported using EC
  0b000000. This EC is defined only if SVE is implemented

Given I got confused maybe we need a comment for clarity?

  /* Catch guests without SVE enabled running on SVE capable hardware */

> +
> +	return false;
> +}
> +
>  /*
>   * Return true when we were able to fixup the guest exit and should return to
>   * the guest, false when we should restore the host state and return to the
> @@ -387,6 +425,8 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
>   */
>  static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
>  {
> +	bool guest_has_sve;
> +
>  	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
>  		vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
>
> @@ -404,10 +444,11 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
>  	 * and restore the guest context lazily.
>  	 * If FP/SIMD is not implemented, handle the trap and inject an
>  	 * undefined instruction exception to the guest.
> +	 * Similarly for trapped SVE accesses.
>  	 */
> -	if (system_supports_fpsimd() &&
> -	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
> -		return __hyp_switch_fpsimd(vcpu);
> +	guest_has_sve = vcpu_has_sve(vcpu);

I'm not sure if it's worth fishing this out here given you are already
passing vcpu down the chain.

> +	if (__hyp_trap_is_fpsimd(vcpu, guest_has_sve))
> +		return __hyp_switch_fpsimd(vcpu, guest_has_sve);
>
>  	if (!__populate_fault_info(vcpu))
>  		return true;

Otherwise:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

--
Alex Bennée
Dave Martin Nov. 19, 2018, 5:03 p.m. UTC | #2
On Mon, Nov 19, 2018 at 04:36:01PM +0000, Alex Bennée wrote:
> 
> Dave Martin <Dave.Martin@arm.com> writes:
> 
> > In order to give each vcpu its own view of the SVE registers, this
> > patch adds context storage via a new sve_state pointer in struct
> > vcpu_arch.  An additional member sve_max_vl is also added for each
> > vcpu, to determine the maximum vector length visible to the guest
> > and thus the value to be configured in ZCR_EL2.LEN while the is
> > active.  This also determines the layout and size of the storage in
> > sve_state, which is read and written by the same backend functions
> > that are used for context-switching the SVE state for host tasks.
> >
> > On SVE-enabled vcpus, SVE access traps are now handled by switching
> > in the vcpu's SVE context and disabling the trap before returning
> > to the guest.  On other vcpus, the trap is not handled and an exit
> > back to the host occurs, where the handle_sve() fallback path
> > reflects an undefined instruction exception back to the guest,
> > consistently with the behaviour of non-SVE-capable hardware (as was
> > done unconditionally prior to this patch).
> >
> > No SVE handling is added on non-VHE-only paths, since VHE is an
> > architectural and Kconfig prerequisite of SVE.
> >
> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>

[...]

> > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> > index 085ed06..9941349 100644
> > --- a/arch/arm64/kvm/hyp/switch.c
> > +++ b/arch/arm64/kvm/hyp/switch.c

[...]

> > @@ -380,6 +398,26 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
> >  	return true;
> >  }
> >
> > +static inline bool __hyp_text __hyp_trap_is_fpsimd(struct kvm_vcpu *vcpu,
> > +						   bool guest_has_sve)
> > +{
> > +
> > +	u8 trap_class;
> > +
> > +	if (!system_supports_fpsimd())
> > +		return false;
> > +
> > +	trap_class = kvm_vcpu_trap_get_class(vcpu);
> > +
> > +	if (trap_class == ESR_ELx_EC_FP_ASIMD)
> > +		return true;
> > +
> > +	if_sve (guest_has_sve && trap_class == ESR_ELx_EC_SVE)
> > +		return true;
> 
> Do we really need to check the guest has SVE before believing what the
> hardware is telling us? According to the ARM ARM:
> 
> For ESR_ELx_EC_FP_ASIMD
> 
>   Excludes exceptions resulting from CPACR_EL1 when the value of HCR_EL2.TGE is
>   1, or because SVE or Advanced SIMD and floating-point are not implemented. These
>   are reported with EC value 0b000000
> 
> But also for ESR_ELx_EC_SVE
> 
>   Access to SVE functionality trapped as a result of CPACR_EL1.ZEN,
>   CPTR_EL2.ZEN, CPTR_EL2.TZ, or CPTR_EL3.EZ, that is not reported using EC
>   0b000000. This EC is defined only if SVE is implemented
> 
> Given I got confused maybe we need a comment for clarity?

This is not about not trusting the value ESR_ELx_EC_SVE on older
hardware: in effect it is retrospectively reserved for this purpose on
all older arch versions, so there is no ambiguity about what it means.
It should never be observed on hardware that doesn't have SVE.

Rather, how we handle this trap differs depending on whether the guest
is SVE-enabled or not.  If not, then this trap is handled by the generic
fallback path for unhandled guest traps, so we don't check for this
particular EC value explicitly in that case.

>   /* Catch guests without SVE enabled running on SVE capable hardware */

I might write something like:

	/*
	 * For sve-enmabled guests only, handle SVE access via FPSIMD
	 * context handling code.
	 */

Does that make sense?  I may have misunderstood your concern here.

[...]

> > @@ -387,6 +425,8 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
> >   */
> >  static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
> >  {
> > +	bool guest_has_sve;
> > +
> >  	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
> >  		vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
> >
> > @@ -404,10 +444,11 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
> >  	 * and restore the guest context lazily.
> >  	 * If FP/SIMD is not implemented, handle the trap and inject an
> >  	 * undefined instruction exception to the guest.
> > +	 * Similarly for trapped SVE accesses.
> >  	 */
> > -	if (system_supports_fpsimd() &&
> > -	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
> > -		return __hyp_switch_fpsimd(vcpu);
> > +	guest_has_sve = vcpu_has_sve(vcpu);
> 
> I'm not sure if it's worth fishing this out here given you are already
> passing vcpu down the chain.

I wanted to discourage GCC from recomputing this.  If you're in a
position to do so, can you look at the disassembly with/without this
factored out and see whether it makes a difference?

> 
> > +	if (__hyp_trap_is_fpsimd(vcpu, guest_has_sve))
> > +		return __hyp_switch_fpsimd(vcpu, guest_has_sve);
> >
> >  	if (!__populate_fault_info(vcpu))
> >  		return true;
> 
> Otherwise:
> 
> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

Thanks
---Dave
Alex Bennée Nov. 20, 2018, 12:25 p.m. UTC | #3
Dave Martin <Dave.Martin@arm.com> writes:

> On Mon, Nov 19, 2018 at 04:36:01PM +0000, Alex Bennée wrote:
>>
>> Dave Martin <Dave.Martin@arm.com> writes:
>>
>> > In order to give each vcpu its own view of the SVE registers, this
>> > patch adds context storage via a new sve_state pointer in struct
>> > vcpu_arch.  An additional member sve_max_vl is also added for each
>> > vcpu, to determine the maximum vector length visible to the guest
>> > and thus the value to be configured in ZCR_EL2.LEN while the is
>> > active.  This also determines the layout and size of the storage in
>> > sve_state, which is read and written by the same backend functions
>> > that are used for context-switching the SVE state for host tasks.
>> >
>> > On SVE-enabled vcpus, SVE access traps are now handled by switching
>> > in the vcpu's SVE context and disabling the trap before returning
>> > to the guest.  On other vcpus, the trap is not handled and an exit
>> > back to the host occurs, where the handle_sve() fallback path
>> > reflects an undefined instruction exception back to the guest,
>> > consistently with the behaviour of non-SVE-capable hardware (as was
>> > done unconditionally prior to this patch).
>> >
>> > No SVE handling is added on non-VHE-only paths, since VHE is an
>> > architectural and Kconfig prerequisite of SVE.
>> >
>> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
>
> [...]
>
>> > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
>> > index 085ed06..9941349 100644
>> > --- a/arch/arm64/kvm/hyp/switch.c
>> > +++ b/arch/arm64/kvm/hyp/switch.c
>
> [...]
>
>> > @@ -380,6 +398,26 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
>> >  	return true;
>> >  }
>> >
>> > +static inline bool __hyp_text __hyp_trap_is_fpsimd(struct kvm_vcpu *vcpu,
>> > +						   bool guest_has_sve)
>> > +{
>> > +
>> > +	u8 trap_class;
>> > +
>> > +	if (!system_supports_fpsimd())
>> > +		return false;
>> > +
>> > +	trap_class = kvm_vcpu_trap_get_class(vcpu);
>> > +
>> > +	if (trap_class == ESR_ELx_EC_FP_ASIMD)
>> > +		return true;
>> > +
>> > +	if_sve (guest_has_sve && trap_class == ESR_ELx_EC_SVE)
>> > +		return true;
>>
>> Do we really need to check the guest has SVE before believing what the
>> hardware is telling us? According to the ARM ARM:
>>
>> For ESR_ELx_EC_FP_ASIMD
>>
>>   Excludes exceptions resulting from CPACR_EL1 when the value of HCR_EL2.TGE is
>>   1, or because SVE or Advanced SIMD and floating-point are not implemented. These
>>   are reported with EC value 0b000000
>>
>> But also for ESR_ELx_EC_SVE
>>
>>   Access to SVE functionality trapped as a result of CPACR_EL1.ZEN,
>>   CPTR_EL2.ZEN, CPTR_EL2.TZ, or CPTR_EL3.EZ, that is not reported using EC
>>   0b000000. This EC is defined only if SVE is implemented
>>
>> Given I got confused maybe we need a comment for clarity?
>
> This is not about not trusting the value ESR_ELx_EC_SVE on older
> hardware: in effect it is retrospectively reserved for this purpose on
> all older arch versions, so there is no ambiguity about what it means.
> It should never be observed on hardware that doesn't have SVE.
>
> Rather, how we handle this trap differs depending on whether the guest
> is SVE-enabled or not.  If not, then this trap is handled by the generic
> fallback path for unhandled guest traps, so we don't check for this
> particular EC value explicitly in that case.
>
>>   /* Catch guests without SVE enabled running on SVE capable hardware */
>
> I might write something like:
>
> 	/*
> 	 * For sve-enmabled guests only, handle SVE access via FPSIMD
> 	 * context handling code.
> 	 */
>
> Does that make sense?  I may have misunderstood your concern here.

s/enmabled/enabled/ but yeah that's fine.

>
> [...]
>
>> > @@ -387,6 +425,8 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
>> >   */
>> >  static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
>> >  {
>> > +	bool guest_has_sve;
>> > +
>> >  	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
>> >  		vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
>> >
>> > @@ -404,10 +444,11 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
>> >  	 * and restore the guest context lazily.
>> >  	 * If FP/SIMD is not implemented, handle the trap and inject an
>> >  	 * undefined instruction exception to the guest.
>> > +	 * Similarly for trapped SVE accesses.
>> >  	 */
>> > -	if (system_supports_fpsimd() &&
>> > -	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
>> > -		return __hyp_switch_fpsimd(vcpu);
>> > +	guest_has_sve = vcpu_has_sve(vcpu);
>>
>> I'm not sure if it's worth fishing this out here given you are already
>> passing vcpu down the chain.
>
> I wanted to discourage GCC from recomputing this.  If you're in a
> position to do so, can you look at the disassembly with/without this
> factored out and see whether it makes a difference?

Hmm it is hard to tell. There is code motion but for some reason I'm
seeing the static jump code unrolled, for example (original on left):

__hyp_switch_fpsimd():                                                                  __hyp_switch_fpsimd():
/home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:382                      | /home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:381
                                                                                      >  ----:  tst     w0, #0x400000
                                                                                      >  ----:  b.eq    22c <fixup_guest_exit+0x1a4>  // b.none
                                                                                      > arch_static_branch_jump():
                                                                                      > /home/alex/lsrc/kvm/linux.git/arch/arm64/include/asm/jump_label.h:45
                                                                                      >  ----:  b       38c <fixup_guest_exit+0x304>
                                                                                      > arch_static_branch():
                                                                                      > /home/alex/lsrc/kvm/linux.git/arch/arm64/include/asm/jump_label.h:31
                                                                                      >  ----:  nop
                                                                                      >  ----:  b       22c <fixup_guest_exit+0x1a4>
                                                                                      > test_bit():
                                                                                      > /home/alex/lsrc/kvm/linux.git/include/asm-generic/bitops/non-atomic.h:106
                                                                                      >  ----:  adrp    x0, 0 <cpu_hwcaps>
                                                                                      >  ----:  ldr     x0, [x0]
                                                                                      > __hyp_switch_fpsimd():
                                                                                      > /home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:381
 ----:  tst     w0, #0x400000                                                            ----:  tst     w0, #0x400000
 ----:  b.eq    238 <fixup_guest_exit+0x1b0>  // b.none                               |  ----:  b.eq    22c <fixup_guest_exit+0x1a4>  // b.none
 ----:  cbz     w21, 238 <fixup_guest_exit+0x1b0>                                     |  ----:  tbz     w2, #5, 22c <fixup_guest_exit+0x1a4>
/home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:383                      | /home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:382
 ----:  ldr     w2, [x19, #2040]                                                      |  ----:  ldr     w2, [x20, #2040]
 ----:  add     x1, x19, #0x4b0                                                       |  ----:  add     x1, x20, #0x4b0
 ----:  ldr     x0, [x19, #2032]                                                      |  ----:  ldr     x0, [x20, #2032]
sve_ffr_offset():                                                                       sve_ffr_offset():

Putting the calculation of guest_has_sve at the top of __hyp_switch_fpsimd
makes most of that go away and just moves things around a little bit. So I
guess it could make sense for the fast(ish) path, although I'd be
interested in knowing whether it makes any real difference to the numbers.
After all, the first read should be well cached, and moving it through the
stack is just additional memory and register pressure.

>>
>> > +	if (__hyp_trap_is_fpsimd(vcpu, guest_has_sve))
>> > +		return __hyp_switch_fpsimd(vcpu, guest_has_sve);
>> >
>> >  	if (!__populate_fault_info(vcpu))
>> >  		return true;
>>
>> Otherwise:
>>
>> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
>
> Thanks
> ---Dave


--
Alex Bennée
Dave Martin Nov. 20, 2018, 2:17 p.m. UTC | #4
On Tue, Nov 20, 2018 at 12:25:12PM +0000, Alex Bennée wrote:
> 
> Dave Martin <Dave.Martin@arm.com> writes:
> 
> > On Mon, Nov 19, 2018 at 04:36:01PM +0000, Alex Bennée wrote:
> >>
> >> Dave Martin <Dave.Martin@arm.com> writes:
> >>
> >> > In order to give each vcpu its own view of the SVE registers, this
> >> > patch adds context storage via a new sve_state pointer in struct
> >> > vcpu_arch.  An additional member sve_max_vl is also added for each
> >> > vcpu, to determine the maximum vector length visible to the guest
> >> > and thus the value to be configured in ZCR_EL2.LEN while the is
> >> > active.  This also determines the layout and size of the storage in
> >> > sve_state, which is read and written by the same backend functions
> >> > that are used for context-switching the SVE state for host tasks.
> >> >
> >> > On SVE-enabled vcpus, SVE access traps are now handled by switching
> >> > in the vcpu's SVE context and disabling the trap before returning
> >> > to the guest.  On other vcpus, the trap is not handled and an exit
> >> > back to the host occurs, where the handle_sve() fallback path
> >> > reflects an undefined instruction exception back to the guest,
> >> > consistently with the behaviour of non-SVE-capable hardware (as was
> >> > done unconditionally prior to this patch).
> >> >
> >> > No SVE handling is added on non-VHE-only paths, since VHE is an
> >> > architectural and Kconfig prerequisite of SVE.
> >> >
> >> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> >
> > [...]
> >
> >> > diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
> >> > index 085ed06..9941349 100644
> >> > --- a/arch/arm64/kvm/hyp/switch.c
> >> > +++ b/arch/arm64/kvm/hyp/switch.c
> >
> > [...]
> >
> >> > @@ -380,6 +398,26 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
> >> >  	return true;
> >> >  }
> >> >
> >> > +static inline bool __hyp_text __hyp_trap_is_fpsimd(struct kvm_vcpu *vcpu,
> >> > +						   bool guest_has_sve)
> >> > +{
> >> > +
> >> > +	u8 trap_class;
> >> > +
> >> > +	if (!system_supports_fpsimd())
> >> > +		return false;
> >> > +
> >> > +	trap_class = kvm_vcpu_trap_get_class(vcpu);
> >> > +
> >> > +	if (trap_class == ESR_ELx_EC_FP_ASIMD)
> >> > +		return true;
> >> > +
> >> > +	if_sve (guest_has_sve && trap_class == ESR_ELx_EC_SVE)
> >> > +		return true;
> >>
> >> Do we really need to check the guest has SVE before believing what the
> >> hardware is telling us? According to the ARM ARM:
> >>
> >> For ESR_ELx_EC_FP_ASIMD
> >>
> >>   Excludes exceptions resulting from CPACR_EL1 when the value of HCR_EL2.TGE is
> >>   1, or because SVE or Advanced SIMD and floating-point are not implemented. These
> >>   are reported with EC value 0b000000
> >>
> >> But also for ESR_ELx_EC_SVE
> >>
> >>   Access to SVE functionality trapped as a result of CPACR_EL1.ZEN,
> >>   CPTR_EL2.ZEN, CPTR_EL2.TZ, or CPTR_EL3.EZ, that is not reported using EC
> >>   0b000000. This EC is defined only if SVE is implemented
> >>
> >> Given I got confused maybe we need a comment for clarity?
> >
> > This is not about not trusting the value ESR_ELx_EC_SVE on older
> > hardware: in effect it is retrospectively reserved for this purpose on
> > all older arch versions, so there is no ambiguity about what it means.
> > It should never be observed on hardware that doesn't have SVE.
> >
> > Rather, how we handle this trap differs depending on whether the guest
> > is SVE-enabled or not.  If not, then this trap is handled by the generic
> > fallback path for unhandled guest traps, so we don't check for this
> > particular EC value explicitly in that case.
> >
> >>   /* Catch guests without SVE enabled running on SVE capable hardware */
> >
> > I might write something like:
> >
> > 	/*
> > 	 * For sve-enmabled guests only, handle SVE access via FPSIMD
> > 	 * context handling code.
> > 	 */
> >
> > Does that make sense?  I may have misunderstood your concern here.
> 
> s/enmabled/enabled/ but yeah that's fine.

Well spotted... I guess I was in a hurry.

> > [...]
> >
> >> > @@ -387,6 +425,8 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
> >> >   */
> >> >  static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
> >> >  {
> >> > +	bool guest_has_sve;
> >> > +
> >> >  	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
> >> >  		vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
> >> >
> >> > @@ -404,10 +444,11 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
> >> >  	 * and restore the guest context lazily.
> >> >  	 * If FP/SIMD is not implemented, handle the trap and inject an
> >> >  	 * undefined instruction exception to the guest.
> >> > +	 * Similarly for trapped SVE accesses.
> >> >  	 */
> >> > -	if (system_supports_fpsimd() &&
> >> > -	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
> >> > -		return __hyp_switch_fpsimd(vcpu);
> >> > +	guest_has_sve = vcpu_has_sve(vcpu);
> >>
> >> I'm not sure if it's worth fishing this out here given you are already
> >> passing vcpu down the chain.
> >
> > I wanted to discourage GCC from recomputing this.  If you're in a
> > position to do so, can you look at the disassembly with/without this
> > factored out and see whether it makes a difference?
> 
> Hmm it is hard to tell. There is code motion but for some reason I'm
> seeing the static jump code unrolled, for example (original on left):
> 
> __hyp_switch_fpsimd():                                                                  __hyp_switch_fpsimd():
> /home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:382                      | /home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:381
>                                                                                       >  ----:  tst     w0, #0x400000
>                                                                                       >  ----:  b.eq    22c <fixup_guest_exit+0x1a4>  // b.none
>                                                                                       > arch_static_branch_jump():
>                                                                                       > /home/alex/lsrc/kvm/linux.git/arch/arm64/include/asm/jump_label.h:45
>                                                                                       >  ----:  b       38c <fixup_guest_exit+0x304>
>                                                                                       > arch_static_branch():
>                                                                                       > /home/alex/lsrc/kvm/linux.git/arch/arm64/include/asm/jump_label.h:31
>                                                                                       >  ----:  nop
>                                                                                       >  ----:  b       22c <fixup_guest_exit+0x1a4>
>                                                                                       > test_bit():
>                                                                                       > /home/alex/lsrc/kvm/linux.git/include/asm-generic/bitops/non-atomic.h:106
>                                                                                       >  ----:  adrp    x0, 0 <cpu_hwcaps>
>                                                                                       >  ----:  ldr     x0, [x0]
>                                                                                       > __hyp_switch_fpsimd():
>                                                                                       > /home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:381
>  ----:  tst     w0, #0x400000                                                            ----:  tst     w0, #0x400000
>  ----:  b.eq    238 <fixup_guest_exit+0x1b0>  // b.none                               |  ----:  b.eq    22c <fixup_guest_exit+0x1a4>  // b.none
>  ----:  cbz     w21, 238 <fixup_guest_exit+0x1b0>                                     |  ----:  tbz     w2, #5, 22c <fixup_guest_exit+0x1a4>
> /home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:383                      | /home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:382
>  ----:  ldr     w2, [x19, #2040]                                                      |  ----:  ldr     w2, [x20, #2040]
>  ----:  add     x1, x19, #0x4b0                                                       |  ----:  add     x1, x20, #0x4b0
>  ----:  ldr     x0, [x19, #2032]                                                      |  ----:  ldr     x0, [x20, #2032]
> sve_ffr_offset():                                                                       sve_ffr_offset():
> 
> Put calculating guest_has_sve at the top of __hyp_switch_fpsimd make
> most of that go away and just moves things around a little bit. So I
> guess it could makes sense for the fast(ish) path although I'd be
> interested in knowing if it made any real difference to the numbers.
> After all the first read should be well cached and moving it through the
> stack is just additional memory and register pressure.

Hmmm, I will have a think about this when I respin.

Explicitly caching guest_has_sve() does reduce the compiler's freedom to
optimise.

We might be able to mark it as __pure or __attribute_const__ to enable
the compiler to decide whether to cache the result, but this may not be
100% safe.
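
For illustration, a rough and untested sketch of that idea (assuming
vcpu_has_sve() boils down to the flags test used elsewhere in this
series; not something I'd commit to without checking the static key
interaction):

	/* Result depends only on vcpu flags and (fixed after boot) cpu features */
	static inline bool __pure vcpu_has_sve(const struct kvm_vcpu *vcpu)
	{
		return system_supports_sve() &&
		       (vcpu->arch.flags & KVM_ARM64_GUEST_HAS_SVE);
	}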

Part of me would prefer to leave things as they are to avoid the risk of
breaking the code again...

Cheers
---Dave
Alex Bennée Nov. 20, 2018, 3:30 p.m. UTC | #5
Dave Martin <Dave.Martin@arm.com> writes:

<snip>
>> >> > @@ -404,10 +444,11 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
>> >> >  	 * and restore the guest context lazily.
>> >> >  	 * If FP/SIMD is not implemented, handle the trap and inject an
>> >> >  	 * undefined instruction exception to the guest.
>> >> > +	 * Similarly for trapped SVE accesses.
>> >> >  	 */
>> >> > -	if (system_supports_fpsimd() &&
>> >> > -	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
>> >> > -		return __hyp_switch_fpsimd(vcpu);
>> >> > +	guest_has_sve = vcpu_has_sve(vcpu);
>> >>
>> >> I'm not sure if it's worth fishing this out here given you are already
>> >> passing vcpu down the chain.
>> >
>> > I wanted to discourage GCC from recomputing this.  If you're in a
>> > position to do so, can you look at the disassembly with/without this
>> > factored out and see whether it makes a difference?
>>
>> Hmm it is hard to tell. There is code motion but for some reason I'm
>> seeing the static jump code unrolled, for example (original on left):
>>
>> __hyp_switch_fpsimd():                                                                  __hyp_switch_fpsimd():
>> /home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:382                      | /home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:381
>>                                                                                       >  ----:  tst     w0, #0x400000
>>                                                                                       >  ----:  b.eq    22c <fixup_guest_exit+0x1a4>  // b.none
>>                                                                                       > arch_static_branch_jump():
>>                                                                                       > /home/alex/lsrc/kvm/linux.git/arch/arm64/include/asm/jump_label.h:45
>>                                                                                       >  ----:  b       38c <fixup_guest_exit+0x304>
>>                                                                                       > arch_static_branch():
>>                                                                                       > /home/alex/lsrc/kvm/linux.git/arch/arm64/include/asm/jump_label.h:31
>>                                                                                       >  ----:  nop
>>                                                                                       >  ----:  b       22c <fixup_guest_exit+0x1a4>
>>                                                                                       > test_bit():
>>                                                                                       > /home/alex/lsrc/kvm/linux.git/include/asm-generic/bitops/non-atomic.h:106
>>                                                                                       >  ----:  adrp    x0, 0 <cpu_hwcaps>
>>                                                                                       >  ----:  ldr     x0, [x0]
>>                                                                                       > __hyp_switch_fpsimd():
>>                                                                                       > /home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:381
>>  ----:  tst     w0, #0x400000                                                            ----:  tst     w0, #0x400000
>>  ----:  b.eq    238 <fixup_guest_exit+0x1b0>  // b.none                               |  ----:  b.eq    22c <fixup_guest_exit+0x1a4>  // b.none
>>  ----:  cbz     w21, 238 <fixup_guest_exit+0x1b0>                                     |  ----:  tbz     w2, #5, 22c <fixup_guest_exit+0x1a4>
>> /home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:383                      | /home/alex/lsrc/kvm/linux.git/arch/arm64/kvm/hyp/switch.----:382
>>  ----:  ldr     w2, [x19, #2040]                                                      |  ----:  ldr     w2, [x20, #2040]
>>  ----:  add     x1, x19, #0x4b0                                                       |  ----:  add     x1, x20, #0x4b0
>>  ----:  ldr     x0, [x19, #2032]                                                      |  ----:  ldr     x0, [x20, #2032]
>> sve_ffr_offset():                                                                       sve_ffr_offset():
>>
>> Put calculating guest_has_sve at the top of __hyp_switch_fpsimd make
>> most of that go away and just moves things around a little bit. So I
>> guess it could makes sense for the fast(ish) path although I'd be
>> interested in knowing if it made any real difference to the numbers.
>> After all the first read should be well cached and moving it through the
>> stack is just additional memory and register pressure.
>
> Hmmm, I will have a think about this when I respin.
>
> Explicitly caching guest_has_sve() does reduce the compiler's freedom to
> optimise.
>
> We might be able to mark it as __pure or __attribute_const__ to enable
> the compiler to decide whether to cache the result, but this may not be
> 100% safe.
>
> Part of me would prefer to leave things as they are to avoid the risk of
> breaking the code again...

Given that the only place you call __hyp_switch_fpsimd is here, you could
just roll it into __hyp_trap_is_fpsimd and have:

	if (__hyp_trap_is_fpsimd(vcpu))
		return true;

--
Alex Bennée
Dave Martin Nov. 20, 2018, 5:18 p.m. UTC | #6
On Tue, Nov 20, 2018 at 03:30:29PM +0000, Alex Bennée wrote:
> 
> Dave Martin <Dave.Martin@arm.com> writes:

[...]

> >> Put calculating guest_has_sve at the top of __hyp_switch_fpsimd make
> >> most of that go away and just moves things around a little bit. So I
> >> guess it could makes sense for the fast(ish) path although I'd be
> >> interested in knowing if it made any real difference to the numbers.
> >> After all the first read should be well cached and moving it through the
> >> stack is just additional memory and register pressure.
> >
> > Hmmm, I will have a think about this when I respin.
> >
> > Explicitly caching guest_has_sve() does reduce the compiler's freedom to
> > optimise.
> >
> > We might be able to mark it as __pure or __attribute_const__ to enable
> > the compiler to decide whether to cache the result, but this may not be
> > 100% safe.
> >
> > Part of me would prefer to leave things as they are to avoid the risk of
> > breaking the code again...
> 
> Given that the only place you call __hyp_switch_fpsimd is here you could
> just roll in into __hyp_trap_is_fpsimd and have:
> 
> 	if (__hyp_trap_is_fpsimd(vcpu))
> 		return true;

Possibly, though the function should be renamed in this case, something
like __hyp_handle_fpsimd_trap() I guess.
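
Roughly, as an untested sketch of the shape only (reusing the helpers
from this patch rather than anything final):

	/* Handle an FPSIMD/SVE trap; returns true if the guest can be resumed */
	static bool __hyp_text __hyp_handle_fpsimd_trap(struct kvm_vcpu *vcpu)
	{
		bool guest_has_sve = vcpu_has_sve(vcpu);

		if (!__hyp_trap_is_fpsimd(vcpu, guest_has_sve))
			return false;

		return __hyp_switch_fpsimd(vcpu, guest_has_sve);
	}

and the caller in fixup_guest_exit() would reduce to:

	if (__hyp_handle_fpsimd_trap(vcpu))
		return true;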

Cheers
---Dave

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 76cbb95e..8e9cd43 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -210,6 +210,8 @@  typedef struct kvm_cpu_context kvm_cpu_context_t;
 
 struct kvm_vcpu_arch {
 	struct kvm_cpu_context ctxt;
+	void *sve_state;
+	unsigned int sve_max_vl;
 
 	/* HYP configuration */
 	u64 hcr_el2;
@@ -302,6 +304,10 @@  struct kvm_vcpu_arch {
 	bool sysregs_loaded_on_cpu;
 };
 
+/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
+#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
+				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
+
 /* vcpu_arch flags field values: */
 #define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
 #define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 29e5585..3474388 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -86,10 +86,11 @@  void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
 
 	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
 		fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs,
-					 NULL, sve_max_vl);
+					 vcpu->arch.sve_state,
+					 vcpu->arch.sve_max_vl);
 
 		clear_thread_flag(TIF_FOREIGN_FPSTATE);
-		clear_thread_flag(TIF_SVE);
+		update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
 	}
 }
 
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 085ed06..9941349 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -98,7 +98,10 @@  static void activate_traps_vhe(struct kvm_vcpu *vcpu)
 	val = read_sysreg(cpacr_el1);
 	val |= CPACR_EL1_TTA;
 	val &= ~CPACR_EL1_ZEN;
-	if (!update_fp_enabled(vcpu)) {
+	if (update_fp_enabled(vcpu)) {
+		if (vcpu_has_sve(vcpu))
+			val |= CPACR_EL1_ZEN;
+	} else {
 		val &= ~CPACR_EL1_FPEN;
 		__activate_traps_fpsimd32(vcpu);
 	}
@@ -332,16 +335,29 @@  static bool __hyp_text __skip_instr(struct kvm_vcpu *vcpu)
 	}
 }
 
-static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
+/*
+ * if () with a gating check for SVE support to minimise branch
+ * mispredictions in non-SVE systems.
+ * (system_supports_sve() is resolved at build time or via a static key.)
+ */
+#define if_sve(cond) if (system_supports_sve() && (cond))
+
+static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu,
+					   bool guest_has_sve)
 {
 	struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
 
-	if (has_vhe())
-		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
-			     cpacr_el1);
-	else
+	if (has_vhe()) {
+		u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
+
+		if_sve (guest_has_sve)
+			reg |= CPACR_EL1_ZEN;
+
+		write_sysreg(reg, cpacr_el1);
+	} else {
 		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
 			     cptr_el2);
+	}
 
 	isb();
 
@@ -350,8 +366,7 @@  static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
 		 * In the SVE case, VHE is assumed: it is enforced by
 		 * Kconfig and kvm_arch_init().
 		 */
-		if (system_supports_sve() &&
-		    (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE)) {
+		if_sve (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE) {
 			struct thread_struct *thread = container_of(
 				host_fpsimd,
 				struct thread_struct, uw.fpsimd_state);
@@ -364,11 +379,14 @@  static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
 		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
 	}
 
-	__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
-
-	if (system_supports_sve() &&
-	    vcpu->arch.flags & KVM_ARM64_GUEST_HAS_SVE)
+	if_sve (guest_has_sve) {
+		sve_load_state(vcpu_sve_pffr(vcpu),
+			       &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr,
+			       sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
 		write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12);
+	} else {
+		__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
+	}
 
 	/* Skip restoring fpexc32 for AArch64 guests */
 	if (!(read_sysreg(hcr_el2) & HCR_RW))
@@ -380,6 +398,26 @@  static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
 	return true;
 }
 
+static inline bool __hyp_text __hyp_trap_is_fpsimd(struct kvm_vcpu *vcpu,
+						   bool guest_has_sve)
+{
+
+	u8 trap_class;
+
+	if (!system_supports_fpsimd())
+		return false;
+
+	trap_class = kvm_vcpu_trap_get_class(vcpu);
+
+	if (trap_class == ESR_ELx_EC_FP_ASIMD)
+		return true;
+
+	if_sve (guest_has_sve && trap_class == ESR_ELx_EC_SVE)
+		return true;
+
+	return false;
+}
+
 /*
  * Return true when we were able to fixup the guest exit and should return to
  * the guest, false when we should restore the host state and return to the
@@ -387,6 +425,8 @@  static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
  */
 static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 {
+	bool guest_has_sve;
+
 	if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
 		vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
 
@@ -404,10 +444,11 @@  static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 	 * and restore the guest context lazily.
 	 * If FP/SIMD is not implemented, handle the trap and inject an
 	 * undefined instruction exception to the guest.
+	 * Similarly for trapped SVE accesses.
 	 */
-	if (system_supports_fpsimd() &&
-	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
-		return __hyp_switch_fpsimd(vcpu);
+	guest_has_sve = vcpu_has_sve(vcpu);
+	if (__hyp_trap_is_fpsimd(vcpu, guest_has_sve))
+		return __hyp_switch_fpsimd(vcpu, guest_has_sve);
 
 	if (!__populate_fault_info(vcpu))
 		return true;