[v7,13/27] KVM: arm64/sve: Context switch the SVE registers

Message ID 1553864452-15080-14-git-send-email-Dave.Martin@arm.com (mailing list archive)
State New, archived
Series KVM: arm64: SVE guest support

Commit Message

Dave Martin March 29, 2019, 1 p.m. UTC
In order to give each vcpu its own view of the SVE registers, this
patch adds context storage via a new sve_state pointer in struct
vcpu_arch.  An additional member sve_max_vl is also added for each
vcpu, to determine the maximum vector length visible to the guest
and thus the value to be configured in ZCR_EL2.LEN while the vcpu
is active.  This also determines the layout and size of the storage
in sve_state, which is read and written by the same backend
functions that are used for context-switching the SVE state for
host tasks.

On SVE-enabled vcpus, SVE access traps are now handled by switching
in the vcpu's SVE context and disabling the trap before returning
to the guest.  On other vcpus, the trap is not handled and an exit
back to the host occurs, where the handle_sve() fallback path
reflects an undefined instruction exception back to the guest,
consistent with the behaviour of non-SVE-capable hardware (as was
done unconditionally prior to this patch).

No SVE handling is added on non-VHE-only paths, since VHE is an
architectural and Kconfig prerequisite of SVE.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Reviewed-by: Julien Thierry <julien.thierry@arm.com>
Tested-by: zhang.lei <zhang.lei@jp.fujitsu.com>

---

Changes since v5:

 * [Julien Thierry, Julien Grall] Commit message typo fixes

 * [Mark Rutland] Rename trap_class to hsr_ec, for consistency with
   existing code.

 * [Mark Rutland] Simplify condition for refusing to handle an
   FPSIMD/SVE trap, using multiple if () statements for clarity.  The
   previous condition was a bit tortuous, and now that the static_key
   checks have been hoisted out, it makes little difference to the
   compiler how we express the condition here.
---
 arch/arm64/include/asm/kvm_host.h |  6 ++++
 arch/arm64/kvm/fpsimd.c           |  5 +--
 arch/arm64/kvm/hyp/switch.c       | 75 +++++++++++++++++++++++++++++----------
 3 files changed, 66 insertions(+), 20 deletions(-)
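
The ZCR_EL2.LEN value mentioned above encodes the vector length as a
number of 128-bit quadwords minus one, which is also the third argument
this patch passes to sve_load_state().  A minimal sketch of the
relationship (the helper name is hypothetical, not part of the patch):

/* Sketch only: ZCR_ELx.LEN holds (vector length in 128-bit quadwords) - 1;
 * sve_vq_from_vl() converts a vector length in bytes to quadwords.
 */
static unsigned int vcpu_zcr_len(const struct kvm_vcpu *vcpu)
{
	return sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1;
}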

Comments

Andrew Jones April 3, 2019, 8:01 p.m. UTC | #1
On Fri, Mar 29, 2019 at 01:00:38PM +0000, Dave Martin wrote:
> [...]
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 22cf484..4fabfd2 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -228,6 +228,8 @@ struct vcpu_reset_state {
>  
>  struct kvm_vcpu_arch {
>  	struct kvm_cpu_context ctxt;
> +	void *sve_state;
> +	unsigned int sve_max_vl;
>  
>  	/* HYP configuration */
>  	u64 hcr_el2;
> @@ -323,6 +325,10 @@ struct kvm_vcpu_arch {
>  	bool sysregs_loaded_on_cpu;
>  };
>  
> +/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
> +#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> +				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))

Maybe an inline function instead?

> [...]

Reviewed-by: Andrew Jones <drjones@redhat.com>
Dave Martin April 4, 2019, 8:10 a.m. UTC | #2
On Wed, Apr 03, 2019 at 10:01:45PM +0200, Andrew Jones wrote:
> On Fri, Mar 29, 2019 at 01:00:38PM +0000, Dave Martin wrote:
> > [...]
> > 
> > +/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
> > +#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> > +				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> 
> Maybe an inline function instead?

I tried, but that requires the definition of struct kvm_vcpu to be
visible.  I failed to get that here without circular #include problems,
and it looked tricky to fix.

Since this is a small bit of code which is unlikely to get used by
accident, I decided it was OK to keep it as a macro.

Can you see another way around this?

[...]

> Reviewed-by: Andrew Jones <drjones@redhat.com>

Thanks

---Dave
Andrew Jones April 4, 2019, 8:35 a.m. UTC | #3
On Thu, Apr 04, 2019 at 09:10:08AM +0100, Dave Martin wrote:
> On Wed, Apr 03, 2019 at 10:01:45PM +0200, Andrew Jones wrote:
> > On Fri, Mar 29, 2019 at 01:00:38PM +0000, Dave Martin wrote:
> > > [...]
> > > 
> > > +/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
> > > +#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> > > +				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> > 
> > Maybe an inline function instead?
> 
> I tried, but that requires the definition of struct kvm_vcpu to be
> visible.  I failed to get that here without circular #include problems,
> and it looked tricky to fix.

Ah, OK

> 
> Since this is a small bit of code which is unlikely to get used by
> accident, I decided it was OK to keep it as a macro.
> 
> Can you see another way around this?

Nope

drew
Dave Martin April 4, 2019, 8:36 a.m. UTC | #4
On Thu, Apr 04, 2019 at 10:35:02AM +0200, Andrew Jones wrote:
> On Thu, Apr 04, 2019 at 09:10:08AM +0100, Dave Martin wrote:
> > On Wed, Apr 03, 2019 at 10:01:45PM +0200, Andrew Jones wrote:
> > > On Fri, Mar 29, 2019 at 01:00:38PM +0000, Dave Martin wrote:
> > > > [...]
> > > > 
> > > > +/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
> > > > +#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> > > > +				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> > > 
> > > Maybe an inline function instead?
> > 
> > I tried, but that requires the definition of struct kvm_vcpu to be
> > visible.  I failed to get that here without circular #include problems,
> > and it looked tricky to fix.
> 
> Ah, OK
> 
> > 
> > Since this is a small bit of code which is unlikely to get used by
> > accident, I decided it was OK to keep it as a macro.
> > 
> > Can you see another way around this?
> 
> Nope

OK.  If someone eventually solves this, I'd be happy to change to an
inline function.

Cheers
---Dave
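
For reference, the inline form under discussion would presumably look
something like the sketch below; it performs the same pointer arithmetic
as the macro, and it only compiles once struct kvm_vcpu's definition is
visible, which is exactly the circular #include problem described above:

/* Hypothetical inline equivalent of vcpu_sve_pffr() -- not part of the
 * patch as merged.
 */
static inline void *vcpu_sve_pffr(struct kvm_vcpu *vcpu)
{
	return (char *)vcpu->arch.sve_state +
	       sve_ffr_offset(vcpu->arch.sve_max_vl);
}
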
Alex Bennée April 24, 2019, 2:51 p.m. UTC | #5
Dave Martin <Dave.Martin@arm.com> writes:

> On Thu, Apr 04, 2019 at 10:35:02AM +0200, Andrew Jones wrote:
>> On Thu, Apr 04, 2019 at 09:10:08AM +0100, Dave Martin wrote:
>> > On Wed, Apr 03, 2019 at 10:01:45PM +0200, Andrew Jones wrote:
>> > > On Fri, Mar 29, 2019 at 01:00:38PM +0000, Dave Martin wrote:
>> > > > [...]
>> > > >
>> > > > +/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
>> > > > +#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
>> > > > +				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
>> > >
>> > > Maybe an inline function instead?
>> >
>> > I tried, but that requires the definition of struct kvm_vcpu to be
>> > visible.  I failed to get that here without circular #include problems,
>> > and it looked tricky to fix.
>>
>> Ah, OK
>>
>> >
>> > Since this is a small bit of code which is unlikely to get used by
>> > accident, I decided it was OK to keep it as a macro.
>> >
>> > Can you see another way around this?
>>
>> Nope
>
> OK.  If someone eventually solves this, I'd be happy to change to an
> inline function.

Is the function intended to be used by more call sites? Currently in the
tree with this plus the v2 fixups I can only see:

  arch/arm64/include/asm/kvm_host.h:333:#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
  arch/arm64/kvm/hyp/switch.c:388:		sve_load_state(vcpu_sve_pffr(vcpu),

--
Alex Bennée
Dave Martin April 25, 2019, 1:35 p.m. UTC | #6
On Wed, Apr 24, 2019 at 03:51:32PM +0100, Alex Bennée wrote:
> 
> Dave Martin <Dave.Martin@arm.com> writes:
> 
> > On Thu, Apr 04, 2019 at 10:35:02AM +0200, Andrew Jones wrote:
> >> On Thu, Apr 04, 2019 at 09:10:08AM +0100, Dave Martin wrote:
> >> > On Wed, Apr 03, 2019 at 10:01:45PM +0200, Andrew Jones wrote:
> >> > > On Fri, Mar 29, 2019 at 01:00:38PM +0000, Dave Martin wrote:
> >> > > > [...]
> >> > > >
> >> > > > +/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
> >> > > > +#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> >> > > > +				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
> >> > >
> >> > > Maybe an inline function instead?
> >> >
> >> > I tried, but that requires the definition of struct kvm_vcpu to be
> >> > visible.  I failed to get that here without circular #include problems,
> >> > and it looked tricky to fix.
> >>
> >> Ah, OK
> >>
> >> >
> >> > Since this is a small bit of code which is unlikely to get used by
> >> > accident, I decided it was OK to keep it as a macro.
> >> >
> >> > Can you see another way around this?
> >>
> >> Nope
> >
> > OK.  If someone eventually solves this, I'd be happy to change to an
> > inline function.
> 
> Is the function intended to be used by more call sites? Currently in the
> tree with this plus the v2 fixups I can only see:
> 
>   arch/arm64/include/asm/kvm_host.h:333:#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
>   arch/arm64/kvm/hyp/switch.c:388:		sve_load_state(vcpu_sve_pffr(vcpu),

Probably not, although it was probably used to save the state back
before things were refactored so that fpsimd_save() in
arch/arm64/kernel/fpsimd.c is used instead of separate code to save the
vcpu state.

The expression is ugly so it's nice to abstract it.  This also keeps
the sve_load_state() call feeling consistent with the equivalent call in
task_fpsimd_load() in arch/arm64/kernel/fpsimd.c.

Other than that, there's no underlying reason for having a macro.

Cheers
---Dave
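
For comparison, the equivalent call in task_fpsimd_load() that is being
referred to reads roughly as follows in arch/arm64/kernel/fpsimd.c at the
time of this series (a sketch, quoted for context; see the tree for the
authoritative version):

/* Host-task restore path: the same sve_load_state() shape as the vcpu
 * path in __hyp_handle_fpsimd(), with sve_pffr() playing the role of
 * vcpu_sve_pffr().
 */
if (system_supports_sve() && test_thread_flag(TIF_SVE))
	sve_load_state(sve_pffr(&current->thread),
		       &current->thread.uw.fpsimd_state.fpsr,
		       sve_vq_from_vl(current->thread.sve_vl) - 1);
else
	fpsimd_load_state(&current->thread.uw.fpsimd_state);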

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 22cf484..4fabfd2 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -228,6 +228,8 @@  struct vcpu_reset_state {
 
 struct kvm_vcpu_arch {
 	struct kvm_cpu_context ctxt;
+	void *sve_state;
+	unsigned int sve_max_vl;
 
 	/* HYP configuration */
 	u64 hcr_el2;
@@ -323,6 +325,10 @@  struct kvm_vcpu_arch {
 	bool sysregs_loaded_on_cpu;
 };
 
+/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
+#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
+				      sve_ffr_offset((vcpu)->arch.sve_max_vl)))
+
 /* vcpu_arch flags field values: */
 #define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
 #define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 7053bf4..6e3c9c8 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -87,10 +87,11 @@  void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
 
 	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
 		fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs,
-					 NULL, SVE_VL_MIN);
+					 vcpu->arch.sve_state,
+					 vcpu->arch.sve_max_vl);
 
 		clear_thread_flag(TIF_FOREIGN_FPSTATE);
-		clear_thread_flag(TIF_SVE);
+		update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
 	}
 }
 
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 9d46066..5444b9c 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -100,7 +100,10 @@  static void activate_traps_vhe(struct kvm_vcpu *vcpu)
 	val = read_sysreg(cpacr_el1);
 	val |= CPACR_EL1_TTA;
 	val &= ~CPACR_EL1_ZEN;
-	if (!update_fp_enabled(vcpu)) {
+	if (update_fp_enabled(vcpu)) {
+		if (vcpu_has_sve(vcpu))
+			val |= CPACR_EL1_ZEN;
+	} else {
 		val &= ~CPACR_EL1_FPEN;
 		__activate_traps_fpsimd32(vcpu);
 	}
@@ -317,16 +320,48 @@  static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
 	return true;
 }
 
-static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
+/* Check for an FPSIMD/SVE trap and handle as appropriate */
+static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
 {
-	struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
+	bool vhe, sve_guest, sve_host;
+	u8 hsr_ec;
 
-	if (has_vhe())
-		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
-			     cpacr_el1);
-	else
+	if (!system_supports_fpsimd())
+		return false;
+
+	if (system_supports_sve()) {
+		sve_guest = vcpu_has_sve(vcpu);
+		sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
+		vhe = true;
+	} else {
+		sve_guest = false;
+		sve_host = false;
+		vhe = has_vhe();
+	}
+
+	hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+	if (hsr_ec != ESR_ELx_EC_FP_ASIMD &&
+	    hsr_ec != ESR_ELx_EC_SVE)
+		return false;
+
+	/* Don't handle SVE traps for non-SVE vcpus here: */
+	if (!sve_guest)
+		if (hsr_ec != ESR_ELx_EC_FP_ASIMD)
+			return false;
+
+	/* Valid trap.  Switch the context: */
+
+	if (vhe) {
+		u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
+
+		if (sve_guest)
+			reg |= CPACR_EL1_ZEN;
+
+		write_sysreg(reg, cpacr_el1);
+	} else {
 		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
 			     cptr_el2);
+	}
 
 	isb();
 
@@ -335,24 +370,28 @@  static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
 		 * In the SVE case, VHE is assumed: it is enforced by
 		 * Kconfig and kvm_arch_init().
 		 */
-		if (system_supports_sve() &&
-		    (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE)) {
+		if (sve_host) {
 			struct thread_struct *thread = container_of(
-				host_fpsimd,
+				vcpu->arch.host_fpsimd_state,
 				struct thread_struct, uw.fpsimd_state);
 
-			sve_save_state(sve_pffr(thread), &host_fpsimd->fpsr);
+			sve_save_state(sve_pffr(thread),
+				       &vcpu->arch.host_fpsimd_state->fpsr);
 		} else {
-			__fpsimd_save_state(host_fpsimd);
+			__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
 		}
 
 		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
 	}
 
-	__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
-
-	if (vcpu_has_sve(vcpu))
+	if (sve_guest) {
+		sve_load_state(vcpu_sve_pffr(vcpu),
+			       &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr,
+			       sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
 		write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12);
+	} else {
+		__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
+	}
 
 	/* Skip restoring fpexc32 for AArch64 guests */
 	if (!(read_sysreg(hcr_el2) & HCR_RW))
@@ -388,10 +427,10 @@  static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 	 * and restore the guest context lazily.
 	 * If FP/SIMD is not implemented, handle the trap and inject an
 	 * undefined instruction exception to the guest.
+	 * Similarly for trapped SVE accesses.
 	 */
-	if (system_supports_fpsimd() &&
-	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
-		return __hyp_switch_fpsimd(vcpu);
+	if (__hyp_handle_fpsimd(vcpu))
+		return true;
 
 	if (!__populate_fault_info(vcpu))
 		return true;