Message ID | 1553864452-15080-14-git-send-email-Dave.Martin@arm.com (mailing list archive)
State | New, archived
Series | KVM: arm64: SVE guest support
On Fri, Mar 29, 2019 at 01:00:38PM +0000, Dave Martin wrote:
> In order to give each vcpu its own view of the SVE registers, this
> patch adds context storage via a new sve_state pointer in struct
> vcpu_arch.  An additional member sve_max_vl is also added for each
> vcpu, to determine the maximum vector length visible to the guest
> and thus the value to be configured in ZCR_EL2.LEN while the vcpu
> is active.  This also determines the layout and size of the storage
> in sve_state, which is read and written by the same backend
> functions that are used for context-switching the SVE state for
> host tasks.
>
> On SVE-enabled vcpus, SVE access traps are now handled by switching
> in the vcpu's SVE context and disabling the trap before returning
> to the guest.  On other vcpus, the trap is not handled and an exit
> back to the host occurs, where the handle_sve() fallback path
> reflects an undefined instruction exception back to the guest,
> consistently with the behaviour of non-SVE-capable hardware (as was
> done unconditionally prior to this patch).
>
> No SVE handling is added on non-VHE-only paths, since VHE is an
> architectural and Kconfig prerequisite of SVE.
>
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> Reviewed-by: Julien Thierry <julien.thierry@arm.com>
> Tested-by: zhang.lei <zhang.lei@jp.fujitsu.com>
>
> ---
>
> Changes since v5:
>
>  * [Julien Thierry, Julien Grall] Commit message typo fixes
>
>  * [Mark Rutland] Rename trap_class to hsr_ec, for consistency with
>    existing code.
>
>  * [Mark Rutland] Simplify condition for refusing to handle an
>    FPSIMD/SVE trap, using multiple if () statements for clarity.  The
>    previous condition was a bit tortuous, and now that the static_key
>    checks have been hoisted out, it makes little difference to the
>    compiler how we express the condition here.
> ---
>  arch/arm64/include/asm/kvm_host.h |  6 ++++
>  arch/arm64/kvm/fpsimd.c           |  5 +--
>  arch/arm64/kvm/hyp/switch.c       | 75 +++++++++++++++++++++++++++++----------
>  3 files changed, 66 insertions(+), 20 deletions(-)

[...]

> @@ -323,6 +325,10 @@ struct kvm_vcpu_arch {
>  	bool sysregs_loaded_on_cpu;
>  };
>
> +/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
> +#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> +			     sve_ffr_offset((vcpu)->arch.sve_max_vl)))

Maybe an inline function instead?

[...]

Reviewed-by: Andrew Jones <drjones@redhat.com>
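(A side note on the "simplify condition" changelog item above: it refers to the trap-refusal checks in __hyp_handle_fpsimd(), shown in the full diff at the end of this page. A logically equivalent single-expression form, given here purely as a sketch for comparison and not as code from the patch, would be:

	/*
	 * Equivalent one-expression form of the trap-refusal logic;
	 * the patch deliberately uses separate if () statements for
	 * clarity instead.
	 */
	if (!(hsr_ec == ESR_ELx_EC_FP_ASIMD ||
	      (sve_guest && hsr_ec == ESR_ELx_EC_SVE)))
		return false;

Either way, FPSIMD traps are handled for all vcpus, while SVE traps are handled only for SVE-enabled vcpus; everything else falls through to the exit path.)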
On Wed, Apr 03, 2019 at 10:01:45PM +0200, Andrew Jones wrote:
> On Fri, Mar 29, 2019 at 01:00:38PM +0000, Dave Martin wrote:

[...]

> > +/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
> > +#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
> > +			     sve_ffr_offset((vcpu)->arch.sve_max_vl)))
>
> Maybe an inline function instead?

I tried, but that requires the definition of struct kvm_vcpu to be
visible.  I failed to get that here without circular #include problems,
and it looked tricky to fix.

Since this is a small bit of code which is unlikely to get used by
accident, I decided it was OK to keep it as a macro.

Can you see another way around this?

[...]

> Reviewed-by: Andrew Jones <drjones@redhat.com>

Thanks
---Dave
On Thu, Apr 04, 2019 at 09:10:08AM +0100, Dave Martin wrote:
> On Wed, Apr 03, 2019 at 10:01:45PM +0200, Andrew Jones wrote:

[...]

> > Maybe an inline function instead?
>
> I tried, but that requires the definition of struct kvm_vcpu to be
> visible.  I failed to get that here without circular #include problems,
> and it looked tricky to fix.

Ah, OK

> Since this is a small bit of code which is unlikely to get used by
> accident, I decided it was OK to keep it as a macro.
>
> Can you see another way around this?

Nope

drew
On Thu, Apr 04, 2019 at 10:35:02AM +0200, Andrew Jones wrote:
> On Thu, Apr 04, 2019 at 09:10:08AM +0100, Dave Martin wrote:

[...]

> > Since this is a small bit of code which is unlikely to get used by
> > accident, I decided it was OK to keep it as a macro.
> >
> > Can you see another way around this?
>
> Nope

OK.  If someone eventually solves this, I'd be happy to change to an
inline function.

Cheers
---Dave
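(For reference, the inline-function variant under discussion would look roughly like the sketch below. This is not code from the series; it is exactly the form that fails to build in kvm_host.h, because dereferencing vcpu requires struct kvm_vcpu to be a complete type at that point, which the circular #include prevents.

	/*
	 * Hypothetical inline equivalent of the vcpu_sve_pffr() macro.
	 * Unlike the macro, this needs the full definition of
	 * struct kvm_vcpu to be visible where it is defined.
	 */
	static inline void *vcpu_sve_pffr(struct kvm_vcpu *vcpu)
	{
		return (char *)vcpu->arch.sve_state +
			sve_ffr_offset(vcpu->arch.sve_max_vl);
	}
)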
Dave Martin <Dave.Martin@arm.com> writes:

> On Thu, Apr 04, 2019 at 10:35:02AM +0200, Andrew Jones wrote:

[...]

> OK.  If someone eventually solves this, I'd be happy to change to an
> inline function.

Is the function intended to be used by more call sites? Currently in the
tree with this plus the v2 fixups I can only see:

  arch/arm64/include/asm/kvm_host.h:333:#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
  arch/arm64/kvm/hyp/switch.c:388:		sve_load_state(vcpu_sve_pffr(vcpu),

--
Alex Bennée
On Wed, Apr 24, 2019 at 03:51:32PM +0100, Alex Bennée wrote:
> Dave Martin <Dave.Martin@arm.com> writes:

[...]

> Is the function intended to be used by more call sites? Currently in the
> tree with this plus the v2 fixups I can only see:
>
>   arch/arm64/include/asm/kvm_host.h:333:#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
>   arch/arm64/kvm/hyp/switch.c:388:		sve_load_state(vcpu_sve_pffr(vcpu),

Probably not, although it was probably used to save the state back
before things were refactored so that fpsimd_save() in
arch/arm64/kernel/fpsimd.c is used instead of separate code to save the
vcpu state.

The expression is ugly, so it's nice to abstract it.  This also keeps
the sve_load_state() call consistent with the equivalent call in
task_fpsimd_load() in arch/arm64/kernel/fpsimd.c.

Other than that, there's no underlying reason for having a macro.

Cheers
---Dave
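(For comparison, the host-task helper that vcpu_sve_pffr() mirrors can be an inline function, since struct thread_struct is already visible in fpsimd.h. Its approximate form at the time of this series, quoted from memory and so to be treated as a sketch, was:

	/*
	 * Host-task analogue: pointer to the task's FFR within its
	 * sve_state buffer.  sve_ffr_offset(vl) skips the 32 Z-registers
	 * (vl bytes each) and 16 P-registers (vl/8 bytes each) that
	 * precede FFR in the buffer.
	 */
	static inline void *sve_pffr(struct thread_struct *thread)
	{
		return (char *)thread->sve_state +
			sve_ffr_offset(thread->sve_vl);
	}
)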
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 22cf484..4fabfd2 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -228,6 +228,8 @@ struct vcpu_reset_state {
 
 struct kvm_vcpu_arch {
 	struct kvm_cpu_context ctxt;
+	void *sve_state;
+	unsigned int sve_max_vl;
 
 	/* HYP configuration */
 	u64 hcr_el2;
@@ -323,6 +325,10 @@ struct kvm_vcpu_arch {
 	bool sysregs_loaded_on_cpu;
 };
 
+/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
+#define vcpu_sve_pffr(vcpu) ((void *)((char *)((vcpu)->arch.sve_state) + \
+			     sve_ffr_offset((vcpu)->arch.sve_max_vl)))
+
 /* vcpu_arch flags field values: */
 #define KVM_ARM64_DEBUG_DIRTY		(1 << 0)
 #define KVM_ARM64_FP_ENABLED		(1 << 1) /* guest FP regs loaded */
diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
index 7053bf4..6e3c9c8 100644
--- a/arch/arm64/kvm/fpsimd.c
+++ b/arch/arm64/kvm/fpsimd.c
@@ -87,10 +87,11 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
 
 	if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
 		fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.gp_regs.fp_regs,
-					 NULL, SVE_VL_MIN);
+					 vcpu->arch.sve_state,
+					 vcpu->arch.sve_max_vl);
 
 		clear_thread_flag(TIF_FOREIGN_FPSTATE);
-		clear_thread_flag(TIF_SVE);
+		update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));
 	}
 }
 
diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c
index 9d46066..5444b9c 100644
--- a/arch/arm64/kvm/hyp/switch.c
+++ b/arch/arm64/kvm/hyp/switch.c
@@ -100,7 +100,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
 	val = read_sysreg(cpacr_el1);
 	val |= CPACR_EL1_TTA;
 	val &= ~CPACR_EL1_ZEN;
-	if (!update_fp_enabled(vcpu)) {
+	if (update_fp_enabled(vcpu)) {
+		if (vcpu_has_sve(vcpu))
+			val |= CPACR_EL1_ZEN;
+	} else {
 		val &= ~CPACR_EL1_FPEN;
 		__activate_traps_fpsimd32(vcpu);
 	}
@@ -317,16 +320,48 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
 	return true;
 }
 
-static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
+/* Check for an FPSIMD/SVE trap and handle as appropriate */
+static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
 {
-	struct user_fpsimd_state *host_fpsimd = vcpu->arch.host_fpsimd_state;
+	bool vhe, sve_guest, sve_host;
+	u8 hsr_ec;
 
-	if (has_vhe())
-		write_sysreg(read_sysreg(cpacr_el1) | CPACR_EL1_FPEN,
-			     cpacr_el1);
-	else
+	if (!system_supports_fpsimd())
+		return false;
+
+	if (system_supports_sve()) {
+		sve_guest = vcpu_has_sve(vcpu);
+		sve_host = vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE;
+		vhe = true;
+	} else {
+		sve_guest = false;
+		sve_host = false;
+		vhe = has_vhe();
+	}
+
+	hsr_ec = kvm_vcpu_trap_get_class(vcpu);
+	if (hsr_ec != ESR_ELx_EC_FP_ASIMD &&
+	    hsr_ec != ESR_ELx_EC_SVE)
+		return false;
+
+	/* Don't handle SVE traps for non-SVE vcpus here: */
+	if (!sve_guest)
+		if (hsr_ec != ESR_ELx_EC_FP_ASIMD)
+			return false;
+
+	/* Valid trap. Switch the context: */
+
+	if (vhe) {
+		u64 reg = read_sysreg(cpacr_el1) | CPACR_EL1_FPEN;
+
+		if (sve_guest)
+			reg |= CPACR_EL1_ZEN;
+
+		write_sysreg(reg, cpacr_el1);
+	} else {
 		write_sysreg(read_sysreg(cptr_el2) & ~(u64)CPTR_EL2_TFP,
 			     cptr_el2);
+	}
 
 	isb();
 
@@ -335,24 +370,28 @@ static bool __hyp_text __hyp_switch_fpsimd(struct kvm_vcpu *vcpu)
 		 * In the SVE case, VHE is assumed: it is enforced by
 		 * Kconfig and kvm_arch_init().
 		 */
-		if (system_supports_sve() &&
-		    (vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE)) {
+		if (sve_host) {
 			struct thread_struct *thread = container_of(
-				host_fpsimd,
+				vcpu->arch.host_fpsimd_state,
 				struct thread_struct, uw.fpsimd_state);
 
-			sve_save_state(sve_pffr(thread), &host_fpsimd->fpsr);
+			sve_save_state(sve_pffr(thread),
+				       &vcpu->arch.host_fpsimd_state->fpsr);
 		} else {
-			__fpsimd_save_state(host_fpsimd);
+			__fpsimd_save_state(vcpu->arch.host_fpsimd_state);
 		}
 
 		vcpu->arch.flags &= ~KVM_ARM64_FP_HOST;
 	}
 
-	__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
-
-	if (vcpu_has_sve(vcpu))
+	if (sve_guest) {
+		sve_load_state(vcpu_sve_pffr(vcpu),
+			       &vcpu->arch.ctxt.gp_regs.fp_regs.fpsr,
+			       sve_vq_from_vl(vcpu->arch.sve_max_vl) - 1);
 		write_sysreg_s(vcpu->arch.ctxt.sys_regs[ZCR_EL1], SYS_ZCR_EL12);
+	} else {
+		__fpsimd_restore_state(&vcpu->arch.ctxt.gp_regs.fp_regs);
+	}
 
 	/* Skip restoring fpexc32 for AArch64 guests */
 	if (!(read_sysreg(hcr_el2) & HCR_RW))
@@ -388,10 +427,10 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
 	 * and restore the guest context lazily.
 	 * If FP/SIMD is not implemented, handle the trap and inject an
 	 * undefined instruction exception to the guest.
+	 * Similarly for trapped SVE accesses.
 	 */
-	if (system_supports_fpsimd() &&
-	    kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_FP_ASIMD)
-		return __hyp_switch_fpsimd(vcpu);
+	if (__hyp_handle_fpsimd(vcpu))
+		return true;
 
 	if (!__populate_fault_info(vcpu))
 		return true;
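(This patch adds the sve_state pointer but defers its allocation to a later patch in the series. As a rough sketch of how the buffer size follows from sve_max_vl, with the helper name below being illustrative rather than the series' exact definition, the storage follows the same signal-frame register layout that sve_{save,load}_state() consume:

	/*
	 * Illustrative only: bytes of per-vcpu SVE register storage
	 * implied by sve_max_vl.  SVE_SIG_REGS_SIZE(vq) covers the
	 * 32 Z-regs, 16 P-regs and FFR for a vector length of vq
	 * quadwords (128 bits each).
	 */
	static inline size_t vcpu_sve_state_size_sketch(struct kvm_vcpu *vcpu)
	{
		unsigned int vq = sve_vq_from_vl(vcpu->arch.sve_max_vl);

		return SVE_SIG_REGS_SIZE(vq);
	}
)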