
[v2,6/7] KVM: arm64: Eagerly restore host fpsimd/sve state in pKVM

Message ID 20240521163720.3812851-7-tabba@google.com (mailing list archive)
State New
Series KVM: arm64: Fix handling of host fpsimd/sve state in protected mode

Commit Message

Fuad Tabba May 21, 2024, 4:37 p.m. UTC
When running in protected mode we don't want to leak protected
guest state to the host, including whether a guest has used
fpsimd/sve. Therefore, eagerly restore the host state on guest
exit when running in protected mode, which happens only if the
guest has used fpsimd/sve.

As a future optimisation, we could go back to lazily restoring
state at the host after exiting non-protected guests.

Signed-off-by: Fuad Tabba <tabba@google.com>
---
 arch/arm64/kvm/hyp/include/hyp/switch.h | 13 +++++-
 arch/arm64/kvm/hyp/nvhe/hyp-main.c      | 53 +++++++++++++++++++++++--
 arch/arm64/kvm/hyp/nvhe/pkvm.c          |  2 +
 arch/arm64/kvm/hyp/nvhe/switch.c        | 16 +++++++-
 4 files changed, 79 insertions(+), 5 deletions(-)

Comments

Marc Zyngier May 21, 2024, 10:52 p.m. UTC | #1
On Tue, 21 May 2024 17:37:19 +0100,
Fuad Tabba <tabba@google.com> wrote:
> 
> When running in protected mode we don't want to leak protected
> guest state to the host, including whether a guest has used
> fpsimd/sve. Therefore, eagerly restore the host state on guest
> exit when running in protected mode, which happens only if the
> guest has used fpsimd/sve.
> 
> As a future optimisation, we could go back to lazily restoring
> state at the host after exiting non-protected guests.

No. This sort of thing is way too invasive and would require mapping
the VHE host data at EL2. If we need to optimise this crap, then we
apply the same techniques as we use for guests. If that's good enough
for the guests, that's good enough for the host.

> 
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
>  arch/arm64/kvm/hyp/include/hyp/switch.h | 13 +++++-
>  arch/arm64/kvm/hyp/nvhe/hyp-main.c      | 53 +++++++++++++++++++++++--
>  arch/arm64/kvm/hyp/nvhe/pkvm.c          |  2 +
>  arch/arm64/kvm/hyp/nvhe/switch.c        | 16 +++++++-
>  4 files changed, 79 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
> index 1897b73e635c..2fa29bfec0b6 100644
> --- a/arch/arm64/kvm/hyp/include/hyp/switch.h
> +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
> @@ -320,6 +320,16 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
>  	write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
>  }
>  
> +static inline void __hyp_sve_save_host(void)
> +{
> +	struct user_sve_state *sve_state = *host_data_ptr(sve_state);
> +
> +	sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
> +	sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> +	__sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
> +			 &sve_state->fpsr);
> +}
> +
>  static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
>  
>  /*
> @@ -356,7 +366,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
>  
>  	/* First disable enough traps to allow us to update the registers */
>  	reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
> -	if (sve_guest)
> +	if (sve_guest ||
> +	    (is_protected_kvm_enabled() && system_supports_sve()))

This looks really ugly. Why can't we just compute sve_guest to take
these conditions into account?

>  		reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
>  	cpacr_clear_set(0, reg);
>  	isb();
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> index b07d44484f42..f79f0f7b2759 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> @@ -23,20 +23,66 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
>  
>  void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
>  
> +static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
> +{
> +	__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
> +	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
> +	__sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr);
> +	sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> +}
> +
> +static void __hyp_sve_restore_host(void)
> +{
> +	struct user_sve_state *sve_state = *host_data_ptr(sve_state);
> +
> +	sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> +	__sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),

This is what I was worried about in a previous patch.
kvm_host_sve_max_vl represents the max VL across all CPUs. If CPU0
supports 128bit SVE and CPU1 256bit, the value is 256. But on CPU0,
ZCR_ELx_LEN_MASK will result in using 128bit accesses, and the offset
will be wrong. I can't convince myself that anything really goes wrong
as long as we're consistent between save and restore, but that's at
best ugly and needs extensive documenting.
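
To put numbers on it: the layout sve_ffr_offset() is based on is
(roughly) 32 Z registers of VL bytes followed by 16 predicate registers
of VL/8 bytes, with FFR after them, so the offset scales with whichever
VL the buffer was sized for. As an illustration-only sketch:

	/* Illustration only: byte offset of FFR in the save area, vl in bytes */
	static inline unsigned int ffr_off(unsigned int vl)
	{
		return 32 * vl + 16 * (vl / 8);
	}

ffr_off(32) == 1088 for a 256-bit VL vs ffr_off(16) == 544 for a 128-bit
one, so the pffr pointer only lines up with the VL-scaled hardware
accesses when the effective VL matches the one used to compute it.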

On top of that, a conditional update of ZCR_EL2 with ZCR_ELx_LEN_MASK
is unlikely to be beneficial, since nobody implements 2k vectors. A
direct write followed by an ISB is likely to be better.
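
i.e. something like this (untested sketch, everything else left as-is):

	static void __hyp_sve_restore_host(void)
	{
		struct user_sve_state *sve_state = *host_data_ptr(sve_state);

		/* Direct write plus ISB instead of the conditional update */
		write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
		isb();
		__sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
				    &sve_state->fpsr);
		write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
	}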

> +			    &sve_state->fpsr);
> +	write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
> +}
> +
> +static void fpsimd_sve_flush(void)
> +{
> +	*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
> +}
> +
> +static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
> +{
> +	if (!guest_owns_fp_regs())
> +		return;
> +
> +	cpacr_clear_set(0, (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN |
> +			    CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN));
> +	isb();
> +
> +	if (vcpu_has_sve(vcpu))
> +		__hyp_sve_save_guest(vcpu);
> +	else
> +		__fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
> +
> +	if (system_supports_sve())
> +		__hyp_sve_restore_host();
> +	else
> +		__fpsimd_restore_state(*host_data_ptr(fpsimd_state));
> +
> +	*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
> +}
> +
>  static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
>  {
>  	struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
>  
> +	fpsimd_sve_flush();
> +
>  	hyp_vcpu->vcpu.arch.ctxt	= host_vcpu->arch.ctxt;
>  
>  	hyp_vcpu->vcpu.arch.sve_state	= kern_hyp_va(host_vcpu->arch.sve_state);
> -	hyp_vcpu->vcpu.arch.sve_max_vl	= host_vcpu->arch.sve_max_vl;
> +	hyp_vcpu->vcpu.arch.sve_max_vl	= min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
>  
>  	hyp_vcpu->vcpu.arch.hw_mmu	= host_vcpu->arch.hw_mmu;
>  
>  	hyp_vcpu->vcpu.arch.hcr_el2	= host_vcpu->arch.hcr_el2;
>  	hyp_vcpu->vcpu.arch.mdcr_el2	= host_vcpu->arch.mdcr_el2;
> -	hyp_vcpu->vcpu.arch.cptr_el2	= host_vcpu->arch.cptr_el2;
>  
>  	hyp_vcpu->vcpu.arch.iflags	= host_vcpu->arch.iflags;
>  
> @@ -54,10 +100,11 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
>  	struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
>  	unsigned int i;
>  
> +	fpsimd_sve_sync(&hyp_vcpu->vcpu);
> +
>  	host_vcpu->arch.ctxt		= hyp_vcpu->vcpu.arch.ctxt;
>  
>  	host_vcpu->arch.hcr_el2		= hyp_vcpu->vcpu.arch.hcr_el2;
> -	host_vcpu->arch.cptr_el2	= hyp_vcpu->vcpu.arch.cptr_el2;
>  
>  	host_vcpu->arch.fault		= hyp_vcpu->vcpu.arch.fault;
>  
> diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> index 25e9a94f6d76..feb27b4ce459 100644
> --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
> +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> @@ -588,6 +588,8 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
>  	if (ret)
>  		unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
>  
> +	hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
> +

Eventually, we need to rename this to cpacr_el2 and make sure we
consistently use the VHE format everywhere. I'm starting to be worried
that we mix things badly.

>  	return ret;
>  }
>  
> diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> index 019f863922fa..5bf437682b94 100644
> --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> @@ -184,7 +184,21 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
>  
>  static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
>  {
> -	__fpsimd_save_state(*host_data_ptr(fpsimd_state));
> +	/*
> +	 * Non-protected kvm relies on the host restoring its sve state.
> +	 * Protected kvm restores the host's sve state as not to reveal that
> +	 * fpsimd was used by a guest nor leak upper sve bits.
> +	 */
> +	if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
> +		__hyp_sve_save_host();
> +
> +		/* Re-enable SVE traps if not supported for the guest vcpu. */
> +		if (!vcpu_has_sve(vcpu))
> +			cpacr_clear_set(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN, 0);
> +

nit: spurious new line

Thanks,

	M.
Fuad Tabba May 22, 2024, 2:48 p.m. UTC | #2
Hi Marc,

On Tue, May 21, 2024 at 11:52 PM Marc Zyngier <maz@kernel.org> wrote:
>
> On Tue, 21 May 2024 17:37:19 +0100,
> Fuad Tabba <tabba@google.com> wrote:
> >
> > When running in protected mode we don't want to leak protected
> > guest state to the host, including whether a guest has used
> > fpsimd/sve. Therefore, eagerly restore the host state on guest
> > exit when running in protected mode, which happens only if the
> > guest has used fpsimd/sve.
> >
> > As a future optimisation, we could go back to lazily restoring
> > state at the host after exiting non-protected guests.
>
> No. This sort of thing is way too invasive and would require mapping
> the VHE host data at EL2. If we need to optimise this crap, then we
> apply the same techniques as we use for guests. If that's good enough
> for the guests, that's good enough for the host.

:D

> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> >  arch/arm64/kvm/hyp/include/hyp/switch.h | 13 +++++-
> >  arch/arm64/kvm/hyp/nvhe/hyp-main.c      | 53 +++++++++++++++++++++++--
> >  arch/arm64/kvm/hyp/nvhe/pkvm.c          |  2 +
> >  arch/arm64/kvm/hyp/nvhe/switch.c        | 16 +++++++-
> >  4 files changed, 79 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
> > index 1897b73e635c..2fa29bfec0b6 100644
> > --- a/arch/arm64/kvm/hyp/include/hyp/switch.h
> > +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
> > @@ -320,6 +320,16 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
> >       write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
> >  }
> >
> > +static inline void __hyp_sve_save_host(void)
> > +{
> > +     struct user_sve_state *sve_state = *host_data_ptr(sve_state);
> > +
> > +     sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
> > +     sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> > +     __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
> > +                      &sve_state->fpsr);
> > +}
> > +
> >  static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
> >
> >  /*
> > @@ -356,7 +366,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
> >
> >       /* First disable enough traps to allow us to update the registers */
> >       reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
> > -     if (sve_guest)
> > +     if (sve_guest ||
> > +         (is_protected_kvm_enabled() && system_supports_sve()))
>
> This looks really ugly. Why can't we just compute sve_guest to take
> these conditions into account?

Because this is just for disabling the SVE traps, which we would need
to do even in the case where the guest doesn't support SVE in order to
be able to store the host sve state.

sve_guest is also used later in the function to determine whether we
need to restore the guest's sve state, and earlier to determine
whether an SVE trap from a non-sve guest should result in injecting an
undef instruction.

That said, maybe the following is a bit less ugly?

    if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
        cpacr_clear_set(0, CPACR_ELx_FPEN|CPACR_ELx_ZEN);
    else
        cpacr_clear_set(0, CPACR_ELx_FPEN);

>
> >               reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
> >       cpacr_clear_set(0, reg);
> >       isb();
> > diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > index b07d44484f42..f79f0f7b2759 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > @@ -23,20 +23,66 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
> >
> >  void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
> >
> > +static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
> > +{
> > +     __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
> > +     sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
> > +     __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr);
> > +     sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> > +}
> > +
> > +static void __hyp_sve_restore_host(void)
> > +{
> > +     struct user_sve_state *sve_state = *host_data_ptr(sve_state);
> > +
> > +     sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> > +     __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
>
> This is what I was worried about in a previous patch.
> kvm_host_sve_max_vl represents the max VL across all CPUs. If CPU0
> supports 128bit SVE and CPU1 256bit, the value is 256. But on CPU0,
> ZCR_ELx_LEN_MASK will result in using 128bit accesses, and the offset
> will be wrong. I can't convince myself that anything really goes wrong
> as long as we're consistent between save and restore, but that's at
> best ugly and needs extensive documenting.

As I mentioned in my reply to the previous patch, I _think_ that it
represents the maximum common VL across CPUs, since the value comes
from vl_info.vq_map, which is calculated by filtering out all the VQs
that are not supported.

In kvm_arch_vcpu_put_fp(), we always assume the maximum VL supported
by the guest, regardless of which cpu this happens to be running on.
That said, there's a comment there outlining that. I'll add a comment
here to document it.
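
Something along the lines of (rough sketch):

	/*
	 * kvm_host_sve_max_vl is the maximum VL common to all CPUs (it is
	 * derived from vl_info.vq_map, with unsupported VQs filtered out),
	 * so the save and restore paths use the same buffer layout no
	 * matter which CPU they happen to run on.
	 */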

> On top of that, a conditional update of ZCR_EL2 with ZCR_ELx_LEN_MASK
> is unlikely to be beneficial, since nobody implements 2k vectors. A
> direct write followed by an ISB is likely to be better.

Will do.

> > +                         &sve_state->fpsr);
> > +     write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
> > +}
> > +
> > +static void fpsimd_sve_flush(void)
> > +{
> > +     *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
> > +}
> > +
> > +static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
> > +{
> > +     if (!guest_owns_fp_regs())
> > +             return;
> > +
> > +     cpacr_clear_set(0, (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN |
> > +                         CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN));
> > +     isb();
> > +
> > +     if (vcpu_has_sve(vcpu))
> > +             __hyp_sve_save_guest(vcpu);
> > +     else
> > +             __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
> > +
> > +     if (system_supports_sve())
> > +             __hyp_sve_restore_host();
> > +     else
> > +             __fpsimd_restore_state(*host_data_ptr(fpsimd_state));
> > +
> > +     *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
> > +}
> > +
> >  static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
> >  {
> >       struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
> >
> > +     fpsimd_sve_flush();
> > +
> >       hyp_vcpu->vcpu.arch.ctxt        = host_vcpu->arch.ctxt;
> >
> >       hyp_vcpu->vcpu.arch.sve_state   = kern_hyp_va(host_vcpu->arch.sve_state);
> > -     hyp_vcpu->vcpu.arch.sve_max_vl  = host_vcpu->arch.sve_max_vl;
> > +     hyp_vcpu->vcpu.arch.sve_max_vl  = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
> >
> >       hyp_vcpu->vcpu.arch.hw_mmu      = host_vcpu->arch.hw_mmu;
> >
> >       hyp_vcpu->vcpu.arch.hcr_el2     = host_vcpu->arch.hcr_el2;
> >       hyp_vcpu->vcpu.arch.mdcr_el2    = host_vcpu->arch.mdcr_el2;
> > -     hyp_vcpu->vcpu.arch.cptr_el2    = host_vcpu->arch.cptr_el2;
> >
> >       hyp_vcpu->vcpu.arch.iflags      = host_vcpu->arch.iflags;
> >
> > @@ -54,10 +100,11 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
> >       struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
> >       unsigned int i;
> >
> > +     fpsimd_sve_sync(&hyp_vcpu->vcpu);
> > +
> >       host_vcpu->arch.ctxt            = hyp_vcpu->vcpu.arch.ctxt;
> >
> >       host_vcpu->arch.hcr_el2         = hyp_vcpu->vcpu.arch.hcr_el2;
> > -     host_vcpu->arch.cptr_el2        = hyp_vcpu->vcpu.arch.cptr_el2;
> >
> >       host_vcpu->arch.fault           = hyp_vcpu->vcpu.arch.fault;
> >
> > diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > index 25e9a94f6d76..feb27b4ce459 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > @@ -588,6 +588,8 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
> >       if (ret)
> >               unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
> >
> > +     hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
> > +
>
> Eventually, we need to rename this to cpacr_el2 and make sure we
> consistently use the VHE format everywhere. I'm starting to be worried
> that we mix things badly.

Would you like me to do that in this patch series, or should I submit
another one after this that does this cleanup?

> >       return ret;
> >  }
> >
> > diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> > index 019f863922fa..5bf437682b94 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> > @@ -184,7 +184,21 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
> >
> >  static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
> >  {
> > -     __fpsimd_save_state(*host_data_ptr(fpsimd_state));
> > +     /*
> > +      * Non-protected kvm relies on the host restoring its sve state.
> > +      * Protected kvm restores the host's sve state as not to reveal that
> > +      * fpsimd was used by a guest nor leak upper sve bits.
> > +      */
> > +     if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
> > +             __hyp_sve_save_host();
> > +
> > +             /* Re-enable SVE traps if not supported for the guest vcpu. */
> > +             if (!vcpu_has_sve(vcpu))
> > +                     cpacr_clear_set(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN, 0);
> > +
>
> nit: spurious new line

Ack.

Thank you!


/fuad

> Thanks,
>
>         M.
>
> --
> Without deviation from the norm, progress is not possible.
Marc Zyngier May 28, 2024, 8:21 a.m. UTC | #3
On Wed, 22 May 2024 15:48:39 +0100,
Fuad Tabba <tabba@google.com> wrote:
> 
> Hi Marc,
> 
> On Tue, May 21, 2024 at 11:52 PM Marc Zyngier <maz@kernel.org> wrote:
> >
> > On Tue, 21 May 2024 17:37:19 +0100,
> > Fuad Tabba <tabba@google.com> wrote:
> > >
> > > When running in protected mode we don't want to leak protected
> > > guest state to the host, including whether a guest has used
> > > fpsimd/sve. Therefore, eagerly restore the host state on guest
> > > exit when running in protected mode, which happens only if the
> > > guest has used fpsimd/sve.
> > >
> > > As a future optimisation, we could go back to lazily restoring
> > > state at the host after exiting non-protected guests.
> >
> > No. This sort of thing is way too invasive and would require mapping
> > the VHE host data at EL2. If we need to optimise this crap, then we
> > apply the same techniques as we use for guests. If that's good enough
> > for the guests, that's good enough for the host.
> 
> :D
> 
> > >
> > > Signed-off-by: Fuad Tabba <tabba@google.com>
> > > ---
> > >  arch/arm64/kvm/hyp/include/hyp/switch.h | 13 +++++-
> > >  arch/arm64/kvm/hyp/nvhe/hyp-main.c      | 53 +++++++++++++++++++++++--
> > >  arch/arm64/kvm/hyp/nvhe/pkvm.c          |  2 +
> > >  arch/arm64/kvm/hyp/nvhe/switch.c        | 16 +++++++-
> > >  4 files changed, 79 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
> > > index 1897b73e635c..2fa29bfec0b6 100644
> > > --- a/arch/arm64/kvm/hyp/include/hyp/switch.h
> > > +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
> > > @@ -320,6 +320,16 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
> > >       write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
> > >  }
> > >
> > > +static inline void __hyp_sve_save_host(void)
> > > +{
> > > +     struct user_sve_state *sve_state = *host_data_ptr(sve_state);
> > > +
> > > +     sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
> > > +     sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> > > +     __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
> > > +                      &sve_state->fpsr);
> > > +}
> > > +
> > >  static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
> > >
> > >  /*
> > > @@ -356,7 +366,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
> > >
> > >       /* First disable enough traps to allow us to update the registers */
> > >       reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
> > > -     if (sve_guest)
> > > +     if (sve_guest ||
> > > +         (is_protected_kvm_enabled() && system_supports_sve()))
> >
> > This looks really ugly. Why can't we just compute sve_guest to take
> > these conditions into account?
> 
> Because this is just for disabling the SVE traps, which we would need
> to do even in the case where the guest doesn't support SVE in order to
> be able to store the host sve state.
> 
> sve_guest is also used later in the function to determine whether we
> need to restore the guest's sve state, and earlier to determine
> whether an SVE trap from a non-sve guest should result in injecting an
> undef instruction.
> 
> That said, maybe the following is a bit less ugly?
> 
>     if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
>         cpacr_clear_set(0, CPACR_ELx_FPEN|CPACR_ELx_ZEN);
>     else
>         cpacr_clear_set(0, CPACR_ELx_FPEN);

Yes, this looks marginally better. Overall, this helper is in dire
need of a full rewrite...

> >
> > >               reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
> > >       cpacr_clear_set(0, reg);
> > >       isb();
> > > diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > > index b07d44484f42..f79f0f7b2759 100644
> > > --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > > +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > > @@ -23,20 +23,66 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
> > >
> > >  void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
> > >
> > > +static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
> > > +{
> > > +     __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
> > > +     sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
> > > +     __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr);
> > > +     sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> > > +}
> > > +
> > > +static void __hyp_sve_restore_host(void)
> > > +{
> > > +     struct user_sve_state *sve_state = *host_data_ptr(sve_state);
> > > +
> > > +     sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> > > +     __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
> >
> > This is what I was worried about in a previous patch.
> > kvm_host_sve_max_vl represents the max VL across all CPUs. If CPU0
> > supports 128bit SVE and CPU1 256bit, the value is 256. But on CPU0,
> > ZCR_ELx_LEN_MASK will result in using 128bit accesses, and the offset
> > will be wrong. I can't convince myself that anything really goes wrong
> > as long as we're consistent between save and restore, but that's at
> > best ugly and needs extensive documenting.
> 
> As I mentioned in my reply to the previous patch, I _think_ that it
> represents the maximum common VL across CPUs, since the value comes
> from vl_info.vq_map, which is calculated by filtering out all the VQs
> that are not supported.
> 
> In kvm_arch_vcpu_put_fp(), we always assume the maximum VL supported
> by the guest, regardless of which cpu this happens to be running on.
> That said, there's a comment there outlining that. I'll add a comment
> here to document it.

That'd be helpful, thanks.

> 
> > On top of that, a conditional update of ZCR_EL2 with ZCR_ELx_LEN_MASK
> > is unlikely to be beneficial, since nobody implements 2k vectors. A
> > direct write followed by an ISB is likely to be better.
> 
> Will do.

[..]

> > > diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > > index 25e9a94f6d76..feb27b4ce459 100644
> > > --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > > +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > > @@ -588,6 +588,8 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
> > >       if (ret)
> > >               unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
> > >
> > > +     hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
> > > +
> >
> > Eventually, we need to rename this to cpacr_el2 and make sure we
> > consistently use the VHE format everywhere. I'm starting to be worried
> > that we mix things badly.
> 
> Would you like me to do that in this patch series, or should I submit
> another one after this that does this cleanup?

Later please. Let's focus on getting this series across first.

Thanks,

	M.

Patch

diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 1897b73e635c..2fa29bfec0b6 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -320,6 +320,16 @@  static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
 	write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
 }
 
+static inline void __hyp_sve_save_host(void)
+{
+	struct user_sve_state *sve_state = *host_data_ptr(sve_state);
+
+	sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
+	sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+	__sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+			 &sve_state->fpsr);
+}
+
 static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
 
 /*
@@ -356,7 +366,8 @@  static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
 
 	/* First disable enough traps to allow us to update the registers */
 	reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
-	if (sve_guest)
+	if (sve_guest ||
+	    (is_protected_kvm_enabled() && system_supports_sve()))
 		reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
 	cpacr_clear_set(0, reg);
 	isb();
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index b07d44484f42..f79f0f7b2759 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -23,20 +23,66 @@  DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
 void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
 
+static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
+{
+	__vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
+	sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
+	__sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr);
+	sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+}
+
+static void __hyp_sve_restore_host(void)
+{
+	struct user_sve_state *sve_state = *host_data_ptr(sve_state);
+
+	sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+	__sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+			    &sve_state->fpsr);
+	write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
+}
+
+static void fpsimd_sve_flush(void)
+{
+	*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
+static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
+{
+	if (!guest_owns_fp_regs())
+		return;
+
+	cpacr_clear_set(0, (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN |
+			    CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN));
+	isb();
+
+	if (vcpu_has_sve(vcpu))
+		__hyp_sve_save_guest(vcpu);
+	else
+		__fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
+
+	if (system_supports_sve())
+		__hyp_sve_restore_host();
+	else
+		__fpsimd_restore_state(*host_data_ptr(fpsimd_state));
+
+	*host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
 static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
 {
 	struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
 
+	fpsimd_sve_flush();
+
 	hyp_vcpu->vcpu.arch.ctxt	= host_vcpu->arch.ctxt;
 
 	hyp_vcpu->vcpu.arch.sve_state	= kern_hyp_va(host_vcpu->arch.sve_state);
-	hyp_vcpu->vcpu.arch.sve_max_vl	= host_vcpu->arch.sve_max_vl;
+	hyp_vcpu->vcpu.arch.sve_max_vl	= min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
 
 	hyp_vcpu->vcpu.arch.hw_mmu	= host_vcpu->arch.hw_mmu;
 
 	hyp_vcpu->vcpu.arch.hcr_el2	= host_vcpu->arch.hcr_el2;
 	hyp_vcpu->vcpu.arch.mdcr_el2	= host_vcpu->arch.mdcr_el2;
-	hyp_vcpu->vcpu.arch.cptr_el2	= host_vcpu->arch.cptr_el2;
 
 	hyp_vcpu->vcpu.arch.iflags	= host_vcpu->arch.iflags;
 
@@ -54,10 +100,11 @@  static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
 	struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
 	unsigned int i;
 
+	fpsimd_sve_sync(&hyp_vcpu->vcpu);
+
 	host_vcpu->arch.ctxt		= hyp_vcpu->vcpu.arch.ctxt;
 
 	host_vcpu->arch.hcr_el2		= hyp_vcpu->vcpu.arch.hcr_el2;
-	host_vcpu->arch.cptr_el2	= hyp_vcpu->vcpu.arch.cptr_el2;
 
 	host_vcpu->arch.fault		= hyp_vcpu->vcpu.arch.fault;
 
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 25e9a94f6d76..feb27b4ce459 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -588,6 +588,8 @@  int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
 	if (ret)
 		unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
 
+	hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
+
 	return ret;
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 019f863922fa..5bf437682b94 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -184,7 +184,21 @@  static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
 
 static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
 {
-	__fpsimd_save_state(*host_data_ptr(fpsimd_state));
+	/*
+	 * Non-protected kvm relies on the host restoring its sve state.
+	 * Protected kvm restores the host's sve state as not to reveal that
+	 * fpsimd was used by a guest nor leak upper sve bits.
+	 */
+	if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
+		__hyp_sve_save_host();
+
+		/* Re-enable SVE traps if not supported for the guest vcpu. */
+		if (!vcpu_has_sve(vcpu))
+			cpacr_clear_set(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN, 0);
+
+	} else {
+		__fpsimd_save_state(*host_data_ptr(fpsimd_state));
+	}
 }
 
 static const exit_handler_fn hyp_exit_handlers[] = {