
[RFC,16/16] KVM: arm64/sve: Report and enable SVE API extensions for userspace

Message ID 1529593060-542-17-git-send-email-Dave.Martin@arm.com (mailing list archive)
State New, archived

Commit Message

Dave Martin June 21, 2018, 2:57 p.m. UTC
This patch reports the availability of KVM SVE support to userspace
via a new vcpu feature flag KVM_ARM_VCPU_SVE.  This flag is
reported via the KVM_ARM_PREFERRED_TARGET ioctl.

Userspace can enable the feature by setting the flag for
KVM_ARM_VCPU_INIT.  Without this flag set, SVE-related ioctls and
register access extensions are hidden, and SVE remains disabled
unconditionally for the guest.  This ensures that non-SVE-aware KVM
userspace does not receive a vcpu that it does not understand how
to snapshot or restore correctly.

Storage is allocated for the SVE register state at vcpu init time,
sufficient for the maximum vector length to be exposed to the vcpu.
No attempt is made to allocate the storage lazily for now.  Also,
no attempt is made to resize the storage dynamically, since the
effective vector length of the vcpu can change at each EL0/EL1
transition.  The storage is freed at the vcpu uninit hook.

No particular attempt is made to prevent userspace from creating a
mix of vcpus some of which have SVE enabled and some of which have
it disabled.  This may or may not be useful, but it reflects the
underlying architectural behaviour.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
---
 arch/arm64/include/asm/kvm_host.h |  6 +++---
 arch/arm64/include/uapi/asm/kvm.h |  1 +
 arch/arm64/kvm/guest.c            | 19 +++++++++++++------
 arch/arm64/kvm/reset.c            | 14 ++++++++++++++
 4 files changed, 31 insertions(+), 9 deletions(-)

Comments

Andrew Jones July 19, 2018, 2:59 p.m. UTC | #1
On Thu, Jun 21, 2018 at 03:57:40PM +0100, Dave Martin wrote:
> This patch reports the availability of KVM SVE support to userspace
> via a new vcpu feature flag KVM_ARM_VCPU_SVE.  This flag is
> reported via the KVM_ARM_PREFERRED_TARGET ioctl.
> 
> Userspace can enable the feature by setting the flag for
> KVM_ARM_VCPU_INIT.  Without this flag set, SVE-related ioctls and
> register access extensions are hidden, and SVE remains disabled
> unconditionally for the guest.  This ensures that non-SVE-aware KVM
> userspace does not receive a vcpu that it does not understand how
> to snapshot or restore correctly.
> 
> Storage is allocated for the SVE register state at vcpu init time,
> sufficient for the maximum vector length to be exposed to the vcpu.
> No attempt is made to allocate the storage lazily for now.  Also,
> no attempt is made to resize the storage dynamically, since the
> effective vector length of the vcpu can change at each EL0/EL1
> transition.  The storage is freed at the vcpu uninit hook.
> 
> No particular attempt is made to prevent userspace from creating a
> mix of vcpus some of which have SVE enabled and some of which have
> it disabled.  This may or may not be useful, but it reflects the
> underlying architectural behaviour.
> 
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> ---
>  arch/arm64/include/asm/kvm_host.h |  6 +++---
>  arch/arm64/include/uapi/asm/kvm.h |  1 +
>  arch/arm64/kvm/guest.c            | 19 +++++++++++++------
>  arch/arm64/kvm/reset.c            | 14 ++++++++++++++
>  4 files changed, 31 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index d2084ae..d956cf2 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -44,7 +44,7 @@
>  
>  #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
>  
> -#define KVM_VCPU_MAX_FEATURES 4
> +#define KVM_VCPU_MAX_FEATURES 5
>  
>  #define KVM_REQ_SLEEP \
>  	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> @@ -439,8 +439,8 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>  
> -static inline int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu) { return 0; }
> -static inline void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> +int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu);
> +void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
>  
>  void kvm_arm_init_debug(void);
>  void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> index f54a9b0..6acf276 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -101,6 +101,7 @@ struct kvm_regs {
>  #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
>  #define KVM_ARM_VCPU_PSCI_0_2		2 /* CPU uses PSCI v0.2 */
>  #define KVM_ARM_VCPU_PMU_V3		3 /* Support guest PMUv3 */
> +#define KVM_ARM_VCPU_SVE		4 /* Allow SVE for guest */
>  
>  struct kvm_vcpu_init {
>  	__u32 target;
> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> index 5152362..fb7f6aa 100644
> --- a/arch/arm64/kvm/guest.c
> +++ b/arch/arm64/kvm/guest.c
> @@ -58,6 +58,16 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
>  
> +int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> +{
> +	return 0;
> +}

Unused, so could have just left the inline version.

> +
> +void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
> +{
> +	kfree(vcpu->arch.sve_state);
> +}
> +
>  static u64 core_reg_offset_from_id(u64 id)
>  {
>  	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
> @@ -600,12 +610,9 @@ int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
>  
>  	memset(init, 0, sizeof(*init));
>  
> -	/*
> -	 * For now, we don't return any features.
> -	 * In future, we might use features to return target
> -	 * specific features available for the preferred
> -	 * target type.
> -	 */
> +	/* KVM_ARM_VCPU_SVE understood by KVM_VCPU_INIT */
> +	init->features[0] = 1 << KVM_ARM_VCPU_SVE;
> +

We shouldn't need to do this. The "preferred" target type isn't defined
well (that I know of), but IMO it should probably be the target that
best matches the host, minus optional features. The best base target. We
may use these features to convey that the preferred target should enable
some optional feature if that feature is necessary to workaround a bug,
i.e. using the "feature" bit as an erratum bit someday, but that'd be
quite a debatable use, so maybe not even that. Most likely we'll never
need to add features here.

That said, I think defining the feature bit makes sense. ATM, I'm feeling
like we'll want to model the user interface for SVE like PMU (using VCPU
device ioctls).
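
For reference, the PMU flow being referred to is roughly the following from
userspace (sketch only; error handling trimmed, the PPI number is just an
example, and the vcpu must also have the KVM_ARM_VCPU_PMU_V3 feature bit set
at KVM_ARM_VCPU_INIT time):

	/* Needs <sys/ioctl.h> and <linux/kvm.h>. */
	static int enable_pmu(int vcpu_fd)
	{
		int irq = 23;	/* example PPI chosen by the VMM */
		struct kvm_device_attr attr = {
			.group	= KVM_ARM_VCPU_PMU_V3_CTRL,
			.attr	= KVM_ARM_VCPU_PMU_V3_IRQ,
			.addr	= (__u64)(unsigned long)&irq,
		};

		if (ioctl(vcpu_fd, KVM_HAS_DEVICE_ATTR, &attr))
			return -1;	/* PMUv3 attributes not supported */
		if (ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr))
			return -1;

		/* Finalise the PMU configuration for this vcpu: */
		attr.attr = KVM_ARM_VCPU_PMU_V3_INIT;
		attr.addr = 0;
		return ioctl(vcpu_fd, KVM_SET_DEVICE_ATTR, &attr);
	}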


>  	init->target = (__u32)target;
>  
>  	return 0;
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index a74311b..f63a791 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -110,6 +110,20 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
>  			cpu_reset = &default_regs_reset;
>  		}
>  
> +		if (system_supports_sve() &&
> +		    test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
> +			vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
> +
> +			vcpu->arch.sve_max_vl = sve_max_virtualisable_vl;
> +
> +			vcpu->arch.sve_state = kzalloc(
> +				SVE_SIG_REGS_SIZE(
> +					sve_vq_from_vl(vcpu->arch.sve_max_vl)),

I guess sve_state can be pretty large. Should we allocate it like we
do the VM with kvm_arch_alloc_vm()? I.e. using vzalloc() on VHE machines?
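
For reference, kvm_arch_alloc_vm() currently does roughly:

	struct kvm *kvm_arch_alloc_vm(void)
	{
		if (!has_vhe())
			return kzalloc(sizeof(struct kvm), GFP_KERNEL);

		return vzalloc(sizeof(struct kvm));
	}

(kzalloc() because on non-VHE the structure gets mapped at EL2 from the
kernel linear map; vzalloc() is fine on VHE), so an sve_state allocation
could follow the same pattern -- that's a sketch of the suggestion, not
something this patch does.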

> +				GFP_KERNEL);
> +			if (!vcpu->arch.sve_state)
> +				return -ENOMEM;
> +		}
> +
>  		break;
>  	}
>  
> -- 
> 2.1.4

Thanks,
drew
Andrew Jones July 19, 2018, 3:24 p.m. UTC | #2
On Thu, Jun 21, 2018 at 03:57:40PM +0100, Dave Martin wrote:
> This patch reports the availability of KVM SVE support to userspace
> via a new vcpu feature flag KVM_ARM_VCPU_SVE.  This flag is
> reported via the KVM_ARM_PREFERRED_TARGET ioctl.
> 
> Userspace can enable the feature by setting the flag for
> KVM_ARM_VCPU_INIT.  Without this flag set, SVE-related ioctls and
> register access extensions are hidden, and SVE remains disabled
> unconditionally for the guest.  This ensures that non-SVE-aware KVM
> userspace does not receive a vcpu that it does not understand how
> to snapshot or restore correctly.
> 
> Storage is allocated for the SVE register state at vcpu init time,
> sufficient for the maximum vector length to be exposed to the vcpu.
> No attempt is made to allocate the storage lazily for now.  Also,
> no attempt is made to resize the storage dynamically, since the
> effective vector length of the vcpu can change at each EL0/EL1
> transition.  The storage is freed at the vcpu uninit hook.
> 
> No particular attempt is made to prevent userspace from creating a
> mix of vcpus some of which have SVE enabled and some of which have
> it disabled.  This may or may not be useful, but it reflects the
> underlying architectural behaviour.
> 
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> ---
>  arch/arm64/include/asm/kvm_host.h |  6 +++---
>  arch/arm64/include/uapi/asm/kvm.h |  1 +
>  arch/arm64/kvm/guest.c            | 19 +++++++++++++------
>  arch/arm64/kvm/reset.c            | 14 ++++++++++++++
>  4 files changed, 31 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index d2084ae..d956cf2 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -44,7 +44,7 @@
>  
>  #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
>  
> -#define KVM_VCPU_MAX_FEATURES 4
> +#define KVM_VCPU_MAX_FEATURES 5
>  
>  #define KVM_REQ_SLEEP \
>  	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> @@ -439,8 +439,8 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
>  
> -static inline int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu) { return 0; }
> -static inline void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> +int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu);
> +void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
>  
>  void kvm_arm_init_debug(void);
>  void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
> diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> index f54a9b0..6acf276 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -101,6 +101,7 @@ struct kvm_regs {
>  #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
>  #define KVM_ARM_VCPU_PSCI_0_2		2 /* CPU uses PSCI v0.2 */
>  #define KVM_ARM_VCPU_PMU_V3		3 /* Support guest PMUv3 */
> +#define KVM_ARM_VCPU_SVE		4 /* Allow SVE for guest */
>  
>  struct kvm_vcpu_init {
>  	__u32 target;
> diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> index 5152362..fb7f6aa 100644
> --- a/arch/arm64/kvm/guest.c
> +++ b/arch/arm64/kvm/guest.c
> @@ -58,6 +58,16 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
>  
> +int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> +{
> +	return 0;
> +}
> +
> +void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
> +{
> +	kfree(vcpu->arch.sve_state);
> +}
> +
>  static u64 core_reg_offset_from_id(u64 id)
>  {
>  	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
> @@ -600,12 +610,9 @@ int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
>  
>  	memset(init, 0, sizeof(*init));
>  
> -	/*
> -	 * For now, we don't return any features.
> -	 * In future, we might use features to return target
> -	 * specific features available for the preferred
> -	 * target type.
> -	 */
> +	/* KVM_ARM_VCPU_SVE understood by KVM_VCPU_INIT */
> +	init->features[0] = 1 << KVM_ARM_VCPU_SVE;
> +
>  	init->target = (__u32)target;
>  
>  	return 0;
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index a74311b..f63a791 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -110,6 +110,20 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
>  			cpu_reset = &default_regs_reset;
>  		}
>  
> +		if (system_supports_sve() &&
> +		    test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
> +			vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
> +
> +			vcpu->arch.sve_max_vl = sve_max_virtualisable_vl;
> +

The allocation below needs to be guarded by an if (!vcpu->arch.sve_state),
otherwise every time the guest does a PSCI-off/PSCI-on cycle of the vcpu
we'll have a memory leak. Or, we need to move this allocation into the new
kvm_arm_arch_vcpu_init() function. Why did you opt for kvm_reset_vcpu()?
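
For illustration, the guard might look like this in kvm_reset_vcpu() (sketch
only, not part of the patch):

	if (system_supports_sve() &&
	    test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
		vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
		vcpu->arch.sve_max_vl = sve_max_virtualisable_vl;

		/* Reuse the buffer on a PSCI-on reset instead of leaking it: */
		if (!vcpu->arch.sve_state) {
			vcpu->arch.sve_state = kzalloc(
				SVE_SIG_REGS_SIZE(
					sve_vq_from_vl(vcpu->arch.sve_max_vl)),
				GFP_KERNEL);
			if (!vcpu->arch.sve_state)
				return -ENOMEM;
		}
	}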

Thanks,
drew

> +			vcpu->arch.sve_state = kzalloc(
> +				SVE_SIG_REGS_SIZE(
> +					sve_vq_from_vl(vcpu->arch.sve_max_vl)),
> +				GFP_KERNEL);
> +			if (!vcpu->arch.sve_state)
> +				return -ENOMEM;
> +		}
> +
>  		break;
>  	}
>  
> -- 
> 2.1.4
> 
> _______________________________________________
> kvmarm mailing list
> kvmarm@lists.cs.columbia.edu
> https://lists.cs.columbia.edu/mailman/listinfo/kvmarm
Dave Martin July 25, 2018, 3:27 p.m. UTC | #3
On Thu, Jul 19, 2018 at 04:59:21PM +0200, Andrew Jones wrote:
> On Thu, Jun 21, 2018 at 03:57:40PM +0100, Dave Martin wrote:
> > This patch reports the availability of KVM SVE support to userspace
> > via a new vcpu feature flag KVM_ARM_VCPU_SVE.  This flag is
> > reported via the KVM_ARM_PREFERRED_TARGET ioctl.
> > 
> > Userspace can enable the feature by setting the flag for
> > KVM_ARM_VCPU_INIT.  Without this flag set, SVE-related ioctls and
> > register access extensions are hidden, and SVE remains disabled
> > unconditionally for the guest.  This ensures that non-SVE-aware KVM
> > userspace does not receive a vcpu that it does not understand how
> > to snapshot or restore correctly.
> > 
> > Storage is allocated for the SVE register state at vcpu init time,
> > sufficient for the maximum vector length to be exposed to the vcpu.
> > No attempt is made to allocate the storage lazily for now.  Also,
> > no attempt is made to resize the storage dynamically, since the
> > effective vector length of the vcpu can change at each EL0/EL1
> > transition.  The storage is freed at the vcpu uninit hook.
> > 
> > No particular attempt is made to prevent userspace from creating a
> > mix of vcpus some of which have SVE enabled and some of which have
> > it disabled.  This may or may not be useful, but it reflects the
> > underlying architectural behaviour.
> > 
> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > ---
> >  arch/arm64/include/asm/kvm_host.h |  6 +++---
> >  arch/arm64/include/uapi/asm/kvm.h |  1 +
> >  arch/arm64/kvm/guest.c            | 19 +++++++++++++------
> >  arch/arm64/kvm/reset.c            | 14 ++++++++++++++
> >  4 files changed, 31 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index d2084ae..d956cf2 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -44,7 +44,7 @@
> >  
> >  #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
> >  
> > -#define KVM_VCPU_MAX_FEATURES 4
> > +#define KVM_VCPU_MAX_FEATURES 5
> >  
> >  #define KVM_REQ_SLEEP \
> >  	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> > @@ -439,8 +439,8 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
> >  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
> >  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
> >  
> > -static inline int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu) { return 0; }
> > -static inline void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> > +int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu);
> > +void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
> >  
> >  void kvm_arm_init_debug(void);
> >  void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
> > diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> > index f54a9b0..6acf276 100644
> > --- a/arch/arm64/include/uapi/asm/kvm.h
> > +++ b/arch/arm64/include/uapi/asm/kvm.h
> > @@ -101,6 +101,7 @@ struct kvm_regs {
> >  #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
> >  #define KVM_ARM_VCPU_PSCI_0_2		2 /* CPU uses PSCI v0.2 */
> >  #define KVM_ARM_VCPU_PMU_V3		3 /* Support guest PMUv3 */
> > +#define KVM_ARM_VCPU_SVE		4 /* Allow SVE for guest */
> >  
> >  struct kvm_vcpu_init {
> >  	__u32 target;
> > diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> > index 5152362..fb7f6aa 100644
> > --- a/arch/arm64/kvm/guest.c
> > +++ b/arch/arm64/kvm/guest.c
> > @@ -58,6 +58,16 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> >  	return 0;
> >  }
> >  
> > +int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> > +{
> > +	return 0;
> > +}
> 
> Unused, so could have just left the inline version.
> 
> > +
> > +void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
> > +{
> > +	kfree(vcpu->arch.sve_state);
> > +}
> > +
> >  static u64 core_reg_offset_from_id(u64 id)
> >  {
> >  	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
> > @@ -600,12 +610,9 @@ int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
> >  
> >  	memset(init, 0, sizeof(*init));
> >  
> > -	/*
> > -	 * For now, we don't return any features.
> > -	 * In future, we might use features to return target
> > -	 * specific features available for the preferred
> > -	 * target type.
> > -	 */
> > +	/* KVM_ARM_VCPU_SVE understood by KVM_VCPU_INIT */
> > +	init->features[0] = 1 << KVM_ARM_VCPU_SVE;
> > +
> 
> We shouldn't need to do this. The "preferred" target type isn't defined
> well (that I know of), but IMO it should probably be the target that
> best matches the host, minus optional features. The best base target. We
> may use these features to convey that the preferred target should enable
> some optional feature if that feature is necessary to workaround a bug,
> i.e. using the "feature" bit as an erratum bit someday, but that'd be
> quite a debatable use, so maybe not even that. Most likely we'll never
> need to add features here.

init->features[] has no semantics yet so we can define it how we like,
but I agree that the way I use it here is not necessarily the most
natural.

OTOH, we cannot use features[] for "mandatory" features like erratum
workarounds, because current userspace just ignores these bits.

Rather, these bits would be for features that are considered beneficial
but must be off by default (due to incompatibility risks across nodes,
or due to ABI impacts).  Just blindly using the preferred target
already risks configuring a vcpu that won't work across all nodes in
your cluster.

So I'm not convinced that there is any useful interpretation of
features[] unless we interpret it as suggested in this patch.

Can you elaborate why you think it should be used with a more
concrete example?

> That said, I think defining the feature bit makes sense. ATM, I'm feeling
> like we'll want to model the user interface for SVE like PMU (using VCPU
> device ioctls).

Some people expressed concerns about the ioctls becoming order-sensitive.

In the SVE case we don't want people enabling/disabling/reconfiguring
"silicon" features like SVE after the vcpu starts executing.

We will need an extra ioctl() for configuring the allowed SVE vector
lengths though.  I don't see a way around that.  So maybe we have to
solve the ordering problem anyway.


My current approach (not in this series) was to have VCPU_INIT return
-EINPROGRESS or similar if SVE is enabled in features[]: this indicates
that certain setup ioctls are required before the vcpu can run.

This may be overkill / not the best approach though.  I can look at
vcpu device ioctls as an alternative.
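
From userspace, that flow would look something like this (entirely
hypothetical; neither the error code nor the follow-up configuration ioctls
exist in this series):

	/* Needs <errno.h>, <sys/ioctl.h> and <linux/kvm.h>. */
	static int init_vcpu_with_sve(int vcpu_fd, struct kvm_vcpu_init *init)
	{
		init->features[0] |= 1 << KVM_ARM_VCPU_SVE;

		if (ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, init) == 0)
			return 0;		/* no further setup required */

		if (errno != EINPROGRESS)
			return -1;

		/*
		 * The vcpu exists but may not run yet: the (hypothetical)
		 * SVE configuration ioctls, e.g. choosing vector lengths,
		 * would go here, before the first KVM_RUN.
		 */
		return 0;
	}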

> >  	init->target = (__u32)target;
> >  
> >  	return 0;
> > diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> > index a74311b..f63a791 100644
> > --- a/arch/arm64/kvm/reset.c
> > +++ b/arch/arm64/kvm/reset.c
> > @@ -110,6 +110,20 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
> >  			cpu_reset = &default_regs_reset;
> >  		}
> >  
> > +		if (system_supports_sve() &&
> > +		    test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
> > +			vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
> > +
> > +			vcpu->arch.sve_max_vl = sve_max_virtualisable_vl;
> > +
> > +			vcpu->arch.sve_state = kzalloc(
> > +				SVE_SIG_REGS_SIZE(
> > +					sve_vq_from_vl(vcpu->arch.sve_max_vl)),
> 
> I guess sve_state can be pretty large. Should we allocate it like we
> do the VM with kvm_arch_alloc_vm()? I.e. using vzalloc() on VHE machines?

Hmmm, dunno.

Historically (i.e., on 32-bit) vmalloc addresses could be a somewhat
scarce resource so I tend not to think of it by default, but allocations
of this kind of size would probably not pose a problem -- certainly
not on 64-bit.

Currently we allocate the sve_state storage for each host task with
kzalloc(), so it would be nice to stay consistent unless there's a
reason to deviate.


With vmalloc() we might waste half a page of memory per vcpu on a
typical system, though that probably isn't the end of the world.
It would be worse with 64K pages though.

The total size is not likely to be more than a few pages, so
it probably doesn't matter too much if we grab physically
contiguous memory for it.

The size of the sve state seems comparable to the size of struct kvm.
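
For scale, a rough worked figure for SVE_SIG_REGS_SIZE() at the architectural
maximum vector length of 2048 bits (vq = 16), ignoring alignment padding in
the layout:

	Z regs:  32 * 16 * 16 = 8192 bytes
	P regs:  16 * 16 *  2 =  512 bytes
	FFR:          16 *  2 =   32 bytes
	total:                 ~ 8736 bytes

i.e. a bit over two 4K pages per vcpu, so vmalloc() rounding would waste
little with 4K pages but most of a page with 64K pages.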

I'm not sure what the correct answer is here, to be honest.

Thoughts?

Cheers
---Dave
Andrew Jones July 25, 2018, 4:52 p.m. UTC | #4
On Wed, Jul 25, 2018 at 04:27:49PM +0100, Dave Martin wrote:
> On Thu, Jul 19, 2018 at 04:59:21PM +0200, Andrew Jones wrote:
> > On Thu, Jun 21, 2018 at 03:57:40PM +0100, Dave Martin wrote:
> > > -	/*
> > > -	 * For now, we don't return any features.
> > > -	 * In future, we might use features to return target
> > > -	 * specific features available for the preferred
> > > -	 * target type.
> > > -	 */
> > > +	/* KVM_ARM_VCPU_SVE understood by KVM_VCPU_INIT */
> > > +	init->features[0] = 1 << KVM_ARM_VCPU_SVE;
> > > +
> > 
> > We shouldn't need to do this. The "preferred" target type isn't defined
> > well (that I know of), but IMO it should probably be the target that
> > best matches the host, minus optional features. The best base target. We
> > may use these features to convey that the preferred target should enable
> > some optional feature if that feature is necessary to workaround a bug,
> > i.e. using the "feature" bit as an erratum bit someday, but that'd be
> > quite a debatable use, so maybe not even that. Most likely we'll never
> > need to add features here.
> 
> init->features[] has no semantics yet so we can define it how we like,
> but I agree that the way I use it here is not necessarily the most
> natural.
> 
> OTOH, we cannot use features[] for "mandatory" features like erratum
> workarounds, because current userspace just ignores these bits.

It would have to learn to look here if that's how we started using it,
but it'd be better to invent something else that wouldn't appear as
abusive if we're going to teach userspace new stuff anyway.

> 
> Rather, these bits would be for features that are considered beneficial
> but must be off by default (due to incompatibility risks across nodes,
> or due to ABI impacts).  Just blindly using the preferred target
> already risks configuring a vcpu that won't work across all nodes in
> your cluster.

KVM usually advertises optional features through capabilities. A device
(vcpu device, in this case) ioctl can also be used to check for feature
availability.

> 
> So I'm not convinced that there is any useful interpretation of
> features[] unless we interpret it as suggested in this patch.
> 
> Can you elaborate why you think it should be used with a more
> concrete example?

I'm advocating that it *not* be used here. I think it should be used
like the PMU feature uses it - and the PMU feature doesn't set a bit
here.

> 
> > That said, I think defining the feature bit makes sense. ATM, I'm feeling
> > like we'll want to model the user interface for SVE like PMU (using VCPU
> > device ioctls).
> 
> Some people expressed concerns about the ioctls becoming order-sensitive.
> 
> In the SVE case we don't want people enabling/disabling/reconfiguring
> "silicon" features like SVE after the vcpu starts executing.
> 
> We will need an extra ioctl() for configuring the allowed SVE vector
> lengths though.  I don't see a way around that.  So maybe we have to
> solve the ordering problem anyway.

Yes, that's why I'm thinking that the vcpu device ioctls is probably the
right way to go. The SVE group can have its own "finalize" request that
allows all other SVE ioctls to be in any order prior to it.
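
Something along these lines, say (names and numbers all invented, purely to
illustrate the shape of it):

	#define KVM_ARM_VCPU_SVE_CTRL		2	/* hypothetical group */
	#define   KVM_ARM_VCPU_SVE_VLS		0	/* set/get vector lengths */
	#define   KVM_ARM_VCPU_SVE_FINALIZE	1	/* lock config before running */

i.e. userspace issues KVM_SET_DEVICE_ATTR on the vcpu fd for the SVE
attributes in whatever order it likes, then FINALIZE to lock the
configuration before the vcpu runs.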

> 
> 
> My current approach (not in this series) was to have VCPU_INIT return
> -EINPROGRESS or similar if SVE is enabled in features[]: this indicates
> that certain setup ioctls are required before the vcpu can run.
> 
> This may be overkill / not the best approach though.  I can look at
> vcpu device ioctls as an alternative.

With a "finalize" attribute if SVE isn't finalized by VCPU_INIT or
KVM_RUN time, then SVE just won't be enabled for that VCPU.

Thanks,
drew
Dave Martin July 26, 2018, 1:18 p.m. UTC | #5
On Wed, Jul 25, 2018 at 06:52:56PM +0200, Andrew Jones wrote:
> On Wed, Jul 25, 2018 at 04:27:49PM +0100, Dave Martin wrote:
> > On Thu, Jul 19, 2018 at 04:59:21PM +0200, Andrew Jones wrote:
> > > On Thu, Jun 21, 2018 at 03:57:40PM +0100, Dave Martin wrote:
> > > > -	/*
> > > > -	 * For now, we don't return any features.
> > > > -	 * In future, we might use features to return target
> > > > -	 * specific features available for the preferred
> > > > -	 * target type.
> > > > -	 */
> > > > +	/* KVM_ARM_VCPU_SVE understood by KVM_VCPU_INIT */
> > > > +	init->features[0] = 1 << KVM_ARM_VCPU_SVE;
> > > > +
> > > 
> > > We shouldn't need to do this. The "preferred" target type isn't defined
> > > well (that I know of), but IMO it should probably be the target that
> > > best matches the host, minus optional features. The best base target. We
> > > may use these features to convey that the preferred target should enable
> > > some optional feature if that feature is necessary to workaround a bug,
> > > i.e. using the "feature" bit as an erratum bit someday, but that'd be
> > > quite a debatable use, so maybe not even that. Most likely we'll never
> > > need to add features here.
> > 
> > init->features[] has no semantics yet so we can define it how we like,
> > but I agree that the way I use it here is not necessarily the most
> > natural.
> > 
> > OTOH, we cannot use features[] for "mandatory" features like erratum
> > workarounds, because current userspace just ignores these bits.
> 
> It would have to learn to look here if that's how we started using it,
> but it'd be better to invent something else that wouldn't appear as
> abusive if we're going to teach userspace new stuff anyway.
> 
> > 
> > Rather, these bits would be for features that are considered beneficial
> > but must be off by default (due to incompatibility risks across nodes,
> > or due to ABI impacts).  Just blindly using the preferred target
> > already risks configuring a vcpu that won't work across all nodes in
> > your cluster.
> 
> KVM usually advertises optional features through capabilities. A device
> (vcpu device, in this case) ioctl can also be used to check for feature
> availability.
> 
> > 
> > So I'm not convinced that there is any useful interpretation of
> > features[] unless we interpret it as suggested in this patch.
> > 
> > Can you elaborate why you think it should be used with a more
> > concrete example?
> 
> I'm advocating that it *not* be used here. I think it should be used
> like the PMU feature uses it - and the PMU feature doesn't set a bit
> here.
> 
> > 
> > > That said, I think defining the feature bit makes sense. ATM, I'm feeling
> > > like we'll want to model the user interface for SVE like PMU (using VCPU
> > > device ioctls).
> > 
> > Some people expressed concerns about the ioctls becoming order-sensitive.
> > 
> > In the SVE case we don't want people enabling/disabling/reconfiguring
> > "silicon" features like SVE after the vcpu starts executing.
> > 
> > We will need an extra ioctl() for configuring the allowed SVE vector
> > lengths though.  I don't see a way around that.  So maybe we have to
> > solve the ordering problem anyway.
> 
> Yes, that's why I'm thinking that the vcpu device ioctls is probably the
> right way to go. The SVE group can have its own "finalize" request that
> allows all other SVE ioctls to be in any order prior to it.
> 
> > 
> > 
> > My current approach (not in this series) was to have VCPU_INIT return
> > -EINPROGRESS or similar if SVE is enabled in features[]: this indicates
> > that certain setup ioctls are required before the vcpu can run.
> > 
> > This may be overkill / not the best approach though.  I can look at
> > vcpu device ioctls as an alternative.
> 
> With a "finalize" attribute if SVE isn't finalized by VCPU_INIT or
> KVM_RUN time, then SVE just won't be enabled for that VCPU.

So I suppose we could do something like this:

 * Advertise SVE availability through a vcpu device capability (I need
   to check how that works).

 * SVE-aware userspace that understands SVE can do the relevant
   vcpu device ioctls to configure SVE and turn it on: these are only
   permitted before the vcpu runs.  We might require an explicit
   "finish SVE setup" ioctl to be issued before the vcpu can run.

 * Finally, the vcpu is set running by userspace as normal.

Marc or Christoffer was objecting to me previously that this may be an
abuse of vcpu device ioctls, because SVE is a CPU feature rather than a
device.  I guess it depends on how you define "device" -- I'm not sure
where to draw the line.

The vcpu device approach might reduce the amount of weird special-case
API that needs to be invented, which is probably a good thing.

Cheers
---Dave
Dave Martin July 26, 2018, 1:23 p.m. UTC | #6
On Thu, Jul 19, 2018 at 05:24:20PM +0200, Andrew Jones wrote:
> On Thu, Jun 21, 2018 at 03:57:40PM +0100, Dave Martin wrote:
> > This patch reports the availability of KVM SVE support to userspace
> > via a new vcpu feature flag KVM_ARM_VCPU_SVE.  This flag is
> > reported via the KVM_ARM_PREFERRED_TARGET ioctl.
> > 
> > Userspace can enable the feature by setting the flag for
> > KVM_ARM_VCPU_INIT.  Without this flag set, SVE-related ioctls and
> > register access extensions are hidden, and SVE remains disabled
> > unconditionally for the guest.  This ensures that non-SVE-aware KVM
> > userspace does not receive a vcpu that it does not understand how
> > to snapshot or restore correctly.
> > 
> > Storage is allocated for the SVE register state at vcpu init time,
> > sufficient for the maximum vector length to be exposed to the vcpu.
> > No attempt is made to allocate the storage lazily for now.  Also,
> > no attempt is made to resize the storage dynamically, since the
> > effective vector length of the vcpu can change at each EL0/EL1
> > transition.  The storage is freed at the vcpu uninit hook.
> > 
> > No particular attempt is made to prevent userspace from creating a
> > mix of vcpus some of which have SVE enabled and some of which have
> > it disabled.  This may or may not be useful, but it reflects the
> > underlying architectural behaviour.
> > 
> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > ---
> >  arch/arm64/include/asm/kvm_host.h |  6 +++---
> >  arch/arm64/include/uapi/asm/kvm.h |  1 +
> >  arch/arm64/kvm/guest.c            | 19 +++++++++++++------
> >  arch/arm64/kvm/reset.c            | 14 ++++++++++++++
> >  4 files changed, 31 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index d2084ae..d956cf2 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -44,7 +44,7 @@
> >  
> >  #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
> >  
> > -#define KVM_VCPU_MAX_FEATURES 4
> > +#define KVM_VCPU_MAX_FEATURES 5
> >  
> >  #define KVM_REQ_SLEEP \
> >  	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
> > @@ -439,8 +439,8 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
> >  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
> >  static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
> >  
> > -static inline int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu) { return 0; }
> > -static inline void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
> > +int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu);
> > +void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
> >  
> >  void kvm_arm_init_debug(void);
> >  void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
> > diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
> > index f54a9b0..6acf276 100644
> > --- a/arch/arm64/include/uapi/asm/kvm.h
> > +++ b/arch/arm64/include/uapi/asm/kvm.h
> > @@ -101,6 +101,7 @@ struct kvm_regs {
> >  #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
> >  #define KVM_ARM_VCPU_PSCI_0_2		2 /* CPU uses PSCI v0.2 */
> >  #define KVM_ARM_VCPU_PMU_V3		3 /* Support guest PMUv3 */
> > +#define KVM_ARM_VCPU_SVE		4 /* Allow SVE for guest */
> >  
> >  struct kvm_vcpu_init {
> >  	__u32 target;
> > diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
> > index 5152362..fb7f6aa 100644
> > --- a/arch/arm64/kvm/guest.c
> > +++ b/arch/arm64/kvm/guest.c
> > @@ -58,6 +58,16 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> >  	return 0;
> >  }
> >  
> > +int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu)
> > +{
> > +	return 0;
> > +}
> > +
> > +void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
> > +{
> > +	kfree(vcpu->arch.sve_state);
> > +}
> > +
> >  static u64 core_reg_offset_from_id(u64 id)
> >  {
> >  	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
> > @@ -600,12 +610,9 @@ int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
> >  
> >  	memset(init, 0, sizeof(*init));
> >  
> > -	/*
> > -	 * For now, we don't return any features.
> > -	 * In future, we might use features to return target
> > -	 * specific features available for the preferred
> > -	 * target type.
> > -	 */
> > +	/* KVM_ARM_VCPU_SVE understood by KVM_VCPU_INIT */
> > +	init->features[0] = 1 << KVM_ARM_VCPU_SVE;
> > +
> >  	init->target = (__u32)target;
> >  
> >  	return 0;
> > diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> > index a74311b..f63a791 100644
> > --- a/arch/arm64/kvm/reset.c
> > +++ b/arch/arm64/kvm/reset.c
> > @@ -110,6 +110,20 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
> >  			cpu_reset = &default_regs_reset;
> >  		}
> >  
> > +		if (system_supports_sve() &&
> > +		    test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
> > +			vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
> > +
> > +			vcpu->arch.sve_max_vl = sve_max_virtualisable_vl;
> > +
> 
> The allocation below needs to be guarded by an if (!vcpu->arch.sve_state),
> otherwise every time the guest does a PSCI-off/PSCI-on cycle of the vcpu
> we'll have a memory leak. Or, we need to move this allocation into the new
> kvm_arm_arch_vcpu_init() function. Why did you opt for kvm_reset_vcpu()?

I think I failed to find another suitable init function that gets called
at the right time.  I may have got confused though.

Good spot on the PSCI interaction.  In light of that, I agree: the SVE
buffer allocation should not be done here.


I'll have a think about how to refactor this in light of the discussion.

Cheers
---Dave
Christoffer Dall Aug. 6, 2018, 1:41 p.m. UTC | #7
On Thu, Jul 26, 2018 at 02:18:02PM +0100, Dave Martin wrote:
> On Wed, Jul 25, 2018 at 06:52:56PM +0200, Andrew Jones wrote:
> > On Wed, Jul 25, 2018 at 04:27:49PM +0100, Dave Martin wrote:
> > > On Thu, Jul 19, 2018 at 04:59:21PM +0200, Andrew Jones wrote:
> > > > On Thu, Jun 21, 2018 at 03:57:40PM +0100, Dave Martin wrote:
> > > > > -	/*
> > > > > -	 * For now, we don't return any features.
> > > > > -	 * In future, we might use features to return target
> > > > > -	 * specific features available for the preferred
> > > > > -	 * target type.
> > > > > -	 */
> > > > > +	/* KVM_ARM_VCPU_SVE understood by KVM_VCPU_INIT */
> > > > > +	init->features[0] = 1 << KVM_ARM_VCPU_SVE;
> > > > > +
> > > > 
> > > > We shouldn't need to do this. The "preferred" target type isn't defined
> > > > well (that I know of), but IMO it should probably be the target that
> > > > best matches the host, minus optional features. The best base target. We
> > > > may use these features to convey that the preferred target should enable
> > > > some optional feature if that feature is necessary to workaround a bug,
> > > > i.e. using the "feature" bit as an erratum bit someday, but that'd be
> > > > quite a debatable use, so maybe not even that. Most likely we'll never
> > > > need to add features here.
> > > 
> > > init->features[] has no semantics yet so we can define it how we like,
> > > but I agree that the way I use it here is not necessarily the most
> > > natural.
> > > 
> > > OTOH, we cannot use features[] for "mandatory" features like erratum
> > > workarounds, because current userspace just ignores these bits.
> > 
> > It would have to learn to look here if that's how we started using it,
> > but it'd be better to invent something else that wouldn't appear as
> > abusive if we're going to teach userspace new stuff anyway.
> > 
> > > 
> > > Rather, these bits would be for features that are considered beneficial
> > > but must be off by default (due to incompatibility risks across nodes,
> > > or due to ABI impacts).  Just blindly using the preferred target
> > > already risks configuring a vcpu that won't work across all nodes in
> > > your cluster.
> > 
> > KVM usually advertises optional features through capabilities. A device
> > (vcpu device, in this case) ioctl can also be used to check for feature
> > availability.
> > 
> > > 
> > > So I'm not convinced that there is any useful interpretation of
> > > features[] unless we interpret it as suggested in this patch.
> > > 
> > > Can you elaborate why you think it should be used with a more
> > > concrete example?
> > 
> > I'm advocating that it *not* be used here. I think it should be used
> > like the PMU feature uses it - and the PMU feature doesn't set a bit
> > here.
> > 
> > > 
> > > > That said, I think defining the feature bit makes sense. ATM, I'm feeling
> > > > like we'll want to model the user interface for SVE like PMU (using VCPU
> > > > device ioctls).
> > > 
> > > Some people expressed concerns about the ioctls becoming order-sensitive.
> > > 
> > > In the SVE case we don't want people enabling/disabling/reconfiguring
> > > "silicon" features like SVE after the vcpu starts executing.
> > > 
> > > We will need an extra ioctl() for configuring the allowed SVE vector
> > > lengths though.  I don't see a way around that.  So maybe we have to
> > > solve the ordering problem anyway.
> > 
> > Yes, that's why I'm thinking that the vcpu device ioctls is probably the
> > right way to go. The SVE group can have its own "finalize" request that
> > allows all other SVE ioctls to be in any order prior to it.
> > 
> > > 
> > > 
> > > My current approach (not in this series) was to have VCPU_INIT return
> > > -EINPROGRESS or similar if SVE is enabled in features[]: this indicates
> > > that certain setup ioctls are required before the vcpu can run.
> > > 
> > > This may be overkill / not the best approach though.  I can look at
> > > vcpu device ioctls as an alternative.
> > 
> > With a "finalize" attribute if SVE isn't finalized by VCPU_INIT or
> > KVM_RUN time, then SVE just won't be enabled for that VCPU.
> 
> So I suppose we could do something like this:
> 
>  * Advertise SVE availability through a vcpu device capability (I need
>    to check how that works).
> 
>  * SVE-aware userspace that understands SVE can do the relevant
>    vcpu device ioctls to configure SVE and turn it on: these are only
>    permitted before the vcpu runs.  We might require an explicit
>    "finish SVE setup" ioctl to be issued before the vcpu can run.
> 
>  * Finally, the vcpu is set running by userspace as normal.
> 
> Marc or Christoffer was objecting to me previously that this may be an
> abuse of vcpu device ioctls, because SVE is a CPU feature rather than a
> device.  I guess it depends on how you define "device" -- I'm not sure
> where to draw the line.

I initially advocated for a VCPU device ioctl as well, because it's a
less crowded number space that gives you more flexibility.  Marc did
have a strong point that vcpu *devices* implies something other than
features though.

I think you (a) definitely want to announce SVE support via a
capability, and (b) only set the preferred target flag if enabling SVE
*generally* gives you a VM more like the real hardware with similar
performance on some system.

I'm personally fine with both feature flags and vcpu device ioctls.  If
using vcpu device ioctls gives you an obvious way to set attributes
relating to SVE, e.g. the vector length, then I think that's a strong
argument for that approach.

Whatever you do, please document this in
Documentation/virtual/kvm/api.txt and related files, such as
Documentation/virtual/kvm/devices/vcpu.txt.

Thanks,
-Christoffer
Dave Martin Aug. 7, 2018, 11:23 a.m. UTC | #8
On Mon, Aug 06, 2018 at 03:41:33PM +0200, Christoffer Dall wrote:
> On Thu, Jul 26, 2018 at 02:18:02PM +0100, Dave Martin wrote:
> > On Wed, Jul 25, 2018 at 06:52:56PM +0200, Andrew Jones wrote:
> > > On Wed, Jul 25, 2018 at 04:27:49PM +0100, Dave Martin wrote:
> > > > On Thu, Jul 19, 2018 at 04:59:21PM +0200, Andrew Jones wrote:
> > > > > On Thu, Jun 21, 2018 at 03:57:40PM +0100, Dave Martin wrote:
> > > > > > -	/*
> > > > > > -	 * For now, we don't return any features.
> > > > > > -	 * In future, we might use features to return target
> > > > > > -	 * specific features available for the preferred
> > > > > > -	 * target type.
> > > > > > -	 */
> > > > > > +	/* KVM_ARM_VCPU_SVE understood by KVM_VCPU_INIT */
> > > > > > +	init->features[0] = 1 << KVM_ARM_VCPU_SVE;
> > > > > > +
> > > > > 
> > > > > We shouldn't need to do this. The "preferred" target type isn't defined
> > > > > well (that I know of), but IMO it should probably be the target that
> > > > > best matches the host, minus optional features. The best base target. We
> > > > > may use these features to convey that the preferred target should enable
> > > > > some optional feature if that feature is necessary to workaround a bug,
> > > > > i.e. using the "feature" bit as an erratum bit someday, but that'd be
> > > > > quite a debatable use, so maybe not even that. Most likely we'll never
> > > > > need to add features here.
> > > > 
> > > > init->features[] has no semantics yet so we can define it how we like,
> > > > but I agree that the way I use it here is not necessarily the most
> > > > natural.
> > > > 
> > > > OTOH, we cannot use features[] for "mandatory" features like erratum
> > > > workarounds, because current userspace just ignores these bits.
> > > 
> > > It would have to learn to look here if that's how we started using it,
> > > but it'd be better to invent something else that wouldn't appear as
> > > abusive if we're going to teach userspace new stuff anyway.
> > > 
> > > > 
> > > > Rather, these bits would be for features that are considered beneficial
> > > > but must be off by default (due to incompatibility risks across nodes,
> > > > or due to ABI impacts).  Just blindly using the preferred target
> > > > already risks configuring a vcpu that won't work across all nodes in
> > > > your cluster.
> > > 
> > > KVM usually advertises optional features through capabilities. A device
> > > (vcpu device, in this case) ioctl can also be used to check for feature
> > > availability.
> > > 
> > > > 
> > > > So I'm not convinced that there is any useful interpretation of
> > > > features[] unless we interpret it as suggested in this patch.
> > > > 
> > > > Can you elaborate why you think it should be used with a more
> > > > concrete example?
> > > 
> > > I'm advocating that it *not* be used here. I think it should be used
> > > like the PMU feature uses it - and the PMU feature doesn't set a bit
> > > here.
> > > 
> > > > 
> > > > > That said, I think defining the feature bit makes sense. ATM, I'm feeling
> > > > > like we'll want to model the user interface for SVE like PMU (using VCPU
> > > > > device ioctls).
> > > > 
> > > > Some people expressed concerns about the ioctls becoming order-sensitive.
> > > > 
> > > > In the SVE case we don't want people enabling/disabling/reconfiguring
> > > > "silicon" features like SVE after the vcpu starts executing.
> > > > 
> > > > We will need an extra ioctl() for configuring the allowed SVE vector
> > > > lengths though.  I don't see a way around that.  So maybe we have to
> > > > solve the ordering problem anyway.
> > > 
> > > Yes, that's why I'm thinking that the vcpu device ioctls is probably the
> > > right way to go. The SVE group can have its own "finalize" request that
> > > allows all other SVE ioctls to be in any order prior to it.
> > > 
> > > > 
> > > > 
> > > > My current approach (not in this series) was to have VCPU_INIT return
> > > > -EINPROGRESS or similar if SVE is enabled in features[]: this indicates
> > > > that certain setup ioctls are required before the vcpu can run.
> > > > 
> > > > This may be overkill / not the best approach though.  I can look at
> > > > vcpu device ioctls as an alternative.
> > > 
> > > With a "finalize" attribute if SVE isn't finalized by VCPU_INIT or
> > > KVM_RUN time, then SVE just won't be enabled for that VCPU.
> > 
> > So I suppose we could do something like this:
> > 
> >  * Advertise SVE availability through a vcpu device capability (I need
> >    to check how that works).
> > 
> >  * SVE-aware userspace that understands SVE can do the relevant
> >    vcpu device ioctls to configure SVE and turn it on: these are only
> >    permitted before the vcpu runs.  We might require an explicit
> >    "finish SVE setup" ioctl to be issued before the vcpu can run.
> > 
> >  * Finally, the vcpu is set running by userspace as normal.
> > 
> > Marc or Christoffer was objecting to me previously that this may be an
> > abuse of vcpu device ioctls, because SVE is a CPU feature rather than a
> > device.  I guess it depends on how you define "device" -- I'm not sure
> > where to draw the line.
> 
> I initially advocated for a VCPU device ioctl as well, because it's a
> less crowded number space that gives you more flexibility.  Marc did
> have a strong point that vcpu *devices* implies something other than
> features though.
> 
> I think you (a) definitely want to announce SVE support via a
> capability, and (b) only set the preferred target flag if enabling SVE
> *generally* gives you a VM more like the real hardware with similar
> performance on some system.
> 
> I'm personally fine with both feature flags and vcpu device ioctls.  If
> using vcpu device ioctls gives you an obvious way to set attributes
> relating to SVE, e.g. the vector length, then I think that's a strong
> argument for that approach.

There is another option I'm tending towards, which is simply to have
a "set vector lengths" ioctl (whether presented as a vcpu device
ioctl or a random arch ioctl).

If that ioctl() fails then SVE support is not available.

If it succeeds, it will update its arguments to indicate which
vector lengths are enabled (if different).

Old userspace, or userspace that doesn't want to use SVE, would
not use this ioctl at all.
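
Sketched as an arch ioctl, it might look something like this (the name,
struct layout and ioctl number are all invented for illustration; one bit
per vector quadword count, read-modify-write semantics):

	struct kvm_arm_sve_vls {
		__u64 vqs[8];	/* in: requested VLs; out: VLs actually enabled */
	};

	#define KVM_ARM_SVE_SET_VLS \
		_IOWR(KVMIO, 0xb6 /* placeholder */, struct kvm_arm_sve_vls)

Userspace would set a bit for each vector length it wants; on return the
kernel would clear any bits it cannot (or will not) support, and failure of
the ioctl altogether would mean no SVE support.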

It would also do no harm additionally to advertise this as a
capability, though I wonder whether it's necessary to do so (?)

Cheers
---Dave
Christoffer Dall Aug. 7, 2018, 8:08 p.m. UTC | #9
On Tue, Aug 07, 2018 at 12:23:45PM +0100, Dave Martin wrote:
> On Mon, Aug 06, 2018 at 03:41:33PM +0200, Christoffer Dall wrote:
> > On Thu, Jul 26, 2018 at 02:18:02PM +0100, Dave Martin wrote:
> > > On Wed, Jul 25, 2018 at 06:52:56PM +0200, Andrew Jones wrote:
> > > > On Wed, Jul 25, 2018 at 04:27:49PM +0100, Dave Martin wrote:
> > > > > On Thu, Jul 19, 2018 at 04:59:21PM +0200, Andrew Jones wrote:
> > > > > > On Thu, Jun 21, 2018 at 03:57:40PM +0100, Dave Martin wrote:
> > > > > > > -	/*
> > > > > > > -	 * For now, we don't return any features.
> > > > > > > -	 * In future, we might use features to return target
> > > > > > > -	 * specific features available for the preferred
> > > > > > > -	 * target type.
> > > > > > > -	 */
> > > > > > > +	/* KVM_ARM_VCPU_SVE understood by KVM_VCPU_INIT */
> > > > > > > +	init->features[0] = 1 << KVM_ARM_VCPU_SVE;
> > > > > > > +
> > > > > > 
> > > > > > We shouldn't need to do this. The "preferred" target type isn't defined
> > > > > > well (that I know of), but IMO it should probably be the target that
> > > > > > best matches the host, minus optional features. The best base target. We
> > > > > > may use these features to convey that the preferred target should enable
> > > > > > some optional feature if that feature is necessary to workaround a bug,
> > > > > > i.e. using the "feature" bit as an erratum bit someday, but that'd be
> > > > > > quite a debatable use, so maybe not even that. Most likely we'll never
> > > > > > need to add features here.
> > > > > 
> > > > > init->features[] has no semantics yet so we can define it how we like,
> > > > > but I agree that the way I use it here is not necessarily the most
> > > > > natural.
> > > > > 
> > > > > OTOH, we cannot use features[] for "mandatory" features like erratum
> > > > > workarounds, because current userspace just ignores these bits.
> > > > 
> > > > It would have to learn to look here if that's how we started using it,
> > > > but it'd be better to invent something else that wouldn't appear as
> > > > abusive if we're going to teach userspace new stuff anyway.
> > > > 
> > > > > 
> > > > > Rather, these bits would be for features that are considered beneficial
> > > > > but must be off by default (due to incompatibility risks across nodes,
> > > > > or due to ABI impacts).  Just blindly using the preferred target
> > > > > already risks configuring a vcpu that won't work across all nodes in
> > > > > your cluster.
> > > > 
> > > > KVM usually advertises optional features through capabilities. A device
> > > > (vcpu device, in this case) ioctl can also be used to check for feature
> > > > availability.
> > > > 
> > > > > 
> > > > > So I'm not convinced that there is any useful interpretation of
> > > > > features[] unless we interpret it as suggested in this patch.
> > > > > 
> > > > > Can you elaborate why you think it should be used with a more
> > > > > concrete example?
> > > > 
> > > > I'm advocating that it *not* be used here. I think it should be used
> > > > like the PMU feature uses it - and the PMU feature doesn't set a bit
> > > > here.
> > > > 
> > > > > 
> > > > > > That said, I think defining the feature bit makes sense. ATM, I'm feeling
> > > > > > like we'll want to model the user interface for SVE like PMU (using VCPU
> > > > > > device ioctls).
> > > > > 
> > > > > Some people expressed concerns about the ioctls becoming order-sensitive.
> > > > > 
> > > > > In the SVE case we don't want people enabling/disabling/reconfiguring
> > > > > "silicon" features like SVE after the vcpu starts executing.
> > > > > 
> > > > > We will need an extra ioctl() for configuring the allowed SVE vector
> > > > > lengths though.  I don't see a way around that.  So maybe we have to
> > > > > solve the ordering problem anyway.
> > > > 
> > > > Yes, that's why I'm thinking that the vcpu device ioctls is probably the
> > > > right way to go. The SVE group can have its own "finalize" request that
> > > > allows all other SVE ioctls to be in any order prior to it.
> > > > 
> > > > > 
> > > > > 
> > > > > My current approach (not in this series) was to have VCPU_INIT return
> > > > > -EINPROGRESS or similar if SVE is enabled in features[]: this indicates
> > > > > that certain setup ioctls are required before the vcpu can run.
> > > > > 
> > > > > This may be overkill / not the best approach though.  I can look at
> > > > > vcpu device ioctls as an alternative.
> > > > 
> > > > With a "finalize" attribute if SVE isn't finalized by VCPU_INIT or
> > > > KVM_RUN time, then SVE just won't be enabled for that VCPU.
> > > 
> > > So I suppose we could do something like this:
> > > 
> > >  * Advertise SVE availability through a vcpu device capability (I need
> > >    to check how that works).
> > > 
> > >  * SVE-aware userspace that understands SVE can do the relevant
> > >    vcpu device ioctls to configure SVE and turn it on: these are only
> > >    permitted before the vcpu runs.  We might require an explicit
> > >    "finish SVE setup" ioctl to be issued before the vcpu can run.
> > > 
> > >  * Finally, the vcpu is set running by userspace as normal.
> > > 
> > > Marc or Christoffer was objecting to me previously that this may be an
> > > abuse of vcpu device ioctls, because SVE is a CPU feature rather than a
> > > device.  I guess it depends on how you define "device" -- I'm not sure
> > > where to draw the line.
> > 
> > I initially advocated for a VCPU device ioctl as well, because it's a
> > less crowded number space that gives you more flexibility.  Marc did
> > have a strong point that vcpu *devices* implies something else than
> > features though.
> > 
> > I think you (a) definitely want to announce SVE support via a
> > capability, and (b) only set the preferred target flag if enabling SVE
> > *generally* gives you a VM more like the real hardware with similar
> > performance on some system.
> > 
> > I'm personally fine with both feature flags and vcpu device ioctls.  If
> > using vcpu device ioctls gives you an obvious way to set attributes
> > relating to SVE, e.g. the vector length, then I think that's a strong
> > argument for that approach.
> 
> There is another option I'm tending towards, which is simply to have
> a "set vector lengths" ioctl (whether presented as a vcpu device
> ioctl or a random arch ioctl).

Someone complained once about adding too many arch ioctls because there
is a limited number space for doing so, but I'm not sure if that was,
and still is, a valid concern.

> 
> If that ioctl() fails then SVE support is not available.
> 
> If it succeeds, it will update its arguments to indicate which
> vector lengths are enabled (if different).
> 
> Old userspace, or userspace that doesn't want to use SVE, would
> not use this ioctl at all.
> 
> It would also do no harm additionally to advertise this as a
> capability, though I wonder whether it's necessary to do so (?)
> 

It is customary to expose features via capabilities.  I have a vague
recollection that tools like libvirt negotiate capabilities across
systems and would need more plumbing to discover features by probing an
ioctl instead.

Thanks,
-Christoffer
Dave Martin Aug. 8, 2018, 8:30 a.m. UTC | #10
On Tue, Aug 07, 2018 at 10:08:28PM +0200, Christoffer Dall wrote:
> On Tue, Aug 07, 2018 at 12:23:45PM +0100, Dave Martin wrote:
> > On Mon, Aug 06, 2018 at 03:41:33PM +0200, Christoffer Dall wrote:

[...]

> > > I'm personally fine with both feature flags and vcpu device ioctls.  If
> > > using vcpu device ioctls gives you an obvious way to set attributes
> > > relating to SVE, e.g. the vector length, then I think that's a strong
> > > argument for that approach.
> > 
> > There is another option I'm tending towards, which is simply to have
> > a "set vector lengths" ioctl (whether presented as a vcpu device
> > ioctl or a random arch ioctl).
> 
> Someone complained once about adding too many arch ioctls because there
> is a limited number space for doing so, but I'm not sure if that was,
> and still is, a valid concern.

I have no strong opinion on this.  I may stick with the separate ioctl
approach for the next spin just to reduce the amount of churn, but I'm
not overly committed to it.

> > 
> > If that ioctl() fails then SVE support is not available.
> > 
> > If it succeeds, it will update its arguments to indicate which
> > vector lengths are enabled (if different).
> > 
> > Old userspace, or userspace that doesn't want to use SVE, would
> > not use this ioctl at all.
> > 
> > It would also do no harm additionally to advertise this as a
> > capability, though I wonder whether it's necessary to do so (?)
> > 
> 
> It is customary to expose features via capabilities.  I have a vague
> recollection that tools like libvirt negotiate capabilities across
> systems and would need more plumbing to discover features by probing an
> ioctl instead.

OK, fair enough.

Cheers
---Dave

Patch

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d2084ae..d956cf2 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -44,7 +44,7 @@ 
 
 #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
 
-#define KVM_VCPU_MAX_FEATURES 4
+#define KVM_VCPU_MAX_FEATURES 5
 
 #define KVM_REQ_SLEEP \
 	KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
@@ -439,8 +439,8 @@  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {}
 
-static inline int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu) { return 0; }
-static inline void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
+int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 void kvm_arm_init_debug(void);
 void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index f54a9b0..6acf276 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -101,6 +101,7 @@  struct kvm_regs {
 #define KVM_ARM_VCPU_EL1_32BIT		1 /* CPU running a 32bit VM */
 #define KVM_ARM_VCPU_PSCI_0_2		2 /* CPU uses PSCI v0.2 */
 #define KVM_ARM_VCPU_PMU_V3		3 /* Support guest PMUv3 */
+#define KVM_ARM_VCPU_SVE		4 /* Allow SVE for guest */
 
 struct kvm_vcpu_init {
 	__u32 target;
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 5152362..fb7f6aa 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -58,6 +58,16 @@  int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+int kvm_arm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
+void kvm_arm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+	kfree(vcpu->arch.sve_state);
+}
+
 static u64 core_reg_offset_from_id(u64 id)
 {
 	return id & ~(KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_CORE);
@@ -600,12 +610,9 @@  int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 
 	memset(init, 0, sizeof(*init));
 
-	/*
-	 * For now, we don't return any features.
-	 * In future, we might use features to return target
-	 * specific features available for the preferred
-	 * target type.
-	 */
+	/* KVM_ARM_VCPU_SVE understood by KVM_VCPU_INIT */
+	init->features[0] = 1 << KVM_ARM_VCPU_SVE;
+
 	init->target = (__u32)target;
 
 	return 0;
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index a74311b..f63a791 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -110,6 +110,20 @@  int kvm_reset_vcpu(struct kvm_vcpu *vcpu)
 			cpu_reset = &default_regs_reset;
 		}
 
+		if (system_supports_sve() &&
+		    test_bit(KVM_ARM_VCPU_SVE, vcpu->arch.features)) {
+			vcpu->arch.flags |= KVM_ARM64_GUEST_HAS_SVE;
+
+			vcpu->arch.sve_max_vl = sve_max_virtualisable_vl;
+
+			vcpu->arch.sve_state = kzalloc(
+				SVE_SIG_REGS_SIZE(
+					sve_vq_from_vl(vcpu->arch.sve_max_vl)),
+				GFP_KERNEL);
+			if (!vcpu->arch.sve_state)
+				return -ENOMEM;
+		}
+
 		break;
 	}