diff mbox series

[v18,023/121] KVM: TDX: Make KVM_CAP_MAX_VCPUS backend specific

Message ID ed33ebe29b231e8e657cd610a983fa603b10f530.1705965634.git.isaku.yamahata@intel.com (mailing list archive)
State New, archived
Headers show
Series KVM TDX basic feature support | expand

Commit Message

Isaku Yamahata Jan. 22, 2024, 11:52 p.m. UTC
From: Isaku Yamahata <isaku.yamahata@intel.com>

TDX has its own limitation on the maximum number of vcpus that the guest
can accommodate.  Allow x86 kvm backend to implement its own KVM_ENABLE_CAP
handler and implement TDX backend for KVM_CAP_MAX_VCPUS.  user space VMM,
e.g. qemu, can specify its value instead of KVM_MAX_VCPUS.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
v18:
- use TDX instead of "x86, tdx" in subject
- use min(max_vcpu, TDX_MAX_VCPU) instead of
  min3(max_vcpu, KVM_MAX_VCPU, TDX_MAX_VCPU)
- make "if (KVM_MAX_VCPU) and if (TDX_MAX_VCPU)" into one if statement
---
 arch/x86/include/asm/kvm-x86-ops.h |  2 ++
 arch/x86/include/asm/kvm_host.h    |  2 ++
 arch/x86/kvm/vmx/main.c            | 22 ++++++++++++++++++++++
 arch/x86/kvm/vmx/tdx.c             | 29 +++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/x86_ops.h         |  5 +++++
 arch/x86/kvm/x86.c                 |  4 ++++
 6 files changed, 64 insertions(+)

Comments

Binbin Wu Jan. 24, 2024, 1:17 a.m. UTC | #1
On 1/23/2024 7:52 AM, isaku.yamahata@intel.com wrote:
> From: Isaku Yamahata <isaku.yamahata@intel.com>
>
> TDX has its own limitation on the maximum number of vcpus that the guest
> can accommodate.  Allow x86 kvm backend to implement its own KVM_ENABLE_CAP
> handler and implement TDX backend for KVM_CAP_MAX_VCPUS.  user space VMM,
> e.g. qemu, can specify its value instead of KVM_MAX_VCPUS.
For legacy VM, KVM just provides the interface to query the max_vcpus.
Why TD needs to provide a interface for userspace to set the limitation?
What's the scenario?


>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
> v18:
> - use TDX instead of "x86, tdx" in subject
> - use min(max_vcpu, TDX_MAX_VCPU) instead of
>    min3(max_vcpu, KVM_MAX_VCPU, TDX_MAX_VCPU)
> - make "if (KVM_MAX_VCPU) and if (TDX_MAX_VCPU)" into one if statement
> ---
>   arch/x86/include/asm/kvm-x86-ops.h |  2 ++
>   arch/x86/include/asm/kvm_host.h    |  2 ++
>   arch/x86/kvm/vmx/main.c            | 22 ++++++++++++++++++++++
>   arch/x86/kvm/vmx/tdx.c             | 29 +++++++++++++++++++++++++++++
>   arch/x86/kvm/vmx/x86_ops.h         |  5 +++++
>   arch/x86/kvm/x86.c                 |  4 ++++
>   6 files changed, 64 insertions(+)
>
> diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> index 943b21b8b106..2f976c0f3116 100644
> --- a/arch/x86/include/asm/kvm-x86-ops.h
> +++ b/arch/x86/include/asm/kvm-x86-ops.h
> @@ -21,6 +21,8 @@ KVM_X86_OP(hardware_unsetup)
>   KVM_X86_OP(has_emulated_msr)
>   KVM_X86_OP(vcpu_after_set_cpuid)
>   KVM_X86_OP(is_vm_type_supported)
> +KVM_X86_OP_OPTIONAL(max_vcpus);
> +KVM_X86_OP_OPTIONAL(vm_enable_cap)
>   KVM_X86_OP(vm_init)
>   KVM_X86_OP_OPTIONAL(vm_destroy)
>   KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate)
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 26f4668b0273..db44a92e5659 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1602,7 +1602,9 @@ struct kvm_x86_ops {
>   	void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
>   
>   	bool (*is_vm_type_supported)(unsigned long vm_type);
> +	int (*max_vcpus)(struct kvm *kvm);
>   	unsigned int vm_size;
> +	int (*vm_enable_cap)(struct kvm *kvm, struct kvm_enable_cap *cap);
>   	int (*vm_init)(struct kvm *kvm);
>   	void (*vm_destroy)(struct kvm *kvm);
>   
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index 50da807d7aea..4611f305a450 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -6,6 +6,7 @@
>   #include "nested.h"
>   #include "pmu.h"
>   #include "tdx.h"
> +#include "tdx_arch.h"
>   
>   static bool enable_tdx __ro_after_init;
>   module_param_named(tdx, enable_tdx, bool, 0444);
> @@ -16,6 +17,17 @@ static bool vt_is_vm_type_supported(unsigned long type)
>   		(enable_tdx && tdx_is_vm_type_supported(type));
>   }
>   
> +static int vt_max_vcpus(struct kvm *kvm)
> +{
> +	if (!kvm)
> +		return KVM_MAX_VCPUS;
> +
> +	if (is_td(kvm))
> +		return min(kvm->max_vcpus, TDX_MAX_VCPUS);
> +
> +	return kvm->max_vcpus;
> +}
> +
>   static int vt_hardware_enable(void)
>   {
>   	int ret;
> @@ -54,6 +66,14 @@ static void vt_hardware_unsetup(void)
>   	vmx_hardware_unsetup();
>   }
>   
> +static int vt_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> +	if (is_td(kvm))
> +		return tdx_vm_enable_cap(kvm, cap);
> +
> +	return -EINVAL;
> +}
> +
>   static int vt_vm_init(struct kvm *kvm)
>   {
>   	if (is_td(kvm))
> @@ -91,7 +111,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>   	.has_emulated_msr = vmx_has_emulated_msr,
>   
>   	.is_vm_type_supported = vt_is_vm_type_supported,
> +	.max_vcpus = vt_max_vcpus,
>   	.vm_size = sizeof(struct kvm_vmx),
> +	.vm_enable_cap = vt_vm_enable_cap,
>   	.vm_init = vt_vm_init,
>   	.vm_destroy = vmx_vm_destroy,
>   
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index 8c463407f8a8..876ad7895b88 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -100,6 +100,35 @@ struct tdx_info {
>   /* Info about the TDX module. */
>   static struct tdx_info *tdx_info;
>   
> +int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> +	int r;
> +
> +	switch (cap->cap) {
> +	case KVM_CAP_MAX_VCPUS: {
> +		if (cap->flags || cap->args[0] == 0)
> +			return -EINVAL;
> +		if (cap->args[0] > KVM_MAX_VCPUS ||
> +		    cap->args[0] > TDX_MAX_VCPUS)
> +			return -E2BIG;
> +
> +		mutex_lock(&kvm->lock);
> +		if (kvm->created_vcpus)
> +			r = -EBUSY;
> +		else {
> +			kvm->max_vcpus = cap->args[0];
> +			r = 0;
> +		}
> +		mutex_unlock(&kvm->lock);
> +		break;
> +	}
> +	default:
> +		r = -EINVAL;
> +		break;
> +	}
> +	return r;
> +}
> +
>   static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
>   {
>   	struct kvm_tdx_capabilities __user *user_caps;
> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> index 6e238142b1e8..3a3be66888da 100644
> --- a/arch/x86/kvm/vmx/x86_ops.h
> +++ b/arch/x86/kvm/vmx/x86_ops.h
> @@ -139,12 +139,17 @@ int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
>   void tdx_hardware_unsetup(void);
>   bool tdx_is_vm_type_supported(unsigned long type);
>   
> +int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
>   int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
>   #else
>   static inline int tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return -EOPNOTSUPP; }
>   static inline void tdx_hardware_unsetup(void) {}
>   static inline bool tdx_is_vm_type_supported(unsigned long type) { return false; }
>   
> +static inline int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> +{
> +	return -EINVAL;
> +};
>   static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOPNOTSUPP; }
>   #endif
>   
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index dd3a23d56621..a1389ddb1b33 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4726,6 +4726,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>   		break;
>   	case KVM_CAP_MAX_VCPUS:
>   		r = KVM_MAX_VCPUS;
> +		if (kvm_x86_ops.max_vcpus)
> +			r = static_call(kvm_x86_max_vcpus)(kvm);
>   		break;
>   	case KVM_CAP_MAX_VCPU_ID:
>   		r = KVM_MAX_VCPU_IDS;
> @@ -6683,6 +6685,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>   		break;
>   	default:
>   		r = -EINVAL;
> +		if (kvm_x86_ops.vm_enable_cap)
> +			r = static_call(kvm_x86_vm_enable_cap)(kvm, cap);
>   		break;
>   	}
>   	return r;
Yuan Yao Feb. 1, 2024, 6:16 a.m. UTC | #2
On Wed, Jan 24, 2024 at 09:17:15AM +0800, Binbin Wu wrote:
>
>
> On 1/23/2024 7:52 AM, isaku.yamahata@intel.com wrote:
> > From: Isaku Yamahata <isaku.yamahata@intel.com>
> >
> > TDX has its own limitation on the maximum number of vcpus that the guest
> > can accommodate.  Allow x86 kvm backend to implement its own KVM_ENABLE_CAP
> > handler and implement TDX backend for KVM_CAP_MAX_VCPUS.  user space VMM,
> > e.g. qemu, can specify its value instead of KVM_MAX_VCPUS.
> For legacy VM, KVM just provides the interface to query the max_vcpus.
> Why TD needs to provide a interface for userspace to set the limitation?
> What's the scenario?

I think the reason is TDH.MNG.INIT needs it:

TD_PARAMS:
    MAX_VCPUS:
        offset: 16 bytes.
        type: Unsigned 16b Integer.
        size: 2.
        Description: Maximum number of VCPUs.

May better to clarify this in the commit yet.

>
>
> >
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > ---
> > v18:
> > - use TDX instead of "x86, tdx" in subject
> > - use min(max_vcpu, TDX_MAX_VCPU) instead of
> >    min3(max_vcpu, KVM_MAX_VCPU, TDX_MAX_VCPU)
> > - make "if (KVM_MAX_VCPU) and if (TDX_MAX_VCPU)" into one if statement
> > ---
> >   arch/x86/include/asm/kvm-x86-ops.h |  2 ++
> >   arch/x86/include/asm/kvm_host.h    |  2 ++
> >   arch/x86/kvm/vmx/main.c            | 22 ++++++++++++++++++++++
> >   arch/x86/kvm/vmx/tdx.c             | 29 +++++++++++++++++++++++++++++
> >   arch/x86/kvm/vmx/x86_ops.h         |  5 +++++
> >   arch/x86/kvm/x86.c                 |  4 ++++
> >   6 files changed, 64 insertions(+)
> >
> > diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> > index 943b21b8b106..2f976c0f3116 100644
> > --- a/arch/x86/include/asm/kvm-x86-ops.h
> > +++ b/arch/x86/include/asm/kvm-x86-ops.h
> > @@ -21,6 +21,8 @@ KVM_X86_OP(hardware_unsetup)
> >   KVM_X86_OP(has_emulated_msr)
> >   KVM_X86_OP(vcpu_after_set_cpuid)
> >   KVM_X86_OP(is_vm_type_supported)
> > +KVM_X86_OP_OPTIONAL(max_vcpus);
> > +KVM_X86_OP_OPTIONAL(vm_enable_cap)
> >   KVM_X86_OP(vm_init)
> >   KVM_X86_OP_OPTIONAL(vm_destroy)
> >   KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate)
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 26f4668b0273..db44a92e5659 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1602,7 +1602,9 @@ struct kvm_x86_ops {
> >   	void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
> >   	bool (*is_vm_type_supported)(unsigned long vm_type);
> > +	int (*max_vcpus)(struct kvm *kvm);
> >   	unsigned int vm_size;
> > +	int (*vm_enable_cap)(struct kvm *kvm, struct kvm_enable_cap *cap);
> >   	int (*vm_init)(struct kvm *kvm);
> >   	void (*vm_destroy)(struct kvm *kvm);
> > diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> > index 50da807d7aea..4611f305a450 100644
> > --- a/arch/x86/kvm/vmx/main.c
> > +++ b/arch/x86/kvm/vmx/main.c
> > @@ -6,6 +6,7 @@
> >   #include "nested.h"
> >   #include "pmu.h"
> >   #include "tdx.h"
> > +#include "tdx_arch.h"
> >   static bool enable_tdx __ro_after_init;
> >   module_param_named(tdx, enable_tdx, bool, 0444);
> > @@ -16,6 +17,17 @@ static bool vt_is_vm_type_supported(unsigned long type)
> >   		(enable_tdx && tdx_is_vm_type_supported(type));
> >   }
> > +static int vt_max_vcpus(struct kvm *kvm)
> > +{
> > +	if (!kvm)
> > +		return KVM_MAX_VCPUS;
> > +
> > +	if (is_td(kvm))
> > +		return min(kvm->max_vcpus, TDX_MAX_VCPUS);
> > +
> > +	return kvm->max_vcpus;
> > +}
> > +
> >   static int vt_hardware_enable(void)
> >   {
> >   	int ret;
> > @@ -54,6 +66,14 @@ static void vt_hardware_unsetup(void)
> >   	vmx_hardware_unsetup();
> >   }
> > +static int vt_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> > +{
> > +	if (is_td(kvm))
> > +		return tdx_vm_enable_cap(kvm, cap);
> > +
> > +	return -EINVAL;
> > +}
> > +
> >   static int vt_vm_init(struct kvm *kvm)
> >   {
> >   	if (is_td(kvm))
> > @@ -91,7 +111,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
> >   	.has_emulated_msr = vmx_has_emulated_msr,
> >   	.is_vm_type_supported = vt_is_vm_type_supported,
> > +	.max_vcpus = vt_max_vcpus,
> >   	.vm_size = sizeof(struct kvm_vmx),
> > +	.vm_enable_cap = vt_vm_enable_cap,
> >   	.vm_init = vt_vm_init,
> >   	.vm_destroy = vmx_vm_destroy,
> > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > index 8c463407f8a8..876ad7895b88 100644
> > --- a/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -100,6 +100,35 @@ struct tdx_info {
> >   /* Info about the TDX module. */
> >   static struct tdx_info *tdx_info;
> > +int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> > +{
> > +	int r;
> > +
> > +	switch (cap->cap) {
> > +	case KVM_CAP_MAX_VCPUS: {
> > +		if (cap->flags || cap->args[0] == 0)
> > +			return -EINVAL;
> > +		if (cap->args[0] > KVM_MAX_VCPUS ||
> > +		    cap->args[0] > TDX_MAX_VCPUS)
> > +			return -E2BIG;
> > +
> > +		mutex_lock(&kvm->lock);
> > +		if (kvm->created_vcpus)
> > +			r = -EBUSY;
> > +		else {
> > +			kvm->max_vcpus = cap->args[0];
> > +			r = 0;
> > +		}
> > +		mutex_unlock(&kvm->lock);
> > +		break;
> > +	}
> > +	default:
> > +		r = -EINVAL;
> > +		break;
> > +	}
> > +	return r;
> > +}
> > +
> >   static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
> >   {
> >   	struct kvm_tdx_capabilities __user *user_caps;
> > diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> > index 6e238142b1e8..3a3be66888da 100644
> > --- a/arch/x86/kvm/vmx/x86_ops.h
> > +++ b/arch/x86/kvm/vmx/x86_ops.h
> > @@ -139,12 +139,17 @@ int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
> >   void tdx_hardware_unsetup(void);
> >   bool tdx_is_vm_type_supported(unsigned long type);
> > +int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
> >   int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
> >   #else
> >   static inline int tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return -EOPNOTSUPP; }
> >   static inline void tdx_hardware_unsetup(void) {}
> >   static inline bool tdx_is_vm_type_supported(unsigned long type) { return false; }
> > +static inline int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
> > +{
> > +	return -EINVAL;
> > +};
> >   static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOPNOTSUPP; }
> >   #endif
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index dd3a23d56621..a1389ddb1b33 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -4726,6 +4726,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> >   		break;
> >   	case KVM_CAP_MAX_VCPUS:
> >   		r = KVM_MAX_VCPUS;
> > +		if (kvm_x86_ops.max_vcpus)
> > +			r = static_call(kvm_x86_max_vcpus)(kvm);
> >   		break;
> >   	case KVM_CAP_MAX_VCPU_ID:
> >   		r = KVM_MAX_VCPU_IDS;
> > @@ -6683,6 +6685,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
> >   		break;
> >   	default:
> >   		r = -EINVAL;
> > +		if (kvm_x86_ops.vm_enable_cap)
> > +			r = static_call(kvm_x86_vm_enable_cap)(kvm, cap);
> >   		break;
> >   	}
> >   	return r;
>
>
Binbin Wu Feb. 4, 2024, 2 a.m. UTC | #3
On 2/1/2024 2:16 PM, Yuan Yao wrote:
> On Wed, Jan 24, 2024 at 09:17:15AM +0800, Binbin Wu wrote:
>>
>> On 1/23/2024 7:52 AM, isaku.yamahata@intel.com wrote:
>>> From: Isaku Yamahata <isaku.yamahata@intel.com>
>>>
>>> TDX has its own limitation on the maximum number of vcpus that the guest
>>> can accommodate.  Allow x86 kvm backend to implement its own KVM_ENABLE_CAP
>>> handler and implement TDX backend for KVM_CAP_MAX_VCPUS.  user space VMM,
>>> e.g. qemu, can specify its value instead of KVM_MAX_VCPUS.
>> For legacy VM, KVM just provides the interface to query the max_vcpus.
>> Why TD needs to provide a interface for userspace to set the limitation?
>> What's the scenario?
> I think the reason is TDH.MNG.INIT needs it:
>
> TD_PARAMS:
>      MAX_VCPUS:
>          offset: 16 bytes.
>          type: Unsigned 16b Integer.
>          size: 2.
>          Description: Maximum number of VCPUs.
Thanks for explanation.

I am also wondering if this info can be passed via KVM_TDX_INIT_VM.
Because userspace is allowed to set the value no greater than
min(KVM_MAX_VCPUS, TDX_MAX_VCPUS), providing the extra cap KVM_CAP_MAX_VCPUS
doesn't make more restriction comparing to providing it in KVM_TDX_INIT_VM.

>
> May better to clarify this in the commit yet.
>
>>
>>> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
>>> ---
>>> v18:
>>> - use TDX instead of "x86, tdx" in subject
>>> - use min(max_vcpu, TDX_MAX_VCPU) instead of
>>>     min3(max_vcpu, KVM_MAX_VCPU, TDX_MAX_VCPU)
>>> - make "if (KVM_MAX_VCPU) and if (TDX_MAX_VCPU)" into one if statement
>>> ---
>>>    arch/x86/include/asm/kvm-x86-ops.h |  2 ++
>>>    arch/x86/include/asm/kvm_host.h    |  2 ++
>>>    arch/x86/kvm/vmx/main.c            | 22 ++++++++++++++++++++++
>>>    arch/x86/kvm/vmx/tdx.c             | 29 +++++++++++++++++++++++++++++
>>>    arch/x86/kvm/vmx/x86_ops.h         |  5 +++++
>>>    arch/x86/kvm/x86.c                 |  4 ++++
>>>    6 files changed, 64 insertions(+)
>>>
>>> diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
>>> index 943b21b8b106..2f976c0f3116 100644
>>> --- a/arch/x86/include/asm/kvm-x86-ops.h
>>> +++ b/arch/x86/include/asm/kvm-x86-ops.h
>>> @@ -21,6 +21,8 @@ KVM_X86_OP(hardware_unsetup)
>>>    KVM_X86_OP(has_emulated_msr)
>>>    KVM_X86_OP(vcpu_after_set_cpuid)
>>>    KVM_X86_OP(is_vm_type_supported)
>>> +KVM_X86_OP_OPTIONAL(max_vcpus);
>>> +KVM_X86_OP_OPTIONAL(vm_enable_cap)
>>>    KVM_X86_OP(vm_init)
>>>    KVM_X86_OP_OPTIONAL(vm_destroy)
>>>    KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate)
>>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>>> index 26f4668b0273..db44a92e5659 100644
>>> --- a/arch/x86/include/asm/kvm_host.h
>>> +++ b/arch/x86/include/asm/kvm_host.h
>>> @@ -1602,7 +1602,9 @@ struct kvm_x86_ops {
>>>    	void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
>>>    	bool (*is_vm_type_supported)(unsigned long vm_type);
>>> +	int (*max_vcpus)(struct kvm *kvm);
>>>    	unsigned int vm_size;
>>> +	int (*vm_enable_cap)(struct kvm *kvm, struct kvm_enable_cap *cap);
>>>    	int (*vm_init)(struct kvm *kvm);
>>>    	void (*vm_destroy)(struct kvm *kvm);
>>> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
>>> index 50da807d7aea..4611f305a450 100644
>>> --- a/arch/x86/kvm/vmx/main.c
>>> +++ b/arch/x86/kvm/vmx/main.c
>>> @@ -6,6 +6,7 @@
>>>    #include "nested.h"
>>>    #include "pmu.h"
>>>    #include "tdx.h"
>>> +#include "tdx_arch.h"
>>>    static bool enable_tdx __ro_after_init;
>>>    module_param_named(tdx, enable_tdx, bool, 0444);
>>> @@ -16,6 +17,17 @@ static bool vt_is_vm_type_supported(unsigned long type)
>>>    		(enable_tdx && tdx_is_vm_type_supported(type));
>>>    }
>>> +static int vt_max_vcpus(struct kvm *kvm)
>>> +{
>>> +	if (!kvm)
>>> +		return KVM_MAX_VCPUS;
>>> +
>>> +	if (is_td(kvm))
>>> +		return min(kvm->max_vcpus, TDX_MAX_VCPUS);
>>> +
>>> +	return kvm->max_vcpus;
>>> +}
>>> +
>>>    static int vt_hardware_enable(void)
>>>    {
>>>    	int ret;
>>> @@ -54,6 +66,14 @@ static void vt_hardware_unsetup(void)
>>>    	vmx_hardware_unsetup();
>>>    }
>>> +static int vt_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>>> +{
>>> +	if (is_td(kvm))
>>> +		return tdx_vm_enable_cap(kvm, cap);
>>> +
>>> +	return -EINVAL;
>>> +}
>>> +
>>>    static int vt_vm_init(struct kvm *kvm)
>>>    {
>>>    	if (is_td(kvm))
>>> @@ -91,7 +111,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>>>    	.has_emulated_msr = vmx_has_emulated_msr,
>>>    	.is_vm_type_supported = vt_is_vm_type_supported,
>>> +	.max_vcpus = vt_max_vcpus,
>>>    	.vm_size = sizeof(struct kvm_vmx),
>>> +	.vm_enable_cap = vt_vm_enable_cap,
>>>    	.vm_init = vt_vm_init,
>>>    	.vm_destroy = vmx_vm_destroy,
>>> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
>>> index 8c463407f8a8..876ad7895b88 100644
>>> --- a/arch/x86/kvm/vmx/tdx.c
>>> +++ b/arch/x86/kvm/vmx/tdx.c
>>> @@ -100,6 +100,35 @@ struct tdx_info {
>>>    /* Info about the TDX module. */
>>>    static struct tdx_info *tdx_info;
>>> +int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>>> +{
>>> +	int r;
>>> +
>>> +	switch (cap->cap) {
>>> +	case KVM_CAP_MAX_VCPUS: {
>>> +		if (cap->flags || cap->args[0] == 0)
>>> +			return -EINVAL;
>>> +		if (cap->args[0] > KVM_MAX_VCPUS ||
>>> +		    cap->args[0] > TDX_MAX_VCPUS)
>>> +			return -E2BIG;
>>> +
>>> +		mutex_lock(&kvm->lock);
>>> +		if (kvm->created_vcpus)
>>> +			r = -EBUSY;
>>> +		else {
>>> +			kvm->max_vcpus = cap->args[0];
>>> +			r = 0;
>>> +		}
>>> +		mutex_unlock(&kvm->lock);
>>> +		break;
>>> +	}
>>> +	default:
>>> +		r = -EINVAL;
>>> +		break;
>>> +	}
>>> +	return r;
>>> +}
>>> +
>>>    static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
>>>    {
>>>    	struct kvm_tdx_capabilities __user *user_caps;
>>> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
>>> index 6e238142b1e8..3a3be66888da 100644
>>> --- a/arch/x86/kvm/vmx/x86_ops.h
>>> +++ b/arch/x86/kvm/vmx/x86_ops.h
>>> @@ -139,12 +139,17 @@ int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
>>>    void tdx_hardware_unsetup(void);
>>>    bool tdx_is_vm_type_supported(unsigned long type);
>>> +int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
>>>    int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
>>>    #else
>>>    static inline int tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return -EOPNOTSUPP; }
>>>    static inline void tdx_hardware_unsetup(void) {}
>>>    static inline bool tdx_is_vm_type_supported(unsigned long type) { return false; }
>>> +static inline int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>>> +{
>>> +	return -EINVAL;
>>> +};
>>>    static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOPNOTSUPP; }
>>>    #endif
>>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>>> index dd3a23d56621..a1389ddb1b33 100644
>>> --- a/arch/x86/kvm/x86.c
>>> +++ b/arch/x86/kvm/x86.c
>>> @@ -4726,6 +4726,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>>>    		break;
>>>    	case KVM_CAP_MAX_VCPUS:
>>>    		r = KVM_MAX_VCPUS;
>>> +		if (kvm_x86_ops.max_vcpus)
>>> +			r = static_call(kvm_x86_max_vcpus)(kvm);
>>>    		break;
>>>    	case KVM_CAP_MAX_VCPU_ID:
>>>    		r = KVM_MAX_VCPU_IDS;
>>> @@ -6683,6 +6685,8 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>>>    		break;
>>>    	default:
>>>    		r = -EINVAL;
>>> +		if (kvm_x86_ops.vm_enable_cap)
>>> +			r = static_call(kvm_x86_vm_enable_cap)(kvm, cap);
>>>    		break;
>>>    	}
>>>    	return r;
>>
Isaku Yamahata Feb. 26, 2024, 6:52 p.m. UTC | #4
On Sun, Feb 04, 2024 at 10:00:45AM +0800,
Binbin Wu <binbin.wu@linux.intel.com> wrote:

> 
> 
> On 2/1/2024 2:16 PM, Yuan Yao wrote:
> > On Wed, Jan 24, 2024 at 09:17:15AM +0800, Binbin Wu wrote:
> > > 
> > > On 1/23/2024 7:52 AM, isaku.yamahata@intel.com wrote:
> > > > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > > > 
> > > > TDX has its own limitation on the maximum number of vcpus that the guest
> > > > can accommodate.  Allow x86 kvm backend to implement its own KVM_ENABLE_CAP
> > > > handler and implement TDX backend for KVM_CAP_MAX_VCPUS.  user space VMM,
> > > > e.g. qemu, can specify its value instead of KVM_MAX_VCPUS.
> > > For legacy VM, KVM just provides the interface to query the max_vcpus.
> > > Why TD needs to provide a interface for userspace to set the limitation?
> > > What's the scenario?
> > I think the reason is TDH.MNG.INIT needs it:
> > 
> > TD_PARAMS:
> >      MAX_VCPUS:
> >          offset: 16 bytes.
> >          type: Unsigned 16b Integer.
> >          size: 2.
> >          Description: Maximum number of VCPUs.
> Thanks for explanation.
> 
> I am also wondering if this info can be passed via KVM_TDX_INIT_VM.
> Because userspace is allowed to set the value no greater than
> min(KVM_MAX_VCPUS, TDX_MAX_VCPUS), providing the extra cap KVM_CAP_MAX_VCPUS
> doesn't make more restriction comparing to providing it in KVM_TDX_INIT_VM.

It's better for the API to be common, not specific to TDX.  Also I don't want
to play with max_vcpu in multiple places.
Chao Gao March 14, 2024, 10:39 a.m. UTC | #5
>+static int vt_max_vcpus(struct kvm *kvm)
>+{
>+	if (!kvm)
>+		return KVM_MAX_VCPUS;
>+
>+	if (is_td(kvm))
>+		return min(kvm->max_vcpus, TDX_MAX_VCPUS);

I suppose kvm->max_vcpus should be always smaller than TDX_MAX_VCPUS, right?
if that's the case, the min() is pointless.

I don't get why kvm->max_vcpus is concerned. do you want to enable userspace to
read back the max_vcpus configured last time? this looks useless because userspace
can keep track of that value. how about:

	/*
	 * TDX module imposes additional restrictions on the maximum number of
	 * vCPUs of a TD guest.
	 */
	if (kvm && is_td(kvm))
		return min(TDX_MAX_VCPUS, KVM_MAX_VCPUS);
	else
		return KVM_MAX_VCPUS;

>+
>+	return kvm->max_vcpus;
>+}
>+
> static int vt_hardware_enable(void)
> {
> 	int ret;
>@@ -54,6 +66,14 @@ static void vt_hardware_unsetup(void)
> 	vmx_hardware_unsetup();
> }
> 
>+static int vt_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>+{
>+	if (is_td(kvm))
>+		return tdx_vm_enable_cap(kvm, cap);
>+
>+	return -EINVAL;
>+}
>+
> static int vt_vm_init(struct kvm *kvm)
> {
> 	if (is_td(kvm))
>@@ -91,7 +111,9 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
> 	.has_emulated_msr = vmx_has_emulated_msr,
> 
> 	.is_vm_type_supported = vt_is_vm_type_supported,
>+	.max_vcpus = vt_max_vcpus,
> 	.vm_size = sizeof(struct kvm_vmx),
>+	.vm_enable_cap = vt_vm_enable_cap,
> 	.vm_init = vt_vm_init,
> 	.vm_destroy = vmx_vm_destroy,
> 
>diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
>index 8c463407f8a8..876ad7895b88 100644
>--- a/arch/x86/kvm/vmx/tdx.c
>+++ b/arch/x86/kvm/vmx/tdx.c
>@@ -100,6 +100,35 @@ struct tdx_info {
> /* Info about the TDX module. */
> static struct tdx_info *tdx_info;
> 
>+int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
>+{
>+	int r;
>+
>+	switch (cap->cap) {
>+	case KVM_CAP_MAX_VCPUS: {
>+		if (cap->flags || cap->args[0] == 0)
>+			return -EINVAL;
>+		if (cap->args[0] > KVM_MAX_VCPUS ||
>+		    cap->args[0] > TDX_MAX_VCPUS)
>+			return -E2BIG;
>+
>+		mutex_lock(&kvm->lock);
>+		if (kvm->created_vcpus)
>+			r = -EBUSY;

Curly brackets are missing.

And -EBUSY looks improper because it isn't a temporary error.

>+		else {
>+			kvm->max_vcpus = cap->args[0];
>+			r = 0;
>+		}
>+		mutex_unlock(&kvm->lock);
>+		break;
diff mbox series

Patch

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 943b21b8b106..2f976c0f3116 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -21,6 +21,8 @@  KVM_X86_OP(hardware_unsetup)
 KVM_X86_OP(has_emulated_msr)
 KVM_X86_OP(vcpu_after_set_cpuid)
 KVM_X86_OP(is_vm_type_supported)
+KVM_X86_OP_OPTIONAL(max_vcpus);
+KVM_X86_OP_OPTIONAL(vm_enable_cap)
 KVM_X86_OP(vm_init)
 KVM_X86_OP_OPTIONAL(vm_destroy)
 KVM_X86_OP_OPTIONAL_RET0(vcpu_precreate)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 26f4668b0273..db44a92e5659 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1602,7 +1602,9 @@  struct kvm_x86_ops {
 	void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
 
 	bool (*is_vm_type_supported)(unsigned long vm_type);
+	int (*max_vcpus)(struct kvm *kvm);
 	unsigned int vm_size;
+	int (*vm_enable_cap)(struct kvm *kvm, struct kvm_enable_cap *cap);
 	int (*vm_init)(struct kvm *kvm);
 	void (*vm_destroy)(struct kvm *kvm);
 
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 50da807d7aea..4611f305a450 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -6,6 +6,7 @@ 
 #include "nested.h"
 #include "pmu.h"
 #include "tdx.h"
+#include "tdx_arch.h"
 
 static bool enable_tdx __ro_after_init;
 module_param_named(tdx, enable_tdx, bool, 0444);
@@ -16,6 +17,17 @@  static bool vt_is_vm_type_supported(unsigned long type)
 		(enable_tdx && tdx_is_vm_type_supported(type));
 }
 
+static int vt_max_vcpus(struct kvm *kvm)
+{
+	if (!kvm)
+		return KVM_MAX_VCPUS;
+
+	if (is_td(kvm))
+		return min(kvm->max_vcpus, TDX_MAX_VCPUS);
+
+	return kvm->max_vcpus;
+}
+
 static int vt_hardware_enable(void)
 {
 	int ret;
@@ -54,6 +66,14 @@  static void vt_hardware_unsetup(void)
 	vmx_hardware_unsetup();
 }
 
+static int vt_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	if (is_td(kvm))
+		return tdx_vm_enable_cap(kvm, cap);
+
+	return -EINVAL;
+}
+
 static int vt_vm_init(struct kvm *kvm)
 {
 	if (is_td(kvm))
@@ -91,7 +111,9 @@  struct kvm_x86_ops vt_x86_ops __initdata = {
 	.has_emulated_msr = vmx_has_emulated_msr,
 
 	.is_vm_type_supported = vt_is_vm_type_supported,
+	.max_vcpus = vt_max_vcpus,
 	.vm_size = sizeof(struct kvm_vmx),
+	.vm_enable_cap = vt_vm_enable_cap,
 	.vm_init = vt_vm_init,
 	.vm_destroy = vmx_vm_destroy,
 
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 8c463407f8a8..876ad7895b88 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -100,6 +100,35 @@  struct tdx_info {
 /* Info about the TDX module. */
 static struct tdx_info *tdx_info;
 
+int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	int r;
+
+	switch (cap->cap) {
+	case KVM_CAP_MAX_VCPUS: {
+		if (cap->flags || cap->args[0] == 0)
+			return -EINVAL;
+		if (cap->args[0] > KVM_MAX_VCPUS ||
+		    cap->args[0] > TDX_MAX_VCPUS)
+			return -E2BIG;
+
+		mutex_lock(&kvm->lock);
+		if (kvm->created_vcpus)
+			r = -EBUSY;
+		else {
+			kvm->max_vcpus = cap->args[0];
+			r = 0;
+		}
+		mutex_unlock(&kvm->lock);
+		break;
+	}
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
 static int tdx_get_capabilities(struct kvm_tdx_cmd *cmd)
 {
 	struct kvm_tdx_capabilities __user *user_caps;
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 6e238142b1e8..3a3be66888da 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -139,12 +139,17 @@  int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
 void tdx_hardware_unsetup(void);
 bool tdx_is_vm_type_supported(unsigned long type);
 
+int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap);
 int tdx_vm_ioctl(struct kvm *kvm, void __user *argp);
 #else
 static inline int tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return -EOPNOTSUPP; }
 static inline void tdx_hardware_unsetup(void) {}
 static inline bool tdx_is_vm_type_supported(unsigned long type) { return false; }
 
+static inline int tdx_vm_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
+{
+	return -EINVAL;
+};
 static inline int tdx_vm_ioctl(struct kvm *kvm, void __user *argp) { return -EOPNOTSUPP; }
 #endif
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index dd3a23d56621..a1389ddb1b33 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4726,6 +4726,8 @@  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		break;
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
+		if (kvm_x86_ops.max_vcpus)
+			r = static_call(kvm_x86_max_vcpus)(kvm);
 		break;
 	case KVM_CAP_MAX_VCPU_ID:
 		r = KVM_MAX_VCPU_IDS;
@@ -6683,6 +6685,8 @@  int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		break;
 	default:
 		r = -EINVAL;
+		if (kvm_x86_ops.vm_enable_cap)
+			r = static_call(kvm_x86_vm_enable_cap)(kvm, cap);
 		break;
 	}
 	return r;