diff mbox

[3/4] KVM: add KVM_CREATE_VM2 system ioctl

Message ID 20170413201951.11939-4-rkrcmar@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Radim Krčmář April 13, 2017, 8:19 p.m. UTC
This patch allows userspace to tell how many VCPUs it is going to use,
which can save memory when allocating the kvm->vcpus array.  This will
be done with a new KVM_CREATE_VM2 IOCTL.

An alternative would be to redo kvm->vcpus as a list or protect the
array with RCU.  RCU is slower and a list is not even practical as
kvm->vcpus are being used for index-based accesses.

We could have an IOCTL that is called in between KVM_CREATE_VM and first
KVM_CREATE_VCPU and sets the size of the vcpus array, but we'd be making
one useless allocation.  Knowing the desired number of VCPUs from the
beginning seems best for now.

This patch also prepares generic code for architectures that will set
KVM_CONFIGURABLE_MAX_VCPUS to a non-zero value.

A disputable decision is that KVM_CREATE_VM2 actually works even if
KVM_CAP_CONFIGURABLE_MAX_VCPUS is 0, but uses that capability for its
detection.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 28 ++++++++++++++++++++++++
 include/linux/kvm_host.h          |  3 +++
 include/uapi/linux/kvm.h          |  8 +++++++
 virt/kvm/kvm_main.c               | 45 +++++++++++++++++++++++++++++++++------
 4 files changed, 77 insertions(+), 7 deletions(-)

Comments

Paolo Bonzini April 18, 2017, 2:16 p.m. UTC | #1
On 13/04/2017 22:19, Radim Krčmář wrote:
> This patch allows userspace to tell how many VCPUs it is going to use,
> which can save memory when allocating the kvm->vcpus array.  This will
> be done with a new KVM_CREATE_VM2 IOCTL.
> 
> An alternative would be to redo kvm->vcpus as a list or protect the
> array with RCU.  RCU is slower and a list is not even practical as
> kvm->vcpus are being used for index-based accesses.
> 
> We could have an IOCTL that is called in between KVM_CREATE_VM and first
> KVM_CREATE_VCPU and sets the size of the vcpus array, but we'd be making
> one useless allocation.  Knowing the desired number of VCPUs from the
> beginning seems best for now.
> 
> This patch also prepares generic code for architectures that will set
> KVM_CONFIGURABLE_MAX_VCPUS to a non-zero value.

Why is KVM_MAX_VCPU_ID or KVM_MAX_VCPUS not enough?

Paolo

> A disputable decision is that KVM_CREATE_VM2 actually works even if
> KVM_CAP_CONFIGURABLE_MAX_VCPUS is 0, but uses that capability for its
> detection.
> 
> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
> ---
>  Documentation/virtual/kvm/api.txt | 28 ++++++++++++++++++++++++
>  include/linux/kvm_host.h          |  3 +++
>  include/uapi/linux/kvm.h          |  8 +++++++
>  virt/kvm/kvm_main.c               | 45 +++++++++++++++++++++++++++++++++------
>  4 files changed, 77 insertions(+), 7 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index e60be91d8036..461130adbdc7 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3259,6 +3259,34 @@ Otherwise, if the MCE is a corrected error, KVM will just
>  store it in the corresponding bank (provided this bank is
>  not holding a previously reported uncorrected error).
>  
> +
> +4.107 KVM_CREATE_VM2
> +
> +Capability: KVM_CAP_CONFIGURABLE_MAX_VCPUS
> +Architectures: all
> +Type: system ioctl
> +Parameters: struct kvm_vm_config
> +Returns: a VM fd that can be used to control the new virtual machine,
> +         -E2BIG if the value of max_vcpus is not supported
> +
> +This is an extension of KVM_CREATE_VM that allows the user to pass more
> +information through:
> +
> +struct kvm_vm_config {
> +	__u64 type;
> +	__u32 max_vcpus;
> +	__u8 reserved[52];
> +};
> +
> +type is the argument to KVM_CREATE_VM
> +
> +max_vcpus is the desired maximal number of VCPUs, it must not exceed the value
> +returned by KVM_CAP_CONFIGURABLE_MAX_VCPUS.  A value of 0 is treated as if
> +userspace passed the value returned by KVM_CAP_MAX_VCPUS instead.
> +
> +reserved must be 0.
> +
> +
>  5. The kvm_run structure
>  ------------------------
>  
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6ba7bc831094..b875c0997328 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -39,6 +39,9 @@
>  #ifndef KVM_MAX_VCPU_ID
>  #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
>  #endif
> +#ifndef KVM_CONFIGURABLE_MAX_VCPUS
> +#define KVM_CONFIGURABLE_MAX_VCPUS 0U
> +#endif
>  
>  /*
>   * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 6180ea50e9ef..8349c73b3517 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -83,6 +83,12 @@ struct kvm_debug_guest {
>  
>  /* *** End of deprecated interfaces *** */
>  
> +/* for KVM_CREATE_VM2 */
> +struct kvm_vm_config {
> +	__u64 type;
> +	__u32 max_vcpus;
> +	__u8 reserved[52];
> +};
>  
>  /* for KVM_CREATE_MEMORY_REGION */
>  struct kvm_memory_region {
> @@ -713,6 +719,7 @@ struct kvm_ppc_resize_hpt {
>   */
>  #define KVM_GET_API_VERSION       _IO(KVMIO,   0x00)
>  #define KVM_CREATE_VM             _IO(KVMIO,   0x01) /* returns a VM fd */
> +#define KVM_CREATE_VM2            _IOR(KVMIO,  0x01, struct kvm_vm_config)
>  #define KVM_GET_MSR_INDEX_LIST    _IOWR(KVMIO, 0x02, struct kvm_msr_list)
>  
>  #define KVM_S390_ENABLE_SIE       _IO(KVMIO,   0x06)
> @@ -892,6 +899,7 @@ struct kvm_ppc_resize_hpt {
>  #define KVM_CAP_MIPS_64BIT 139
>  #define KVM_CAP_S390_GS 140
>  #define KVM_CAP_S390_AIS 141
> +#define KVM_CAP_CONFIGURABLE_MAX_VCPUS 142
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 0f1579f118b4..9ef52fa006ec 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -629,11 +629,20 @@ static inline void kvm_free_vm(struct kvm *kvm)
>  	kfree(kvm);
>  }
>  
> -static struct kvm *kvm_create_vm(unsigned long type)
> +static struct kvm *kvm_create_vm(struct kvm_vm_config *vm_config)
>  {
>  	int r, i;
> -	struct kvm *kvm = kvm_alloc_vm(KVM_MAX_VCPUS);
> +	struct kvm *kvm;
>  
> +	if (!KVM_CONFIGURABLE_MAX_VCPUS && vm_config->max_vcpus)
> +		return ERR_PTR(-EINVAL);
> +	if (vm_config->max_vcpus > KVM_CONFIGURABLE_MAX_VCPUS)
> +		return ERR_PTR(-E2BIG);
> +
> +	if (!vm_config->max_vcpus)
> +		vm_config->max_vcpus = KVM_MAX_VCPUS;
> +
> +	kvm = kvm_alloc_vm(vm_config->max_vcpus);
>  	if (!kvm)
>  		return ERR_PTR(-ENOMEM);
>  
> @@ -647,7 +656,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
>  	refcount_set(&kvm->users_count, 1);
>  	INIT_LIST_HEAD(&kvm->devices);
>  
> -	r = kvm_arch_init_vm(kvm, type);
> +	r = kvm_arch_init_vm(kvm, vm_config->type);
>  	if (r)
>  		goto out_err_no_disable;
>  
> @@ -2957,6 +2966,8 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
>  #endif
>  	case KVM_CAP_MAX_VCPU_ID:
>  		return KVM_MAX_VCPU_ID;
> +	case KVM_CAP_CONFIGURABLE_MAX_VCPUS:
> +		return KVM_CONFIGURABLE_MAX_VCPUS;
>  	default:
>  		break;
>  	}
> @@ -3182,13 +3193,13 @@ static struct file_operations kvm_vm_fops = {
>  	.llseek		= noop_llseek,
>  };
>  
> -static int kvm_dev_ioctl_create_vm(unsigned long type)
> +static int kvm_dev_ioctl_create_vm(struct kvm_vm_config *vm_config)
>  {
>  	int r;
>  	struct kvm *kvm;
>  	struct file *file;
>  
> -	kvm = kvm_create_vm(type);
> +	kvm = kvm_create_vm(vm_config);
>  	if (IS_ERR(kvm))
>  		return PTR_ERR(kvm);
>  #ifdef CONFIG_KVM_MMIO
> @@ -3223,6 +3234,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
>  static long kvm_dev_ioctl(struct file *filp,
>  			  unsigned int ioctl, unsigned long arg)
>  {
> +	void __user *argp = (void __user *)arg;
>  	long r = -EINVAL;
>  
>  	switch (ioctl) {
> @@ -3231,9 +3243,28 @@ static long kvm_dev_ioctl(struct file *filp,
>  			goto out;
>  		r = KVM_API_VERSION;
>  		break;
> -	case KVM_CREATE_VM:
> -		r = kvm_dev_ioctl_create_vm(arg);
> +	case KVM_CREATE_VM: {
> +		struct kvm_vm_config vm_config = {.type = arg};
> +
> +		r = kvm_dev_ioctl_create_vm(&vm_config);
>  		break;
> +	}
> +	case KVM_CREATE_VM2: {
> +		struct kvm_vm_config vm_config, check_reserved = {};
> +
> +		r = -EFAULT;
> +		if (copy_from_user(&vm_config, argp, sizeof vm_config))
> +			goto out;
> +
> +		r = -EINVAL;
> +		check_reserved.type = vm_config.type;
> +		check_reserved.max_vcpus = vm_config.max_vcpus;
> +		if (memcmp(&vm_config, &check_reserved, sizeof check_reserved))
> +			goto out;
> +
> +		r = kvm_dev_ioctl_create_vm(&vm_config);
> +		break;
> +	}
>  	case KVM_CHECK_EXTENSION:
>  		r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
>  		break;
>
Paolo Bonzini April 18, 2017, 2:30 p.m. UTC | #2
On 18/04/2017 16:16, Paolo Bonzini wrote:
>> This patch allows userspace to tell how many VCPUs it is going to use,
>> which can save memory when allocating the kvm->vcpus array.  This will
>> be done with a new KVM_CREATE_VM2 IOCTL.
>>
>> An alternative would be to redo kvm->vcpus as a list or protect the
>> array with RCU.  RCU is slower and a list is not even practical as
>> kvm->vcpus are being used for index-based accesses.
>>
>> We could have an IOCTL that is called in between KVM_CREATE_VM and first
>> KVM_CREATE_VCPU and sets the size of the vcpus array, but we'd be making
>> one useless allocation.  Knowing the desired number of VCPUs from the
>> beginning seems best for now.
>>
>> This patch also prepares generic code for architectures that will set
>> KVM_CONFIGURABLE_MAX_VCPUS to a non-zero value.
> Why is KVM_MAX_VCPU_ID or KVM_MAX_VCPUS not enough?

Ok, for KVM_MAX_VCPUS I should have read the cover letter more carefully. :)

Paolo
Radim Krčmář April 24, 2017, 4:22 p.m. UTC | #3
2017-04-18 16:30+0200, Paolo Bonzini:
> On 18/04/2017 16:16, Paolo Bonzini wrote:
>>> This patch allows userspace to tell how many VCPUs it is going to use,
>>> which can save memory when allocating the kvm->vcpus array.  This will
>>> be done with a new KVM_CREATE_VM2 IOCTL.
>>>
>>> An alternative would be to redo kvm->vcpus as a list or protect the
>>> array with RCU.  RCU is slower and a list is not even practical as
>>> kvm->vcpus are being used for index-based accesses.
>>>
>>> We could have an IOCTL that is called in between KVM_CREATE_VM and first
>>> KVM_CREATE_VCPU and sets the size of the vcpus array, but we'd be making
>>> one useless allocation.  Knowing the desired number of VCPUs from the
>>> beginning seems best for now.
>>>
>>> This patch also prepares generic code for architectures that will set
>>> KVM_CONFIGURABLE_MAX_VCPUS to a non-zero value.
>> Why is KVM_MAX_VCPU_ID or KVM_MAX_VCPUS not enough?
> 
> Ok, for KVM_MAX_VCPUS I should have read the cover letter more carefully. :)

KVM_MAX_VCPU_ID makes sense as the upper bound, I just didn't want to
mingle the concepts, because the kvm->vcpus array is not indexed by
VCPU_ID ...
In hindsight, it would be best to change that and get rid of the search.
I'll see how that looks in v2.
Radim Krčmář April 24, 2017, 8:22 p.m. UTC | #4
2017-04-24 18:22+0200, Radim Krčmář:
> 2017-04-18 16:30+0200, Paolo Bonzini:
>> On 18/04/2017 16:16, Paolo Bonzini wrote:
>>>> This patch allows userspace to tell how many VCPUs it is going to use,
>>>> which can save memory when allocating the kvm->vcpus array.  This will
>>>> be done with a new KVM_CREATE_VM2 IOCTL.
>>>>
>>>> An alternative would be to redo kvm->vcpus as a list or protect the
>>>> array with RCU.  RCU is slower and a list is not even practical as
>>>> kvm->vcpus are being used for index-based accesses.
>>>>
>>>> We could have an IOCTL that is called in between KVM_CREATE_VM and first
>>>> KVM_CREATE_VCPU and sets the size of the vcpus array, but we'd be making
>>>> one useless allocation.  Knowing the desired number of VCPUs from the
>>>> beginning seems best for now.
>>>>
>>>> This patch also prepares generic code for architectures that will set
>>>> KVM_CONFIGURABLE_MAX_VCPUS to a non-zero value.
>>> Why is KVM_MAX_VCPU_ID or KVM_MAX_VCPUS not enough?
>> 
>> Ok, for KVM_MAX_VCPUS I should have read the cover letter more carefully. :)
> 
> KVM_MAX_VCPU_ID makes sense as the upper bound, I just didn't want to
> mingle the concepts, because the kvm->vcpus array is not indexed by
> VCPU_ID ...
> 
> In hindsight, it would be best to change that and get rid of the search.
> I'll see how that looks in v2.

I realized why not:
 * the major user of kvm->vcpu is kvm_for_each_vcpu and it works best
   with a packed array
 * at least arm KVM_IRQ_LINE uses the order in which cpus were created
   to communicate with userspace

Putting this work into a drawer with the "do not share data structure
between kvm_for_each_vcpu and kvm_get_vcpu" idea. :)
diff mbox

Patch

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index e60be91d8036..461130adbdc7 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3259,6 +3259,34 @@  Otherwise, if the MCE is a corrected error, KVM will just
 store it in the corresponding bank (provided this bank is
 not holding a previously reported uncorrected error).
 
+
+4.107 KVM_CREATE_VM2
+
+Capability: KVM_CAP_CONFIGURABLE_MAX_VCPUS
+Architectures: all
+Type: system ioctl
+Parameters: struct kvm_vm_config
+Returns: a VM fd that can be used to control the new virtual machine,
+         -E2BIG if the value of max_vcpus is not supported
+
+This is an extension of KVM_CREATE_VM that allows the user to pass more
+information through:
+
+struct kvm_vm_config {
+	__u64 type;
+	__u32 max_vcpus;
+	__u8 reserved[52];
+};
+
+type is the argument to KVM_CREATE_VM
+
+max_vcpus is the desired maximal number of VCPUs, it must not exceed the value
+returned by KVM_CAP_CONFIGURABLE_MAX_VCPUS.  A value of 0 is treated as if
+userspace passed the value returned by KVM_CAP_MAX_VCPUS instead.
+
+reserved must be 0.
+
+
 5. The kvm_run structure
 ------------------------
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6ba7bc831094..b875c0997328 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -39,6 +39,9 @@ 
 #ifndef KVM_MAX_VCPU_ID
 #define KVM_MAX_VCPU_ID KVM_MAX_VCPUS
 #endif
+#ifndef KVM_CONFIGURABLE_MAX_VCPUS
+#define KVM_CONFIGURABLE_MAX_VCPUS 0U
+#endif
 
 /*
  * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6180ea50e9ef..8349c73b3517 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -83,6 +83,12 @@  struct kvm_debug_guest {
 
 /* *** End of deprecated interfaces *** */
 
+/* for KVM_CREATE_VM2 */
+struct kvm_vm_config {
+	__u64 type;
+	__u32 max_vcpus;
+	__u8 reserved[52];
+};
 
 /* for KVM_CREATE_MEMORY_REGION */
 struct kvm_memory_region {
@@ -713,6 +719,7 @@  struct kvm_ppc_resize_hpt {
  */
 #define KVM_GET_API_VERSION       _IO(KVMIO,   0x00)
 #define KVM_CREATE_VM             _IO(KVMIO,   0x01) /* returns a VM fd */
+#define KVM_CREATE_VM2            _IOR(KVMIO,  0x01, struct kvm_vm_config)
 #define KVM_GET_MSR_INDEX_LIST    _IOWR(KVMIO, 0x02, struct kvm_msr_list)
 
 #define KVM_S390_ENABLE_SIE       _IO(KVMIO,   0x06)
@@ -892,6 +899,7 @@  struct kvm_ppc_resize_hpt {
 #define KVM_CAP_MIPS_64BIT 139
 #define KVM_CAP_S390_GS 140
 #define KVM_CAP_S390_AIS 141
+#define KVM_CAP_CONFIGURABLE_MAX_VCPUS 142
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 0f1579f118b4..9ef52fa006ec 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -629,11 +629,20 @@  static inline void kvm_free_vm(struct kvm *kvm)
 	kfree(kvm);
 }
 
-static struct kvm *kvm_create_vm(unsigned long type)
+static struct kvm *kvm_create_vm(struct kvm_vm_config *vm_config)
 {
 	int r, i;
-	struct kvm *kvm = kvm_alloc_vm(KVM_MAX_VCPUS);
+	struct kvm *kvm;
 
+	if (!KVM_CONFIGURABLE_MAX_VCPUS && vm_config->max_vcpus)
+		return ERR_PTR(-EINVAL);
+	if (vm_config->max_vcpus > KVM_CONFIGURABLE_MAX_VCPUS)
+		return ERR_PTR(-E2BIG);
+
+	if (!vm_config->max_vcpus)
+		vm_config->max_vcpus = KVM_MAX_VCPUS;
+
+	kvm = kvm_alloc_vm(vm_config->max_vcpus);
 	if (!kvm)
 		return ERR_PTR(-ENOMEM);
 
@@ -647,7 +656,7 @@  static struct kvm *kvm_create_vm(unsigned long type)
 	refcount_set(&kvm->users_count, 1);
 	INIT_LIST_HEAD(&kvm->devices);
 
-	r = kvm_arch_init_vm(kvm, type);
+	r = kvm_arch_init_vm(kvm, vm_config->type);
 	if (r)
 		goto out_err_no_disable;
 
@@ -2957,6 +2966,8 @@  static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #endif
 	case KVM_CAP_MAX_VCPU_ID:
 		return KVM_MAX_VCPU_ID;
+	case KVM_CAP_CONFIGURABLE_MAX_VCPUS:
+		return KVM_CONFIGURABLE_MAX_VCPUS;
 	default:
 		break;
 	}
@@ -3182,13 +3193,13 @@  static struct file_operations kvm_vm_fops = {
 	.llseek		= noop_llseek,
 };
 
-static int kvm_dev_ioctl_create_vm(unsigned long type)
+static int kvm_dev_ioctl_create_vm(struct kvm_vm_config *vm_config)
 {
 	int r;
 	struct kvm *kvm;
 	struct file *file;
 
-	kvm = kvm_create_vm(type);
+	kvm = kvm_create_vm(vm_config);
 	if (IS_ERR(kvm))
 		return PTR_ERR(kvm);
 #ifdef CONFIG_KVM_MMIO
@@ -3223,6 +3234,7 @@  static int kvm_dev_ioctl_create_vm(unsigned long type)
 static long kvm_dev_ioctl(struct file *filp,
 			  unsigned int ioctl, unsigned long arg)
 {
+	void __user *argp = (void __user *)arg;
 	long r = -EINVAL;
 
 	switch (ioctl) {
@@ -3231,9 +3243,28 @@  static long kvm_dev_ioctl(struct file *filp,
 			goto out;
 		r = KVM_API_VERSION;
 		break;
-	case KVM_CREATE_VM:
-		r = kvm_dev_ioctl_create_vm(arg);
+	case KVM_CREATE_VM: {
+		struct kvm_vm_config vm_config = {.type = arg};
+
+		r = kvm_dev_ioctl_create_vm(&vm_config);
 		break;
+	}
+	case KVM_CREATE_VM2: {
+		struct kvm_vm_config vm_config, check_reserved = {};
+
+		r = -EFAULT;
+		if (copy_from_user(&vm_config, argp, sizeof vm_config))
+			goto out;
+
+		r = -EINVAL;
+		check_reserved.type = vm_config.type;
+		check_reserved.max_vcpus = vm_config.max_vcpus;
+		if (memcmp(&vm_config, &check_reserved, sizeof check_reserved))
+			goto out;
+
+		r = kvm_dev_ioctl_create_vm(&vm_config);
+		break;
+	}
 	case KVM_CHECK_EXTENSION:
 		r = kvm_vm_ioctl_check_extension_generic(NULL, arg);
 		break;