diff mbox

KVM: SVM: refactor avic VM ID allocation

Message ID 20170815201159.GC5975@flask (mailing list archive)
State New, archived
Headers show

Commit Message

Radim Krčmář Aug. 15, 2017, 8:12 p.m. UTC
2017-08-11 22:11+0200, Denys Vlasenko:
> With lightly tweaked defconfig:
> 
>     text    data     bss      dec     hex filename
> 11259661 5109408 2981888 19350957 12745ad vmlinux.before
> 11259661 5109408  884736 17253805 10745ad vmlinux.after
> 
> Only compile-tested.
> 
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> Cc: Joerg Roedel <joro@8bytes.org>
> Cc: pbonzini@redhat.com
> Cc: rkrcmar@redhat.com
> Cc: tglx@linutronix.de
> Cc: mingo@redhat.com
> Cc: hpa@zytor.com
> Cc: x86@kernel.org
> Cc: kvm@vger.kernel.org
> Cc: linux-kernel@vger.kernel.org
> ---

Ah, I suspected my todo wasn't this short;  thanks for the patch!

> @@ -1468,6 +1433,22 @@ static int avic_vm_init(struct kvm *kvm)
>  	clear_page(page_address(l_page));
>  
>  	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
> + again:
> +	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
> +	if (vm_id == 0) { /* id is 1-based, zero is not okay */

Suravee, did the reserved zero mean something?

> +		next_vm_id_wrapped = 1;
> +		goto again;
> +	}
> +	/* Is it still in use? Only possible if wrapped at least once */
> +	if (next_vm_id_wrapped) {
> +		hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
> +			struct kvm *k2 = container_of(ka, struct kvm, arch);
> +			struct kvm_arch *vd2 = &k2->arch;
> +			if (vd2->avic_vm_id == vm_id)
> +				goto again;

Although hitting the case where all 2^24 ids are already used is
practically impossible, I don't like the loose end ...

What about applying something like the following on top?
---8<---
Subject: [PATCH] KVM: SVM: refactor avic VM ID allocation

We missed protection in case someone deserving a medal allocated 2^24
VMs and got a deadlock instead.  I think it is nicer without the goto
even if we dropped the error handling.

Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 arch/x86/kvm/svm.c | 58 +++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 38 insertions(+), 20 deletions(-)

Comments

Paolo Bonzini Aug. 17, 2017, 2:33 p.m. UTC | #1
On 15/08/2017 22:12, Radim Krčmář wrote:
> 2017-08-11 22:11+0200, Denys Vlasenko:
>> With lightly tweaked defconfig:
>>
>>     text    data     bss      dec     hex filename
>> 11259661 5109408 2981888 19350957 12745ad vmlinux.before
>> 11259661 5109408  884736 17253805 10745ad vmlinux.after
>>
>> Only compile-tested.
>>
>> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
>> Cc: Joerg Roedel <joro@8bytes.org>
>> Cc: pbonzini@redhat.com
>> Cc: rkrcmar@redhat.com
>> Cc: tglx@linutronix.de
>> Cc: mingo@redhat.com
>> Cc: hpa@zytor.com
>> Cc: x86@kernel.org
>> Cc: kvm@vger.kernel.org
>> Cc: linux-kernel@vger.kernel.org
>> ---
> 
> Ah, I suspected my todo wasn't this short;  thanks for the patch!
> 
>> @@ -1468,6 +1433,22 @@ static int avic_vm_init(struct kvm *kvm)
>>  	clear_page(page_address(l_page));
>>  
>>  	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
>> + again:
>> +	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
>> +	if (vm_id == 0) { /* id is 1-based, zero is not okay */
> 
> Suravee, did the reserved zero mean something?
> 
>> +		next_vm_id_wrapped = 1;
>> +		goto again;
>> +	}
>> +	/* Is it still in use? Only possible if wrapped at least once */
>> +	if (next_vm_id_wrapped) {
>> +		hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
>> +			struct kvm *k2 = container_of(ka, struct kvm, arch);
>> +			struct kvm_arch *vd2 = &k2->arch;
>> +			if (vd2->avic_vm_id == vm_id)
>> +				goto again;
> 
> Although hitting the case where all 2^24 ids are already used is
> practically impossible, I don't like the loose end ...

I think even the case where 2^16 ids are used deserves a medal.  Why
don't we just make the bitmap 8 KiB and call it a day? :)

Paolo


> What about applying something like the following on top?
> ---8<---
> Subject: [PATCH] KVM: SVM: refactor avic VM ID allocation
> 
> We missed protection in case someone deserving a medal allocated 2^24
> VMs and got a deadlock instead.  I think it is nicer without the goto
> even if we dropped the error handling.
> 
> Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
> ---
>  arch/x86/kvm/svm.c | 58 +++++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 38 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index c21b49b5ee96..4d9ee8d76db3 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -985,8 +985,6 @@ static void disable_nmi_singlestep(struct vcpu_svm *svm)
>   */
>  #define SVM_VM_DATA_HASH_BITS	8
>  static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
> -static u32 next_vm_id = 0;
> -static bool next_vm_id_wrapped = 0;
>  static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
>  
>  /* Note:
> @@ -1385,6 +1383,38 @@ static int avic_init_backing_page(struct kvm_vcpu *vcpu)
>  	return 0;
>  }
>  
> +static bool avic_vm_id_is_used(u32 vm_id)
> +{
> +	struct kvm_arch *ka;
> +
> +	hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id)
> +		if (ka->avic_vm_id == vm_id)
> +			return true;
> +
> +	return false;
> +}
> +
> +static u32 avic_get_next_vm_id(void)
> +{
> +	static u32 next_vm_id = 0;
> +	static bool next_vm_id_wrapped = false;
> +	unsigned i;
> +
> +	for (i = 0; i < AVIC_VM_ID_NR; i++) {
> +		next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
> +
> +		if (next_vm_id == 0) { /* id is 1-based, zero is not okay */
> +			next_vm_id = 1;
> +			next_vm_id_wrapped = true;
> +		}
> +
> +		if (!next_vm_id_wrapped || !avic_vm_id_is_used(next_vm_id))
> +			return next_vm_id;
> +	}
> +
> +	return 0;
> +}
> +
>  static void avic_vm_destroy(struct kvm *kvm)
>  {
>  	unsigned long flags;
> @@ -1410,8 +1440,6 @@ static int avic_vm_init(struct kvm *kvm)
>  	struct kvm_arch *vm_data = &kvm->arch;
>  	struct page *p_page;
>  	struct page *l_page;
> -	struct kvm_arch *ka;
> -	u32 vm_id;
>  
>  	if (!avic)
>  		return 0;
> @@ -1432,28 +1460,18 @@ static int avic_vm_init(struct kvm *kvm)
>  	vm_data->avic_logical_id_table_page = l_page;
>  	clear_page(page_address(l_page));
>  
> +	err = -EAGAIN;
>  	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
> - again:
> -	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
> -	if (vm_id == 0) { /* id is 1-based, zero is not okay */
> -		next_vm_id_wrapped = 1;
> -		goto again;
> -	}
> -	/* Is it still in use? Only possible if wrapped at least once */
> -	if (next_vm_id_wrapped) {
> -		hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
> -			struct kvm *k2 = container_of(ka, struct kvm, arch);
> -			struct kvm_arch *vd2 = &k2->arch;
> -			if (vd2->avic_vm_id == vm_id)
> -				goto again;
> -		}
> -	}
> -	vm_data->avic_vm_id = vm_id;
> +	vm_data->avic_vm_id = avic_get_next_vm_id();
> +	if (!vm_data->avic_vm_id)
> +		goto unlock;
>  	hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
>  	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
>  
>  	return 0;
>  
> +unlock:
> +	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
>  free_avic:
>  	avic_vm_destroy(kvm);
>  	return err;
>
Denys Vlasenko Aug. 18, 2017, 3:22 p.m. UTC | #2
On 08/17/2017 04:33 PM, Paolo Bonzini wrote:
> On 15/08/2017 22:12, Radim Krčmář wrote:
>> 2017-08-11 22:11+0200, Denys Vlasenko:
>>> With lightly tweaked defconfig:
>>>
>>>     text    data     bss      dec     hex filename
>>> 11259661 5109408 2981888 19350957 12745ad vmlinux.before
>>> 11259661 5109408  884736 17253805 10745ad vmlinux.after
>>>
>>> Only compile-tested.
>>>
>>> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
>>> Cc: Joerg Roedel <joro@8bytes.org>
>>> Cc: pbonzini@redhat.com
>>> Cc: rkrcmar@redhat.com
>>> Cc: tglx@linutronix.de
>>> Cc: mingo@redhat.com
>>> Cc: hpa@zytor.com
>>> Cc: x86@kernel.org
>>> Cc: kvm@vger.kernel.org
>>> Cc: linux-kernel@vger.kernel.org
>>> ---
>>
>> Ah, I suspected my todo wasn't this short;  thanks for the patch!
>>
>>> @@ -1468,6 +1433,22 @@ static int avic_vm_init(struct kvm *kvm)
>>>  	clear_page(page_address(l_page));
>>>
>>>  	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
>>> + again:
>>> +	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
>>> +	if (vm_id == 0) { /* id is 1-based, zero is not okay */
>>
>> Suravee, did the reserved zero mean something?
>>
>>> +		next_vm_id_wrapped = 1;
>>> +		goto again;
>>> +	}
>>> +	/* Is it still in use? Only possible if wrapped at least once */
>>> +	if (next_vm_id_wrapped) {
>>> +		hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
>>> +			struct kvm *k2 = container_of(ka, struct kvm, arch);
>>> +			struct kvm_arch *vd2 = &k2->arch;
>>> +			if (vd2->avic_vm_id == vm_id)
>>> +				goto again;
>>
>> Although hitting the case where all 2^24 ids are already used is
>> practically impossible, I don't like the loose end ...
>
> I think even the case where 2^16 ids are used deserves a medal.  Why
> don't we just make the bitmap 8 KiB and call it a day? :)

Well, the RAM is cheap, but a 4-byte variable is still thousands of times
smaller than an 8K bitmap.

Since a 256 element hash table is used here, you need to have ~one hundred
VMs to start seeing (very small) degradation in speed of creation of new VMs
compared to bitmap approach, and that is only after 16777216 VMs
were created since reboot.

If you want to spend RAM on speeding this up, you can increase hash table size
instead. That would speed up avic_ga_log_notifier() too.
Paolo Bonzini Aug. 18, 2017, 4:05 p.m. UTC | #3
On 18/08/2017 17:22, Denys Vlasenko wrote:
> On 08/17/2017 04:33 PM, Paolo Bonzini wrote:
>> On 15/08/2017 22:12, Radim Krčmář wrote:
>>> 2017-08-11 22:11+0200, Denys Vlasenko:
>>>> With lightly tweaked defconfig:
>>>>
>>>>     text    data     bss      dec     hex filename
>>>> 11259661 5109408 2981888 19350957 12745ad vmlinux.before
>>>> 11259661 5109408  884736 17253805 10745ad vmlinux.after
>>>>
>>>> Only compile-tested.
>>>>
>>>> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
>>>> Cc: Joerg Roedel <joro@8bytes.org>
>>>> Cc: pbonzini@redhat.com
>>>> Cc: rkrcmar@redhat.com
>>>> Cc: tglx@linutronix.de
>>>> Cc: mingo@redhat.com
>>>> Cc: hpa@zytor.com
>>>> Cc: x86@kernel.org
>>>> Cc: kvm@vger.kernel.org
>>>> Cc: linux-kernel@vger.kernel.org
>>>> ---
>>>
>>> Ah, I suspected my todo wasn't this short;  thanks for the patch!
>>>
>>>> @@ -1468,6 +1433,22 @@ static int avic_vm_init(struct kvm *kvm)
>>>>      clear_page(page_address(l_page));
>>>>
>>>>      spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
>>>> + again:
>>>> +    vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
>>>> +    if (vm_id == 0) { /* id is 1-based, zero is not okay */
>>>
>>> Suravee, did the reserved zero mean something?
>>>
>>>> +        next_vm_id_wrapped = 1;
>>>> +        goto again;
>>>> +    }
>>>> +    /* Is it still in use? Only possible if wrapped at least once */
>>>> +    if (next_vm_id_wrapped) {
>>>> +        hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
>>>> +            struct kvm *k2 = container_of(ka, struct kvm, arch);
>>>> +            struct kvm_arch *vd2 = &k2->arch;
>>>> +            if (vd2->avic_vm_id == vm_id)
>>>> +                goto again;
>>>
>>> Although hitting the case where all 2^24 ids are already used is
>>> practically impossible, I don't like the loose end ...
>>
>> I think even the case where 2^16 ids are used deserves a medal.  Why
>> don't we just make the bitmap 8 KiB and call it a day? :)
> 
> Well, the RAM is cheap, but a 4-byte variable is still thousands of times
> smaller than an 8K bitmap.
> 
> Since a 256 element hash table is used here, you need to have ~one hundred
> VMs to start seeing (very small) degradation in speed of creation of new
> VMs compared to bitmap approach, and that is only after 16777216 VMs
> were created since reboot.

I guess that's fair since we already have the hash table for other uses.

Paolo

> If you want to spend RAM on speeding this up, you can increase hash
> table size
> instead. That would speed up avic_ga_log_notifier() too.
diff mbox

Patch

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c21b49b5ee96..4d9ee8d76db3 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -985,8 +985,6 @@  static void disable_nmi_singlestep(struct vcpu_svm *svm)
  */
 #define SVM_VM_DATA_HASH_BITS	8
 static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS);
-static u32 next_vm_id = 0;
-static bool next_vm_id_wrapped = 0;
 static DEFINE_SPINLOCK(svm_vm_data_hash_lock);
 
 /* Note:
@@ -1385,6 +1383,38 @@  static int avic_init_backing_page(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static bool avic_vm_id_is_used(u32 vm_id)
+{
+	struct kvm_arch *ka;
+
+	hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id)
+		if (ka->avic_vm_id == vm_id)
+			return true;
+
+	return false;
+}
+
+static u32 avic_get_next_vm_id(void)
+{
+	static u32 next_vm_id = 0;
+	static bool next_vm_id_wrapped = false;
+	unsigned i;
+
+	for (i = 0; i < AVIC_VM_ID_NR; i++) {
+		next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
+
+		if (next_vm_id == 0) { /* id is 1-based, zero is not okay */
+			next_vm_id = 1;
+			next_vm_id_wrapped = true;
+		}
+
+		if (!next_vm_id_wrapped || !avic_vm_id_is_used(next_vm_id))
+			return next_vm_id;
+	}
+
+	return 0;
+}
+
 static void avic_vm_destroy(struct kvm *kvm)
 {
 	unsigned long flags;
@@ -1410,8 +1440,6 @@  static int avic_vm_init(struct kvm *kvm)
 	struct kvm_arch *vm_data = &kvm->arch;
 	struct page *p_page;
 	struct page *l_page;
-	struct kvm_arch *ka;
-	u32 vm_id;
 
 	if (!avic)
 		return 0;
@@ -1432,28 +1460,18 @@  static int avic_vm_init(struct kvm *kvm)
 	vm_data->avic_logical_id_table_page = l_page;
 	clear_page(page_address(l_page));
 
+	err = -EAGAIN;
 	spin_lock_irqsave(&svm_vm_data_hash_lock, flags);
- again:
-	vm_id = next_vm_id = (next_vm_id + 1) & AVIC_VM_ID_MASK;
-	if (vm_id == 0) { /* id is 1-based, zero is not okay */
-		next_vm_id_wrapped = 1;
-		goto again;
-	}
-	/* Is it still in use? Only possible if wrapped at least once */
-	if (next_vm_id_wrapped) {
-		hash_for_each_possible(svm_vm_data_hash, ka, hnode, vm_id) {
-			struct kvm *k2 = container_of(ka, struct kvm, arch);
-			struct kvm_arch *vd2 = &k2->arch;
-			if (vd2->avic_vm_id == vm_id)
-				goto again;
-		}
-	}
-	vm_data->avic_vm_id = vm_id;
+	vm_data->avic_vm_id = avic_get_next_vm_id();
+	if (!vm_data->avic_vm_id)
+		goto unlock;
 	hash_add(svm_vm_data_hash, &vm_data->hnode, vm_data->avic_vm_id);
 	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 
 	return 0;
 
+unlock:
+	spin_unlock_irqrestore(&svm_vm_data_hash_lock, flags);
 free_avic:
 	avic_vm_destroy(kvm);
 	return err;