diff mbox series

[v3,3/3] kvm: call kvm_arch_destroy_vm if vm creation fails

Message ID 20191024230327.140935-4-jmattson@google.com (mailing list archive)
State New, archived
Headers show
Series kvm: call kvm_arch_destroy_vm if vm creation fails | expand

Commit Message

Jim Mattson Oct. 24, 2019, 11:03 p.m. UTC
From: John Sperbeck <jsperbeck@google.com>

In kvm_create_vm(), if we've successfully called kvm_arch_init_vm(), but
then fail later in the function, we need to call kvm_arch_destroy_vm()
so that it can do any necessary cleanup (like freeing memory).

Fixes: 44a95dae1d229a ("KVM: x86: Detect and Initialize AVIC support")

Signed-off-by: John Sperbeck <jsperbeck@google.com>
Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Junaid Shahid <junaids@google.com>
---
 virt/kvm/kvm_main.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

Comments

Sean Christopherson Oct. 24, 2019, 11:29 p.m. UTC | #1
On Thu, Oct 24, 2019 at 04:03:27PM -0700, Jim Mattson wrote:
> From: John Sperbeck <jsperbeck@google.com>
> 
> In kvm_create_vm(), if we've successfully called kvm_arch_init_vm(), but
> then fail later in the function, we need to call kvm_arch_destroy_vm()
> so that it can do any necessary cleanup (like freeing memory).
> 
> Fixes: 44a95dae1d229a ("KVM: x86: Detect and Initialize AVIC support")
> 
> Signed-off-by: John Sperbeck <jsperbeck@google.com>
> Signed-off-by: Jim Mattson <jmattson@google.com>
> Reviewed-by: Junaid Shahid <junaids@google.com>
> ---
>  virt/kvm/kvm_main.c | 10 ++++++----
>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 77819597d7e8e..f8f0106f8d20f 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -649,7 +649,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
>  		struct kvm_memslots *slots = kvm_alloc_memslots();
>  
>  		if (!slots)
> -			goto out_err_no_disable;
> +			goto out_err_no_arch_destroy_vm;
>  		/* Generations must be different for each address space. */
>  		slots->generation = i;
>  		rcu_assign_pointer(kvm->memslots[i], slots);
> @@ -659,12 +659,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
>  		rcu_assign_pointer(kvm->buses[i],
>  			kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT));
>  		if (!kvm->buses[i])
> -			goto out_err_no_disable;
> +			goto out_err_no_arch_destroy_vm;
>  	}
>  
>  	r = kvm_arch_init_vm(kvm, type);
>  	if (r)
> -		goto out_err_no_disable;
> +		goto out_err_no_arch_destroy_vm;
>  
>  	r = hardware_enable_all();
>  	if (r)
> @@ -685,7 +685,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
>  
>  	/*
>  	 * kvm_get_kvm() isn't legal while the vm is being created
> -	 * (e.g. in kvm_arch_init_vm).
> +	 * (e.g. in kvm_arch_init_vm or kvm_arch_destroy_vm).

LOL, even I don't think this one is necessary ;-)

>  	 */
>  	refcount_set(&kvm->users_count, 1);
>  
> @@ -704,6 +704,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
>  out_err_no_srcu:
>  	hardware_disable_all();
>  out_err_no_disable:
> +	kvm_arch_destroy_vm(kvm);
> +out_err_no_arch_destroy_vm:
>  	for (i = 0; i < KVM_NR_BUSES; i++)
>  		kfree(kvm_get_bus(kvm, i));
>  	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
> -- 
> 2.24.0.rc0.303.g954a862665-goog
>
Paolo Bonzini Oct. 25, 2019, 11:37 a.m. UTC | #2
On 25/10/19 01:29, Sean Christopherson wrote:
> On Thu, Oct 24, 2019 at 04:03:27PM -0700, Jim Mattson wrote:
>> From: John Sperbeck <jsperbeck@google.com>
>>
>> In kvm_create_vm(), if we've successfully called kvm_arch_init_vm(), but
>> then fail later in the function, we need to call kvm_arch_destroy_vm()
>> so that it can do any necessary cleanup (like freeing memory).
>>
>> Fixes: 44a95dae1d229a ("KVM: x86: Detect and Initialize AVIC support")
>>
>> Signed-off-by: John Sperbeck <jsperbeck@google.com>
>> Signed-off-by: Jim Mattson <jmattson@google.com>
>> Reviewed-by: Junaid Shahid <junaids@google.com>
>> ---
>>  virt/kvm/kvm_main.c | 10 ++++++----
>>  1 file changed, 6 insertions(+), 4 deletions(-)

Sorry for the back and forth on this---I actually preferred the version 
that did not move refcount_set.  It seems to me that kvm_get_kvm() in 
kvm_arch_init_vm() should be okay as long as it is balanced in 
kvm_arch_destroy_vm().  So we can apply patch 2 first, and then:


diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ec14dae2f538..d6f0696d98ef 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -641,7 +641,6 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
 	mutex_init(&kvm->slots_lock);
-	refcount_set(&kvm->users_count, 1);
 	INIT_LIST_HEAD(&kvm->devices);
 
 	BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
@@ -650,7 +649,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
 		struct kvm_memslots *slots = kvm_alloc_memslots();
 
 		if (!slots)
-			goto out_err_no_disable;
+			goto out_err_no_arch_destroy_vm;
 		/* Generations must be different for each address space. */
 		slots->generation = i;
 		rcu_assign_pointer(kvm->memslots[i], slots);
@@ -660,12 +659,13 @@ static struct kvm *kvm_create_vm(unsigned long type)
 		rcu_assign_pointer(kvm->buses[i],
 			kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT));
 		if (!kvm->buses[i])
-			goto out_err_no_disable;
+			goto out_err_no_arch_destroy_vm;
 	}
 
+	refcount_set(&kvm->users_count, 1);
 	r = kvm_arch_init_vm(kvm, type);
 	if (r)
-		goto out_err_no_disable;
+		goto out_err_no_arch_destroy_vm;
 
 	r = hardware_enable_all();
 	if (r)
@@ -699,7 +699,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
 out_err_no_srcu:
 	hardware_disable_all();
 out_err_no_disable:
-	refcount_set(&kvm->users_count, 0);
+	kvm_arch_destroy_vm(kvm);
+	WARN_ON_ONCE(!refcount_dec_and_test(&kvm->users_count));
+out_err_no_arch_destroy_vm:
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kfree(kvm_get_bus(kvm, i));
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)

Moving the refcount_set is not strictly necessary, but it is nicer as
set+init is matched by the destroy+dec_and_test pair in the unwind path.

If it's okay, I can just commit it.

Paolo
Sean Christopherson Oct. 25, 2019, 2:48 p.m. UTC | #3
On Fri, Oct 25, 2019 at 01:37:56PM +0200, Paolo Bonzini wrote:
> On 25/10/19 01:29, Sean Christopherson wrote:
> > On Thu, Oct 24, 2019 at 04:03:27PM -0700, Jim Mattson wrote:
> >> From: John Sperbeck <jsperbeck@google.com>
> >>
> >> In kvm_create_vm(), if we've successfully called kvm_arch_init_vm(), but
> >> then fail later in the function, we need to call kvm_arch_destroy_vm()
> >> so that it can do any necessary cleanup (like freeing memory).
> >>
> >> Fixes: 44a95dae1d229a ("KVM: x86: Detect and Initialize AVIC support")
> >>
> >> Signed-off-by: John Sperbeck <jsperbeck@google.com>
> >> Signed-off-by: Jim Mattson <jmattson@google.com>
> >> Reviewed-by: Junaid Shahid <junaids@google.com>
> >> ---
> >>  virt/kvm/kvm_main.c | 10 ++++++----
> >>  1 file changed, 6 insertions(+), 4 deletions(-)
> 
> Sorry for the back and forth on this---I actually preferred the version 
> that did not move refcount_set.  It seems to me that kvm_get_kvm() in 
> kvm_arch_init_vm() should be okay as long as it is balanced in 
> kvm_arch_destroy_vm().  So we can apply patch 2 first, and then:

No, this will effectively leak the VM because you'll end up with a cyclical
reference to kvm_put_kvm(), i.e. users_count will never hit zero.

void kvm_put_kvm(struct kvm *kvm)
{
	if (refcount_dec_and_test(&kvm->users_count))
		kvm_destroy_vm(kvm);
		|
		-> kvm_arch_destroy_vm()
		   |
		   -> kvm_put_kvm()
}
Paolo Bonzini Oct. 25, 2019, 2:56 p.m. UTC | #4
On 25/10/19 16:48, Sean Christopherson wrote:
>> It seems to me that kvm_get_kvm() in 
>> kvm_arch_init_vm() should be okay as long as it is balanced in 
>> kvm_arch_destroy_vm().  So we can apply patch 2 first, and then:
> No, this will effectively leak the VM because you'll end up with a cyclical
> reference to kvm_put_kvm(), i.e. users_count will never hit zero.
> 
> void kvm_put_kvm(struct kvm *kvm)
> {
> 	if (refcount_dec_and_test(&kvm->users_count))
> 		kvm_destroy_vm(kvm);
> 		|
> 		-> kvm_arch_destroy_vm()
> 		   |
> 		   -> kvm_put_kvm()
> }

There's two parts to this:

- if kvm_arch_init_vm() calls kvm_get_kvm(), then kvm_arch_destroy_vm()
won't be called until the corresponding kvm_put_kvm().

- if the error case causes kvm_arch_destroy_vm() to be called early,
however, that'd be okay and would not leak memory, as long as
kvm_arch_destroy_vm() detects the situation and calls kvm_put_kvm() itself.

One case could be where you have some kind of delayed work, where the
callback does kvm_put_kvm.  You'd have to cancel the work item and call
kvm_put_kvm in kvm_arch_destroy_vm, and you would go through that path
if kvm_create_vm() fails after kvm_arch_init_vm().

Paolo
Sean Christopherson Oct. 25, 2019, 3:22 p.m. UTC | #5
On Fri, Oct 25, 2019 at 04:56:23PM +0200, Paolo Bonzini wrote:
> On 25/10/19 16:48, Sean Christopherson wrote:
> >> It seems to me that kvm_get_kvm() in 
> >> kvm_arch_init_vm() should be okay as long as it is balanced in 
> >> kvm_arch_destroy_vm().  So we can apply patch 2 first, and then:
> > No, this will effectively leak the VM because you'll end up with a cyclical
> > reference to kvm_put_kvm(), i.e. users_count will never hit zero.
> > 
> > void kvm_put_kvm(struct kvm *kvm)
> > {
> > 	if (refcount_dec_and_test(&kvm->users_count))
> > 		kvm_destroy_vm(kvm);
> > 		|
> > 		-> kvm_arch_destroy_vm()
> > 		   |
> > 		   -> kvm_put_kvm()
> > }
> 
> There's two parts to this:
> 
> - if kvm_arch_init_vm() calls kvm_get_kvm(), then kvm_arch_destroy_vm()
> won't be called until the corresponding kvm_put_kvm().
> 
> - if the error case causes kvm_arch_destroy_vm() to be called early,
> however, that'd be okay and would not leak memory, as long as
> kvm_arch_destroy_vm() detects the situation and calls kvm_put_kvm() itself.
> 
> One case could be where you have some kind of delayed work, where the
> callback does kvm_put_kvm.  You'd have to cancel the work item and call
> kvm_put_kvm in kvm_arch_destroy_vm, and you would go through that path
> if kvm_create_vm() fails after kvm_arch_init_vm().

But do we really want/need to allow handing out references to KVM during
kvm_arch_init_vm()?  AFAICT, it's not currently required by any arch.

If an actual use case comes along then we can move refcount_set() back,
but with the requirement that the arch/user provide a mechanism to
handle the reference with respect to kvm_arch_destroy_vm().  As opposed
to the current behavior, which allows an arch to naively do get()/put()
in init_vm()/destroy_vm() without any hint that what they're doing is
broken.
Paolo Bonzini Oct. 25, 2019, 3:23 p.m. UTC | #6
On 25/10/19 17:22, Sean Christopherson wrote:
> On Fri, Oct 25, 2019 at 04:56:23PM +0200, Paolo Bonzini wrote:
>> On 25/10/19 16:48, Sean Christopherson wrote:
>>>> It seems to me that kvm_get_kvm() in 
>>>> kvm_arch_init_vm() should be okay as long as it is balanced in 
>>>> kvm_arch_destroy_vm().  So we can apply patch 2 first, and then:
>>> No, this will effectively leak the VM because you'll end up with a cyclical
>>> reference to kvm_put_kvm(), i.e. users_count will never hit zero.
>>>
>>> void kvm_put_kvm(struct kvm *kvm)
>>> {
>>> 	if (refcount_dec_and_test(&kvm->users_count))
>>> 		kvm_destroy_vm(kvm);
>>> 		|
>>> 		-> kvm_arch_destroy_vm()
>>> 		   |
>>> 		   -> kvm_put_kvm()
>>> }
>>
>> There's two parts to this:
>>
>> - if kvm_arch_init_vm() calls kvm_get_kvm(), then kvm_arch_destroy_vm()
>> won't be called until the corresponding kvm_put_kvm().
>>
>> - if the error case causes kvm_arch_destroy_vm() to be called early,
>> however, that'd be okay and would not leak memory, as long as
>> kvm_arch_destroy_vm() detects the situation and calls kvm_put_kvm() itself.
>>
>> One case could be where you have some kind of delayed work, where the
>> callback does kvm_put_kvm.  You'd have to cancel the work item and call
>> kvm_put_kvm in kvm_arch_destroy_vm, and you would go through that path
>> if kvm_create_vm() fails after kvm_arch_init_vm().
> 
> But do we really want/need to allow handing out references to KVM during
> kvm_arch_init_vm()?  AFAICT, it's not currently required by any arch.

Probably not, but the full code paths are long, so I don't see much
value in outright forbidding it.  There are very few kvm_get_kvm() calls
anyway in arch-dependent code, so it's easy to check that they're not
causing reference cycles.

Paolo

> If an actual use case comes along then we can move refcount_set() back,
> but with the requirement that the arch/user provide a mechanism to
> handle the reference with respect to kvm_arch_destroy_vm().  As opposed
> to the current behavior, which allows an arch to naively do get()/put()
> in init_vm()/destroy_vm() without any hint that what they're doing is
> broken.
>
Sean Christopherson Oct. 25, 2019, 10:29 p.m. UTC | #7
On Fri, Oct 25, 2019 at 05:23:54PM +0200, Paolo Bonzini wrote:
> On 25/10/19 17:22, Sean Christopherson wrote:
> > On Fri, Oct 25, 2019 at 04:56:23PM +0200, Paolo Bonzini wrote:
> >> On 25/10/19 16:48, Sean Christopherson wrote:
> >>>> It seems to me that kvm_get_kvm() in 
> >>>> kvm_arch_init_vm() should be okay as long as it is balanced in 
> >>>> kvm_arch_destroy_vm().  So we can apply patch 2 first, and then:
> >>> No, this will effectively leak the VM because you'll end up with a cyclical
> >>> reference to kvm_put_kvm(), i.e. users_count will never hit zero.
> >>>
> >>> void kvm_put_kvm(struct kvm *kvm)
> >>> {
> >>> 	if (refcount_dec_and_test(&kvm->users_count))
> >>> 		kvm_destroy_vm(kvm);
> >>> 		|
> >>> 		-> kvm_arch_destroy_vm()
> >>> 		   |
> >>> 		   -> kvm_put_kvm()
> >>> }
> >>
> >> There's two parts to this:
> >>
> >> - if kvm_arch_init_vm() calls kvm_get_kvm(), then kvm_arch_destroy_vm()
> >> won't be called until the corresponding kvm_put_kvm().
> >>
> >> - if the error case causes kvm_arch_destroy_vm() to be called early,
> >> however, that'd be okay and would not leak memory, as long as
> >> kvm_arch_destroy_vm() detects the situation and calls kvm_put_kvm() itself.
> >>
> >> One case could be where you have some kind of delayed work, where the
> >> callback does kvm_put_kvm.  You'd have to cancel the work item and call
> >> kvm_put_kvm in kvm_arch_destroy_vm, and you would go through that path
> >> if kvm_create_vm() fails after kvm_arch_init_vm().
> > 
> > But do we really want/need to allow handing out references to KVM during
> > kvm_arch_init_vm()?  AFAICT, it's not currently required by any arch.
> 
> Probably not, but the full code paths are long, so I don't see much
> value in outright forbidding it.  There are very few kvm_get_kvm() calls
> anyway in arch-dependent code, so it's easy to check that they're not
> causing reference cycles.

I wasn't thinking forbid it for all eternity, more like add a landmine to
force an arch to implement more robust handling in order to enable
kvm_get_kvm() during init_vm().
diff mbox series

Patch

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 77819597d7e8e..f8f0106f8d20f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -649,7 +649,7 @@  static struct kvm *kvm_create_vm(unsigned long type)
 		struct kvm_memslots *slots = kvm_alloc_memslots();
 
 		if (!slots)
-			goto out_err_no_disable;
+			goto out_err_no_arch_destroy_vm;
 		/* Generations must be different for each address space. */
 		slots->generation = i;
 		rcu_assign_pointer(kvm->memslots[i], slots);
@@ -659,12 +659,12 @@  static struct kvm *kvm_create_vm(unsigned long type)
 		rcu_assign_pointer(kvm->buses[i],
 			kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL_ACCOUNT));
 		if (!kvm->buses[i])
-			goto out_err_no_disable;
+			goto out_err_no_arch_destroy_vm;
 	}
 
 	r = kvm_arch_init_vm(kvm, type);
 	if (r)
-		goto out_err_no_disable;
+		goto out_err_no_arch_destroy_vm;
 
 	r = hardware_enable_all();
 	if (r)
@@ -685,7 +685,7 @@  static struct kvm *kvm_create_vm(unsigned long type)
 
 	/*
 	 * kvm_get_kvm() isn't legal while the vm is being created
-	 * (e.g. in kvm_arch_init_vm).
+	 * (e.g. in kvm_arch_init_vm or kvm_arch_destroy_vm).
 	 */
 	refcount_set(&kvm->users_count, 1);
 
@@ -704,6 +704,8 @@  static struct kvm *kvm_create_vm(unsigned long type)
 out_err_no_srcu:
 	hardware_disable_all();
 out_err_no_disable:
+	kvm_arch_destroy_vm(kvm);
+out_err_no_arch_destroy_vm:
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kfree(kvm_get_bus(kvm, i));
 	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)