Patchwork KVM: avoid using rcu_dereference_protected

login
register
mail settings
Submitter Christian Borntraeger
Date Sept. 13, 2017, 10:16 a.m.
Message ID <d55aa63e-1150-ac9a-913a-2cf0a2e01d75@de.ibm.com>
Download mbox | patch
Permalink /patch/9950893/
State New
Headers show

Comments

Christian Borntraeger - Sept. 13, 2017, 10:16 a.m.
Looks like this is now causing the following splat if 
kvm_arch_init_vm fails:

[   13.107404] =============================
[   13.107408] WARNING: suspicious RCU usage
[   13.107412] 4.13.0+ #105 Not tainted
[   13.107415] -----------------------------
[   13.107418] ./include/linux/kvm_host.h:481 suspicious rcu_dereference_check() usage!
[   13.107421] 
[   13.107421] other info that might help us debug this:
[   13.107421] 
[   13.107425] 
[   13.107425] rcu_scheduler_active = 2, debug_locks = 1
[   13.107429] no locks held by qemu-system-s39/79.
[   13.107432] 
[   13.107432] stack backtrace:
[   13.107436] CPU: 0 PID: 79 Comm: qemu-system-s39 Not tainted 4.13.0+ #105
[   13.107439] Hardware name: IBM 2964 NC9 704 (KVM/Linux)
[   13.107442] Call Trace:
[   13.107448] ([<00000000001140b2>] show_stack+0xea/0xf0)
[   13.107452]  [<00000000008a68a4>] dump_stack+0x94/0xd8 
[   13.107456]  [<0000000000134c12>] kvm_dev_ioctl+0x372/0x7a0 
[   13.107460]  [<000000000038f940>] do_vfs_ioctl+0xa8/0x6c8 
[   13.107463]  [<0000000000390004>] SyS_ioctl+0xa4/0xb8 
[   13.107468]  [<00000000008c7a8c>] system_call+0xc4/0x27c 
[   13.107471] no locks held by qemu-system-s39/79.
[   13.107474] 
[   13.107477] =============================


Something like this seems to do the trick.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>



If ok, let me know and I will spin a proper patch.



On 08/02/2017 05:55 PM, Paolo Bonzini wrote:
> During teardown, accesses to memslots and buses are using
> rcu_dereference_protected with an always-true condition because
> these accesses are done outside the usual mutexes.  This
> is because the last reference is gone and there cannot be any
> concurrent modifications, but rcu_dereference_protected is
> ugly and unobvious.
> 
> Instead, check the refcount in kvm_get_bus and __kvm_memslots.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  include/linux/kvm_host.h |  6 ++++--
>  virt/kvm/kvm_main.c      | 11 ++++-------
>  2 files changed, 8 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index aa492cb12ce8..294082cbff0b 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -469,7 +469,8 @@ struct kvm {
>  static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
>  {
>  	return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
> -				      lockdep_is_held(&kvm->slots_lock));
> +				      lockdep_is_held(&kvm->slots_lock) ||
> +				      !refcount_read(&kvm->users_count));
>  }
> 
>  static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
> @@ -562,7 +563,8 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
>  static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
>  {
>  	return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
> -			lockdep_is_held(&kvm->slots_lock));
> +			lockdep_is_held(&kvm->slots_lock) ||
> +			!refcount_read(&kvm->users_count));
>  }
> 
>  static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 7001657be535..26ae5cb726b1 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -705,10 +705,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
>  	hardware_disable_all();
>  out_err_no_disable:
>  	for (i = 0; i < KVM_NR_BUSES; i++)
> -		kfree(rcu_access_pointer(kvm->buses[i]));
> +		kfree(kvm_get_bus(kvm, i));
>  	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
> -		kvm_free_memslots(kvm,
> -			rcu_dereference_protected(kvm->memslots[i], 1));
> +		kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
>  	kvm_arch_free_vm(kvm);
>  	mmdrop(current->mm);
>  	return ERR_PTR(r);
> @@ -741,9 +740,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
>  	spin_unlock(&kvm_lock);
>  	kvm_free_irq_routing(kvm);
>  	for (i = 0; i < KVM_NR_BUSES; i++) {
> -		struct kvm_io_bus *bus;
> +		struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
> 
> -		bus = rcu_dereference_protected(kvm->buses[i], 1);
>  		if (bus)
>  			kvm_io_bus_destroy(bus);
>  		kvm->buses[i] = NULL;
> @@ -757,8 +755,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
>  	kvm_arch_destroy_vm(kvm);
>  	kvm_destroy_devices(kvm);
>  	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
> -		kvm_free_memslots(kvm,
> -			rcu_dereference_protected(kvm->memslots[i], 1));
> +		kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
>  	cleanup_srcu_struct(&kvm->irq_srcu);
>  	cleanup_srcu_struct(&kvm->srcu);
>  	kvm_arch_free_vm(kvm);
>

Patch

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6ed1c20..2d7df5c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -674,6 +674,7 @@  static struct kvm *kvm_create_vm(unsigned long type)
 out_err_no_srcu:
        hardware_disable_all();
 out_err_no_disable:
+       refcount_set(&kvm->users_count, 0);
        for (i = 0; i < KVM_NR_BUSES; i++)
                kfree(kvm_get_bus(kvm, i));
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)