diff mbox series

virtio/vhost-user: fix qemu crash when hotunplug vhost-user-net device

Message ID 20240807095508.54750-1-yaozhenguo@jd.com (mailing list archive)
State New, archived
Headers show
Series virtio/vhost-user: fix qemu crash when hotunplug vhost-user-net device | expand

Commit Message

yaozhenguo Aug. 7, 2024, 9:55 a.m. UTC
When hotplug and hotunplug vhost-user-net device quickly.
qemu will crash. BT is as below:

0  __pthread_kill_implementation () at /usr/lib64/libc.so.6
1  raise () at /usr/lib64/libc.so.6
2  abort () at /usr/lib64/libc.so.6
3  try_dequeue () at ../util/rcu.c:235
4  call_rcu_thread (opaque=opaque@entry=0x0) at ../util/rcu.c:288
5  qemu_thread_start (args=0x55b10d9ceaa0) at ../util/qemu-thread-posix.c:541
6  start_thread () at /usr/lib64/libc.so.6
7  clone3 () at /usr/lib64/libc.so.6

1. device_del qmp process

virtio_set_status
  vhost_dev_stop
    vhost_user_get_vring_base
      vhost_user_host_notifier_remove

vhost_user_slave_handle_vring_host_notifier maybe called asynchronous after
vhost_user_host_notifier_remove. vhost_user_host_notifier_remove will not
all call_rcu because of notifier->addr is NULL at this time.

2. netdev_del qmp process

vhost_user_cleanup
       vhost_user_host_notifier_remove
       g_free_rcu

vhost_user_host_notifier_remove and g_free_rcu will sumbit same rcu_head
to rcu node list. rcu_call_count add twice but only one node is added.
rcu thread will abort when calling try_dequeue with node list is empty.
Fix this by moving g_free(n) to vhost_user_host_notifier_free.

Fixes: 503e355465 ("virtio/vhost-user: dynamically assign VhostUserHostNotifiers")
Signed-off-by: yaozhenguo <yaozhenguo@jd.com>
---
 hw/virtio/vhost-user.c         | 23 +++++++++++------------
 include/hw/virtio/vhost-user.h |  1 +
 2 files changed, 12 insertions(+), 12 deletions(-)

Comments

yaozhenguo Aug. 27, 2024, 1:37 a.m. UTC | #1
Hi Alex:
     Any update?

yaozhenguo <yaozhenguo1@gmail.com> 于2024年8月7日周三 17:55写道:
>
> When hotplug and hotunplug vhost-user-net device quickly.
> qemu will crash. BT is as below:
>
> 0  __pthread_kill_implementation () at /usr/lib64/libc.so.6
> 1  raise () at /usr/lib64/libc.so.6
> 2  abort () at /usr/lib64/libc.so.6
> 3  try_dequeue () at ../util/rcu.c:235
> 4  call_rcu_thread (opaque=opaque@entry=0x0) at ../util/rcu.c:288
> 5  qemu_thread_start (args=0x55b10d9ceaa0) at ../util/qemu-thread-posix.c:541
> 6  start_thread () at /usr/lib64/libc.so.6
> 7  clone3 () at /usr/lib64/libc.so.6
>
> 1. device_del qmp process
>
> virtio_set_status
>   vhost_dev_stop
>     vhost_user_get_vring_base
>       vhost_user_host_notifier_remove
>
> vhost_user_slave_handle_vring_host_notifier maybe called asynchronous after
> vhost_user_host_notifier_remove. vhost_user_host_notifier_remove will not
> all call_rcu because of notifier->addr is NULL at this time.
>
> 2. netdev_del qmp process
>
> vhost_user_cleanup
>        vhost_user_host_notifier_remove
>        g_free_rcu
>
> vhost_user_host_notifier_remove and g_free_rcu will sumbit same rcu_head
> to rcu node list. rcu_call_count add twice but only one node is added.
> rcu thread will abort when calling try_dequeue with node list is empty.
> Fix this by moving g_free(n) to vhost_user_host_notifier_free.
>
> Fixes: 503e355465 ("virtio/vhost-user: dynamically assign VhostUserHostNotifiers")
> Signed-off-by: yaozhenguo <yaozhenguo@jd.com>
> ---
>  hw/virtio/vhost-user.c         | 23 +++++++++++------------
>  include/hw/virtio/vhost-user.h |  1 +
>  2 files changed, 12 insertions(+), 12 deletions(-)
>
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index 00561daa06..7ab37c0da2 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -1188,6 +1188,12 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
>      assert(n && n->unmap_addr);
>      munmap(n->unmap_addr, qemu_real_host_page_size());
>      n->unmap_addr = NULL;
> +    if (n->need_free) {
> +        memory_region_transaction_begin();
> +        object_unparent(OBJECT(&n->mr));
> +        memory_region_transaction_commit();
> +        g_free(n);
> +    }
>  }
>
>  /*
> @@ -1195,7 +1201,7 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
>   * under rcu.
>   */
>  static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
> -                                            VirtIODevice *vdev)
> +                                            VirtIODevice *vdev, bool free)
>  {
>      if (n->addr) {
>          if (vdev) {
> @@ -1204,6 +1210,7 @@ static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
>          assert(!n->unmap_addr);
>          n->unmap_addr = n->addr;
>          n->addr = NULL;
> +        n->need_free = free;
>          call_rcu(n, vhost_user_host_notifier_free, rcu);
>      }
>  }
> @@ -1280,7 +1287,7 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
>
>      VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
>      if (n) {
> -        vhost_user_host_notifier_remove(n, dev->vdev);
> +        vhost_user_host_notifier_remove(n, dev->vdev, false);
>      }
>
>      ret = vhost_user_write(dev, &msg, NULL, 0);
> @@ -1562,7 +1569,7 @@ static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
>       * new mapped address.
>       */
>      n = fetch_or_create_notifier(user, queue_idx);
> -    vhost_user_host_notifier_remove(n, vdev);
> +    vhost_user_host_notifier_remove(n, vdev, false);
>
>      if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
>          return 0;
> @@ -2737,13 +2744,7 @@ static void vhost_user_state_destroy(gpointer data)
>  {
>      VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
>      if (n) {
> -        vhost_user_host_notifier_remove(n, NULL);
> -        object_unparent(OBJECT(&n->mr));
> -        /*
> -         * We can't free until vhost_user_host_notifier_remove has
> -         * done it's thing so schedule the free with RCU.
> -         */
> -        g_free_rcu(n, rcu);
> +        vhost_user_host_notifier_remove(n, NULL, true);
>      }
>  }
>
> @@ -2765,9 +2766,7 @@ void vhost_user_cleanup(VhostUserState *user)
>      if (!user->chr) {
>          return;
>      }
> -    memory_region_transaction_begin();
>      user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
> -    memory_region_transaction_commit();
>      user->chr = NULL;
>  }
>
> diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
> index 324cd8663a..a171f29e0b 100644
> --- a/include/hw/virtio/vhost-user.h
> +++ b/include/hw/virtio/vhost-user.h
> @@ -54,6 +54,7 @@ typedef struct VhostUserHostNotifier {
>      void *addr;
>      void *unmap_addr;
>      int idx;
> +    bool need_free;
>  } VhostUserHostNotifier;
>
>  /**
> --
> 2.43.0
>
Stefano Garzarella Aug. 27, 2024, 8 a.m. UTC | #2
On Wed, Aug 07, 2024 at 05:55:08PM GMT, yaozhenguo wrote:
>When hotplug and hotunplug vhost-user-net device quickly.

I'd replace the . with ,

>qemu will crash. BT is as below:
>
>0  __pthread_kill_implementation () at /usr/lib64/libc.so.6
>1  raise () at /usr/lib64/libc.so.6
>2  abort () at /usr/lib64/libc.so.6
>3  try_dequeue () at ../util/rcu.c:235
>4  call_rcu_thread (opaque=opaque@entry=0x0) at ../util/rcu.c:288
>5  qemu_thread_start (args=0x55b10d9ceaa0) at ../util/qemu-thread-posix.c:541
>6  start_thread () at /usr/lib64/libc.so.6
>7  clone3 () at /usr/lib64/libc.so.6
>
>1. device_del qmp process
>
>virtio_set_status
>  vhost_dev_stop
>    vhost_user_get_vring_base
>      vhost_user_host_notifier_remove
>
>vhost_user_slave_handle_vring_host_notifier maybe called asynchronous after
  ^
Now it's called vhost_user_backend_handle_vring_host_notifier, I'd 
suggest to use the new name.

>vhost_user_host_notifier_remove. vhost_user_host_notifier_remove will 
>not
>all call_rcu because of notifier->addr is NULL at this time.

s/all/call ?

>
>2. netdev_del qmp process
>
>vhost_user_cleanup
>       vhost_user_host_notifier_remove
>       g_free_rcu
>
>vhost_user_host_notifier_remove and g_free_rcu will sumbit same rcu_head

s/sumbit/submit

>to rcu node list. rcu_call_count add twice but only one node is added.
>rcu thread will abort when calling try_dequeue with node list is empty.

What's not clear to me is how 1 and 2 are related, could you explain 
that?

>Fix this by moving g_free(n) to vhost_user_host_notifier_free.
>`
>Fixes: 503e355465 ("virtio/vhost-user: dynamically assign VhostUserHostNotifiers")
>Signed-off-by: yaozhenguo <yaozhenguo@jd.com>
>---
> hw/virtio/vhost-user.c         | 23 +++++++++++------------
> include/hw/virtio/vhost-user.h |  1 +
> 2 files changed, 12 insertions(+), 12 deletions(-)
>
>diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
>index 00561daa06..7ab37c0da2 100644
>--- a/hw/virtio/vhost-user.c
>+++ b/hw/virtio/vhost-user.c
>@@ -1188,6 +1188,12 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
>     assert(n && n->unmap_addr);
>     munmap(n->unmap_addr, qemu_real_host_page_size());
>     n->unmap_addr = NULL;
>+    if (n->need_free) {
>+        memory_region_transaction_begin();
>+        object_unparent(OBJECT(&n->mr));
>+        memory_region_transaction_commit();
>+        g_free(n);
>+    }
> }
>
> /*
>@@ -1195,7 +1201,7 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
>  * under rcu.
>  */
> static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
>-                                            VirtIODevice *vdev)
>+                                            VirtIODevice *vdev, bool free)
> {
>     if (n->addr) {
>         if (vdev) {
>@@ -1204,6 +1210,7 @@ static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
>         assert(!n->unmap_addr);
>         n->unmap_addr = n->addr;
>         n->addr = NULL;
>+        n->need_free = free;
>         call_rcu(n, vhost_user_host_notifier_free, rcu);
>     }
> }
>@@ -1280,7 +1287,7 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
>
>     VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
>     if (n) {
>-        vhost_user_host_notifier_remove(n, dev->vdev);
>+        vhost_user_host_notifier_remove(n, dev->vdev, false);
>     }
>
>     ret = vhost_user_write(dev, &msg, NULL, 0);
>@@ -1562,7 +1569,7 @@ static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
>      * new mapped address.
>      */
>     n = fetch_or_create_notifier(user, queue_idx);
>-    vhost_user_host_notifier_remove(n, vdev);
>+    vhost_user_host_notifier_remove(n, vdev, false);
>
>     if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
>         return 0;
>@@ -2737,13 +2744,7 @@ static void vhost_user_state_destroy(gpointer data)
> {
>     VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
>     if (n) {
>-        vhost_user_host_notifier_remove(n, NULL);
>-        object_unparent(OBJECT(&n->mr));
>-        /*
>-         * We can't free until vhost_user_host_notifier_remove has
>-         * done it's thing so schedule the free with RCU.
>-         */
>-        g_free_rcu(n, rcu);
>+        vhost_user_host_notifier_remove(n, NULL, true);

I'm not sure I understand the problem well, but could it be that now we 
don't see the problem anymore, but we have a memory leak?

Here for example could it be the case that `n->addr` is NULL and 
therefore `vhost_user_host_notifier_free` with `n->need_free = true` 
will never be submitted?

>     }
> }
>
>@@ -2765,9 +2766,7 @@ void vhost_user_cleanup(VhostUserState *user)
>     if (!user->chr) {
>         return;
>     }
>-    memory_region_transaction_begin();
>     user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, 
>     true);
>-    memory_region_transaction_commit();

This is no longer necessary, because the `user->notifiers` free function 
no longer calls `object_unparent(OBJECT(&n->mr))`, right?

Maybe it's worth mentioning in the commit description.

>     user->chr = NULL;
> }
>
>diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
>index 324cd8663a..a171f29e0b 100644
>--- a/include/hw/virtio/vhost-user.h
>+++ b/include/hw/virtio/vhost-user.h
>@@ -54,6 +54,7 @@ typedef struct VhostUserHostNotifier {
>     void *addr;
>     void *unmap_addr;
>     int idx;
>+    bool need_free;
> } VhostUserHostNotifier;
>
> /**
>-- 
>2.43.0
>
yaozhenguo Aug. 28, 2024, 6:50 a.m. UTC | #3
I am very sorry that my previous description was not accurate. Below I
will describe the steps to reproduce this problem and my analysis in
detail.The conditions for reproducing this problem are quite
demanding. First, let me introduce my environment. I use DPDK vdpa to
drive
DPU to implement a vhost-user type network card. DPDK vdpa software
communicates with QEMU through the vhost protocol. The steps to
reproduce are as follows:

1. Start a Windows virtual machine.
2. Use the vish command to hotplug and unplug a 32 queues
vhost-user-net network card.
3. After a while, QEMU will crash.

I added some logs to locate this problem. The following is how I
located the root cause of this problem based on logs and code
analysis. The steps to reproduce the problem involve hot plugging and
hot unplugging of network cards. Hot plugging of network cards
involves two processes. Process A is to insert the device into qemu,
and process B is the process of guest operating system initializing
the network card. When process A is completed, the virsh attach-device
command returns. At this time, you can call virsh detach-device to
perform hot unplug operations. Generally, this will not be a problem
because the network card initializes very quickly. However, since my
network card is a vhost-user type network card, and it is implemented
by DPDK vdpa, there is an asynchronous situation. When the Guest
operating system is initializing the network card, some messages from
vhost-user returned to QEMU may be delayed. The problem occurs here.

For example, the message VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG is
responsible for assigning the value of VhostUserHostNotifier->addr,
which is the hot plug process B. There are also two processes for hot
unplugging devices. libvirt will send two QMP commands to qemu: one is
device_del and the other is netdev_del. If this message arrives after
the first hot unplug command, there will be problems. The following is
a detailed analysis: The key function of the device_del command
execution: virtio_set_status->vhost_dev_stop. In the vhost_dev_stop
function, all queues will be traversed and
vhost_user_get_vring_base-->vhost_user_host_notifier_remove will be
called because VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG has not
arrived in some queues at this time, so VhostUserHostNotifier->addr
has no value at this time, so nothing will be done at this time. For
those with values, vhost_user_host_notifier_free will be submitted to
the rcu thread
and clear n->addr.

Next, libvirt will send the netdev_del QMP command.
netdev_del--> vhost_user_cleanup

Because some queues VhostUserHostNotifier->addr are not empty at this
time, there will be the following call path:

1. vhost_user_host_notifier_remove->call_rcu(n,
vhost_user_host_notifier_free, rcu);
2. g_free_rcu(n, rcu);

The same rcu was submitted twice. In call_rcu1, if two identical nodes
are added to the node list, only one will be successfully added, but
rcu_call_count will be added twice. When rcu processes rcu node, it
will check whether rcu_call_count is empty. If not, it will take the
node from node list. Because the actual node in node list is one less
than rcu_call_count, it will cause rcu_call_count to be not empty, but
there is no node in node list. In this way, rcu thread will abort, the
code is as follows:
if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) {
    abort();
}
This is the reason why QEMU crashed. Since the latest QEMU cannot run
in our environment due to incompatibility of vhost-user messages, and
this problem is difficult to reproduce, I was unable to reproduce the
problem on the latest qemu. However, from the upstream code, since
VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG is asynchronous, we cannot
assume that all VhostUserHostNotifier->addr in vhost_user_cleanup are
empty. If addr is not empty, there will be a problem. Otherwise,
vhost_user_host_notifier_remove is not necessary to call in
vhost_user_cleanup.

Therefore, the two tasks of vhost_user_host_notifier_free and free
VhostUserHostNotifier need to be placed in a rcu node, and use
n->need_free to determine whether it is free VhostUserHostNotifier.

Stefano Garzarella <sgarzare@redhat.com> 于2024年8月27日周二 16:00写道:
>
> On Wed, Aug 07, 2024 at 05:55:08PM GMT, yaozhenguo wrote:
> >When hotplug and hotunplug vhost-user-net device quickly.
>
> I'd replace the . with ,
>
> >qemu will crash. BT is as below:
> >
> >0  __pthread_kill_implementation () at /usr/lib64/libc.so.6
> >1  raise () at /usr/lib64/libc.so.6
> >2  abort () at /usr/lib64/libc.so.6
> >3  try_dequeue () at ../util/rcu.c:235
> >4  call_rcu_thread (opaque=opaque@entry=0x0) at ../util/rcu.c:288
> >5  qemu_thread_start (args=0x55b10d9ceaa0) at ../util/qemu-thread-posix.c:541
> >6  start_thread () at /usr/lib64/libc.so.6
> >7  clone3 () at /usr/lib64/libc.so.6
> >
> >1. device_del qmp process
> >
> >virtio_set_status
> >  vhost_dev_stop
> >    vhost_user_get_vring_base
> >      vhost_user_host_notifier_remove
> >
> >vhost_user_slave_handle_vring_host_notifier maybe called asynchronous after
>   ^
> Now it's called vhost_user_backend_handle_vring_host_notifier, I'd
> suggest to use the new name.
>
> >vhost_user_host_notifier_remove. vhost_user_host_notifier_remove will
> >not
> >all call_rcu because of notifier->addr is NULL at this time.
>
> s/all/call ?
>
Yes
> >
> >2. netdev_del qmp process
> >
> >vhost_user_cleanup
> >       vhost_user_host_notifier_remove
> >       g_free_rcu
> >
> >vhost_user_host_notifier_remove and g_free_rcu will sumbit same rcu_head
>
> s/sumbit/submit
>
Sorry about this mistake
> >to rcu node list. rcu_call_count add twice but only one node is added.
> >rcu thread will abort when calling try_dequeue with node list is empty.
>
> What's not clear to me is how 1 and 2 are related, could you explain
> that?
>
Libvirt will send two QMP commands in succession when the network card
is hot-unplugged, which can help understand the problem. However, I
did not describe it clearly. Please see the detailed description at
the top.
> >Fix this by moving g_free(n) to vhost_user_host_notifier_free.
> >`
> >Fixes: 503e355465 ("virtio/vhost-user: dynamically assign VhostUserHostNotifiers")
> >Signed-off-by: yaozhenguo <yaozhenguo@jd.com>
> >---
> > hw/virtio/vhost-user.c         | 23 +++++++++++------------
> > include/hw/virtio/vhost-user.h |  1 +
> > 2 files changed, 12 insertions(+), 12 deletions(-)
> >
> >diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> >index 00561daa06..7ab37c0da2 100644
> >--- a/hw/virtio/vhost-user.c
> >+++ b/hw/virtio/vhost-user.c
> >@@ -1188,6 +1188,12 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
> >     assert(n && n->unmap_addr);
> >     munmap(n->unmap_addr, qemu_real_host_page_size());
> >     n->unmap_addr = NULL;
> >+    if (n->need_free) {
> >+        memory_region_transaction_begin();
> >+        object_unparent(OBJECT(&n->mr));
> >+        memory_region_transaction_commit();
> >+        g_free(n);
> >+    }
> > }
> >
> > /*
> >@@ -1195,7 +1201,7 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
> >  * under rcu.
> >  */
> > static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
> >-                                            VirtIODevice *vdev)
> >+                                            VirtIODevice *vdev, bool free)
> > {
> >     if (n->addr) {
> >         if (vdev) {
> >@@ -1204,6 +1210,7 @@ static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
> >         assert(!n->unmap_addr);
> >         n->unmap_addr = n->addr;
> >         n->addr = NULL;
> >+        n->need_free = free;
> >         call_rcu(n, vhost_user_host_notifier_free, rcu);
> >     }
> > }
> >@@ -1280,7 +1287,7 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
> >
> >     VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
> >     if (n) {
> >-        vhost_user_host_notifier_remove(n, dev->vdev);
> >+        vhost_user_host_notifier_remove(n, dev->vdev, false);
> >     }
> >
> >     ret = vhost_user_write(dev, &msg, NULL, 0);
> >@@ -1562,7 +1569,7 @@ static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
> >      * new mapped address.
> >      */
> >     n = fetch_or_create_notifier(user, queue_idx);
> >-    vhost_user_host_notifier_remove(n, vdev);
> >+    vhost_user_host_notifier_remove(n, vdev, false);
> >
> >     if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
> >         return 0;
> >@@ -2737,13 +2744,7 @@ static void vhost_user_state_destroy(gpointer data)
> > {
> >     VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
> >     if (n) {
> >-        vhost_user_host_notifier_remove(n, NULL);
> >-        object_unparent(OBJECT(&n->mr));
> >-        /*
> >-         * We can't free until vhost_user_host_notifier_remove has
> >-         * done it's thing so schedule the free with RCU.
> >-         */
> >-        g_free_rcu(n, rcu);
> >+        vhost_user_host_notifier_remove(n, NULL, true);
>
> I'm not sure I understand the problem well, but could it be that now we
> don't see the problem anymore, but we have a memory leak?
>
> Here for example could it be the case that `n->addr` is NULL and
> therefore `vhost_user_host_notifier_free` with `n->need_free = true`
> will never be submitted?
>
Please see the detailed description at the top.
> >     }
> > }
> >
> >@@ -2765,9 +2766,7 @@ void vhost_user_cleanup(VhostUserState *user)
> >     if (!user->chr) {
> >         return;
> >     }
> >-    memory_region_transaction_begin();
> >     user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers,
> >     true);
> >-    memory_region_transaction_commit();
>
> This is no longer necessary, because the `user->notifiers` free function
> no longer calls `object_unparent(OBJECT(&n->mr))`, right?
>
> Maybe it's worth mentioning in the commit description.
>
Yes, I originally thought so. But after reading the code carefully, I
found that my understanding was not correct.It is still needed here,
because the user->notifiers free function will call
virtio_queue_set_host_notifier_mr. This should also require
memory_region_transaction_commit. Is this correct?

> >     user->chr = NULL;
> > }
> >
> >diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
> >index 324cd8663a..a171f29e0b 100644
> >--- a/include/hw/virtio/vhost-user.h
> >+++ b/include/hw/virtio/vhost-user.h
> >@@ -54,6 +54,7 @@ typedef struct VhostUserHostNotifier {
> >     void *addr;
> >     void *unmap_addr;
> >     int idx;
> >+    bool need_free;
> > } VhostUserHostNotifier;
> >
> > /**
> >--
> >2.43.0
> >
>
Stefano Garzarella Sept. 3, 2024, 10:05 a.m. UTC | #4
On Wed, Aug 28, 2024 at 02:50:57PM GMT, Zhenguo Yao wrote:
>I am very sorry that my previous description was not accurate. Below I
>will describe the steps to reproduce this problem and my analysis in
>detail.The conditions for reproducing this problem are quite
>demanding. First, let me introduce my environment. I use DPDK vdpa to
>drive
>DPU to implement a vhost-user type network card. DPDK vdpa software
>communicates with QEMU through the vhost protocol. The steps to
>reproduce are as follows:
>
>1. Start a Windows virtual machine.
>2. Use the vish command to hotplug and unplug a 32 queues
>vhost-user-net network card.
>3. After a while, QEMU will crash.
>
>I added some logs to locate this problem. The following is how I
>located the root cause of this problem based on logs and code
>analysis. The steps to reproduce the problem involve hot plugging and
>hot unplugging of network cards. Hot plugging of network cards
>involves two processes. Process A is to insert the device into qemu,
>and process B is the process of guest operating system initializing
>the network card. When process A is completed, the virsh attach-device
>command returns. At this time, you can call virsh detach-device to
>perform hot unplug operations. Generally, this will not be a problem
>because the network card initializes very quickly. However, since my
>network card is a vhost-user type network card, and it is implemented
>by DPDK vdpa, there is an asynchronous situation. When the Guest
>operating system is initializing the network card, some messages from
>vhost-user returned to QEMU may be delayed. The problem occurs here.
>
>For example, the message VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG is
>responsible for assigning the value of VhostUserHostNotifier->addr,
>which is the hot plug process B. There are also two processes for hot
>unplugging devices. libvirt will send two QMP commands to qemu: one is
>device_del and the other is netdev_del. If this message arrives after
>the first hot unplug command, there will be problems. The following is
>a detailed analysis: The key function of the device_del command
>execution: virtio_set_status->vhost_dev_stop. In the vhost_dev_stop
>function, all queues will be traversed and
>vhost_user_get_vring_base-->vhost_user_host_notifier_remove will be
>called because VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG has not
>arrived in some queues at this time, so VhostUserHostNotifier->addr
>has no value at this time, so nothing will be done at this time. For
>those with values, vhost_user_host_notifier_free will be submitted to
>the rcu thread
>and clear n->addr.
>
>Next, libvirt will send the netdev_del QMP command.
>netdev_del--> vhost_user_cleanup
>
>Because some queues VhostUserHostNotifier->addr are not empty at this
>time, there will be the following call path:
>
>1. vhost_user_host_notifier_remove->call_rcu(n,
>vhost_user_host_notifier_free, rcu);
>2. g_free_rcu(n, rcu);

Got it, so call_rcu() and g_free_rcu() must be avoided on the same node
in any case, right?

I went through docs/devel/rcu.txt and code, it's not explicit, but it
seems clear that only one should be used for cleanup.

>
>The same rcu was submitted twice. In call_rcu1, if two identical nodes
>are added to the node list, only one will be successfully added, but
>rcu_call_count will be added twice. When rcu processes rcu node, it
>will check whether rcu_call_count is empty. If not, it will take the
>node from node list. Because the actual node in node list is one less
>than rcu_call_count, it will cause rcu_call_count to be not empty, but
>there is no node in node list. In this way, rcu thread will abort, the
>code is as follows:
>if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) {
>    abort();
>}
>This is the reason why QEMU crashed. Since the latest QEMU cannot run
>in our environment due to incompatibility of vhost-user messages, and
>this problem is difficult to reproduce, I was unable to reproduce the
>problem on the latest qemu. However, from the upstream code, since
>VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG is asynchronous, we cannot
>assume that all VhostUserHostNotifier->addr in vhost_user_cleanup are
>empty. If addr is not empty, there will be a problem. Otherwise,
>vhost_user_host_notifier_remove is not necessary to call in
>vhost_user_cleanup.
>
>Therefore, the two tasks of vhost_user_host_notifier_free and free
>VhostUserHostNotifier need to be placed in a rcu node, and use
>n->need_free to determine whether it is free VhostUserHostNotifier.

yeah, thank you for this detailed analysis, now it's clear.

My suggestion is to put some of it in the commit description as well,
maybe a summary with the main things.

>
>Stefano Garzarella <sgarzare@redhat.com> 于2024年8月27日周二 16:00写道:
>>
>> On Wed, Aug 07, 2024 at 05:55:08PM GMT, yaozhenguo wrote:
>> >When hotplug and hotunplug vhost-user-net device quickly.
>>
>> I'd replace the . with ,
>>
>> >qemu will crash. BT is as below:
>> >
>> >0  __pthread_kill_implementation () at /usr/lib64/libc.so.6
>> >1  raise () at /usr/lib64/libc.so.6
>> >2  abort () at /usr/lib64/libc.so.6
>> >3  try_dequeue () at ../util/rcu.c:235
>> >4  call_rcu_thread (opaque=opaque@entry=0x0) at ../util/rcu.c:288
>> >5  qemu_thread_start (args=0x55b10d9ceaa0) at ../util/qemu-thread-posix.c:541
>> >6  start_thread () at /usr/lib64/libc.so.6
>> >7  clone3 () at /usr/lib64/libc.so.6
>> >
>> >1. device_del qmp process
>> >
>> >virtio_set_status
>> >  vhost_dev_stop
>> >    vhost_user_get_vring_base
>> >      vhost_user_host_notifier_remove
>> >
>> >vhost_user_slave_handle_vring_host_notifier maybe called asynchronous after
>>   ^
>> Now it's called vhost_user_backend_handle_vring_host_notifier, I'd
>> suggest to use the new name.
>>
>> >vhost_user_host_notifier_remove. vhost_user_host_notifier_remove will
>> >not
>> >all call_rcu because of notifier->addr is NULL at this time.
>>
>> s/all/call ?
>>
>Yes
>> >
>> >2. netdev_del qmp process
>> >
>> >vhost_user_cleanup
>> >       vhost_user_host_notifier_remove
>> >       g_free_rcu
>> >
>> >vhost_user_host_notifier_remove and g_free_rcu will sumbit same rcu_head
>>
>> s/sumbit/submit
>>
>Sorry about this mistake

don't apologize, it can happen ;-)

I usually use `--codespell` with ./scripts/checkpatch.pl

>> >to rcu node list. rcu_call_count add twice but only one node is added.
>> >rcu thread will abort when calling try_dequeue with node list is empty.
>>
>> What's not clear to me is how 1 and 2 are related, could you explain
>> that?
>>
>Libvirt will send two QMP commands in succession when the network card
>is hot-unplugged, which can help understand the problem. However, I
>did not describe it clearly. Please see the detailed description at
>the top.

yep, now I got it.

>> >Fix this by moving g_free(n) to vhost_user_host_notifier_free.
>> >`
>> >Fixes: 503e355465 ("virtio/vhost-user: dynamically assign VhostUserHostNotifiers")
>> >Signed-off-by: yaozhenguo <yaozhenguo@jd.com>
>> >---
>> > hw/virtio/vhost-user.c         | 23 +++++++++++------------
>> > include/hw/virtio/vhost-user.h |  1 +
>> > 2 files changed, 12 insertions(+), 12 deletions(-)
>> >
>> >diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
>> >index 00561daa06..7ab37c0da2 100644
>> >--- a/hw/virtio/vhost-user.c
>> >+++ b/hw/virtio/vhost-user.c
>> >@@ -1188,6 +1188,12 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
>> >     assert(n && n->unmap_addr);
>> >     munmap(n->unmap_addr, qemu_real_host_page_size());
>> >     n->unmap_addr = NULL;
>> >+    if (n->need_free) {
>> >+        memory_region_transaction_begin();
>> >+        object_unparent(OBJECT(&n->mr));
>> >+        memory_region_transaction_commit();
>> >+        g_free(n);
>> >+    }
>> > }
>> >
>> > /*
>> >@@ -1195,7 +1201,7 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
>> >  * under rcu.
>> >  */
>> > static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
>> >-                                            VirtIODevice *vdev)
>> >+                                            VirtIODevice *vdev, bool free)
>> > {
>> >     if (n->addr) {
>> >         if (vdev) {
>> >@@ -1204,6 +1210,7 @@ static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
>> >         assert(!n->unmap_addr);
>> >         n->unmap_addr = n->addr;
>> >         n->addr = NULL;
>> >+        n->need_free = free;
>> >         call_rcu(n, vhost_user_host_notifier_free, rcu);
>> >     }
>> > }
>> >@@ -1280,7 +1287,7 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
>> >
>> >     VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
>> >     if (n) {
>> >-        vhost_user_host_notifier_remove(n, dev->vdev);
>> >+        vhost_user_host_notifier_remove(n, dev->vdev, false);
>> >     }
>> >
>> >     ret = vhost_user_write(dev, &msg, NULL, 0);
>> >@@ -1562,7 +1569,7 @@ static int 
>> >vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
>> >      * new mapped address.
>> >      *)
>> >     n = fetch_or_create_notifier(user, queue_idx);
>> >-    vhost_user_host_notifier_remove(n, vdev);
>> >+    vhost_user_host_notifier_remove(n, vdev, false);
>> >
>> >     if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
>> >         return 0;
>> >@@ -2737,13 +2744,7 @@ static void vhost_user_state_destroy(gpointer data)
>> > {
>> >     VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
>> >     if (n) {
>> >-        vhost_user_host_notifier_remove(n, NULL);
>> >-        object_unparent(OBJECT(&n->mr));
>> >-        /*
>> >-         * We can't free until vhost_user_host_notifier_remove has
>> >-         * done it's thing so schedule the free with RCU.
>> >-         */
>> >-        g_free_rcu(n, rcu);
>> >+        vhost_user_host_notifier_remove(n, NULL, true);
>>
>> I'm not sure I understand the problem well, but could it be that now we
>> don't see the problem anymore, but we have a memory leak?
>>
>> Here for example could it be the case that `n->addr` is NULL and
>> therefore `vhost_user_host_notifier_free` with `n->need_free = true`
>> will never be submitted?
>>
>Please see the detailed description at the top.

I see it, but my question is still there.

I mean, in vhost_user_host_notifier_free() should we call the new steps
you added (object_unparent, g_free, etc.), also if `n->addr` is NULL ?

>> >     }
>> > }
>> >
>> >@@ -2765,9 +2766,7 @@ void vhost_user_cleanup(VhostUserState *user)
>> >     if (!user->chr) {
>> >         return;
>> >     }
>> >-    memory_region_transaction_begin();
>> >     user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers,
>> >     true);
>> >-    memory_region_transaction_commit();
>>
>> This is no longer necessary, because the `user->notifiers` free function
>> no longer calls `object_unparent(OBJECT(&n->mr))`, right?
>>
>> Maybe it's worth mentioning in the commit description.
>>
>Yes, I originally thought so. But after reading the code carefully, I
>found that my understanding was not correct.It is still needed here,
>because the user->notifiers free function will call
>virtio_queue_set_host_notifier_mr. This should also require
>memory_region_transaction_commit. Is this correct?

yeah, I think so. For examples virtio_pci_set_host_notifier_mr() is
calling memory_region_add_subregion_overlap() or
memory_region_del_subregion()

Thanks,
Stefano

>
>> >     user->chr = NULL;
>> > }
>> >
>> >diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
>> >index 324cd8663a..a171f29e0b 100644
>> >--- a/include/hw/virtio/vhost-user.h
>> >+++ b/include/hw/virtio/vhost-user.h
>> >@@ -54,6 +54,7 @@ typedef struct VhostUserHostNotifier {
>> >     void *addr;
>> >     void *unmap_addr;
>> >     int idx;
>> >+    bool need_free;
>> > } VhostUserHostNotifier;
>> >
>> > /**
>> >--
>> >2.43.0
>> >
>>
>
yaozhenguo Sept. 11, 2024, 2:56 a.m. UTC | #5
Stefano Garzarella <sgarzare@redhat.com> 于2024年9月3日周二 18:05写道:
>
> On Wed, Aug 28, 2024 at 02:50:57PM GMT, Zhenguo Yao wrote:
> >I am very sorry that my previous description was not accurate. Below I
> >will describe the steps to reproduce this problem and my analysis in
> >detail.The conditions for reproducing this problem are quite
> >demanding. First, let me introduce my environment. I use DPDK vdpa to
> >drive
> >DPU to implement a vhost-user type network card. DPDK vdpa software
> >communicates with QEMU through the vhost protocol. The steps to
> >reproduce are as follows:
> >
> >1. Start a Windows virtual machine.
> >2. Use the vish command to hotplug and unplug a 32 queues
> >vhost-user-net network card.
> >3. After a while, QEMU will crash.
> >
> >I added some logs to locate this problem. The following is how I
> >located the root cause of this problem based on logs and code
> >analysis. The steps to reproduce the problem involve hot plugging and
> >hot unplugging of network cards. Hot plugging of network cards
> >involves two processes. Process A is to insert the device into qemu,
> >and process B is the process of guest operating system initializing
> >the network card. When process A is completed, the virsh attach-device
> >command returns. At this time, you can call virsh detach-device to
> >perform hot unplug operations. Generally, this will not be a problem
> >because the network card initializes very quickly. However, since my
> >network card is a vhost-user type network card, and it is implemented
> >by DPDK vdpa, there is an asynchronous situation. When the Guest
> >operating system is initializing the network card, some messages from
> >vhost-user returned to QEMU may be delayed. The problem occurs here.
> >
> >For example, the message VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG is
> >responsible for assigning the value of VhostUserHostNotifier->addr,
> >which is the hot plug process B. There are also two processes for hot
> >unplugging devices. libvirt will send two QMP commands to qemu: one is
> >device_del and the other is netdev_del. If this message arrives after
> >the first hot unplug command, there will be problems. The following is
> >a detailed analysis: The key function of the device_del command
> >execution: virtio_set_status->vhost_dev_stop. In the vhost_dev_stop
> >function, all queues will be traversed and
> >vhost_user_get_vring_base-->vhost_user_host_notifier_remove will be
> >called because VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG has not
> >arrived in some queues at this time, so VhostUserHostNotifier->addr
> >has no value at this time, so nothing will be done at this time. For
> >those with values, vhost_user_host_notifier_free will be submitted to
> >the rcu thread
> >and clear n->addr.
> >
> >Next, libvirt will send the netdev_del QMP command.
> >netdev_del--> vhost_user_cleanup
> >
> >Because some queues VhostUserHostNotifier->addr are not empty at this
> >time, there will be the following call path:
> >
> >1. vhost_user_host_notifier_remove->call_rcu(n,
> >vhost_user_host_notifier_free, rcu);
> >2. g_free_rcu(n, rcu);
>
> Got it, so call_rcu() and g_free_rcu() must be avoided on the same node
> in any case, right?
>
Yes
> I went through docs/devel/rcu.txt and code, it's not explicit, but it
> seems clear that only one should be used for cleanup.
>
If two identical nodes are added to a singly linked list in
succession, the singly linked list will become a loop. However, the
rcu linked list is somewhat special, as it has a dummy node. When
queue_pop pops this dummy node, it will be added back to the end of
the queue. This will open the loop that was just formed, and thus
cause the abort in this problem.

> >
> >The same rcu was submitted twice. In call_rcu1, if two identical nodes
> >are added to the node list, only one will be successfully added, but
> >rcu_call_count will be added twice. When rcu processes rcu node, it
> >will check whether rcu_call_count is empty. If not, it will take the
> >node from node list. Because the actual node in node list is one less
> >than rcu_call_count, it will cause rcu_call_count to be not empty, but
> >there is no node in node list. In this way, rcu thread will abort, the
> >code is as follows:
> >if (head == &dummy && qatomic_mb_read(&tail) == &dummy.next) {
> >    abort();
> >}
> >This is the reason why QEMU crashed. Since the latest QEMU cannot run
> >in our environment due to incompatibility of vhost-user messages, and
> >this problem is difficult to reproduce, I was unable to reproduce the
> >problem on the latest qemu. However, from the upstream code, since
> >VHOST_USER_BACKEND_VRING_HOST_NOTIFIER_MSG is asynchronous, we cannot
> >assume that all VhostUserHostNotifier->addr in vhost_user_cleanup are
> >empty. If addr is not empty, there will be a problem. Otherwise,
> >vhost_user_host_notifier_remove is not necessary to call in
> >vhost_user_cleanup.
> >
> >Therefore, the two tasks of vhost_user_host_notifier_free and free
> >VhostUserHostNotifier need to be placed in a rcu node, and use
> >n->need_free to determine whether it is free VhostUserHostNotifier.
>
> yeah, thank you for this detailed analysis, now it's clear.
>
> My suggestion is to put some of it in the commit description as well,
> maybe a summary with the main things.
>
Yes,I will submit another patch with a description and summary of the problem.
> >
> >Stefano Garzarella <sgarzare@redhat.com> 于2024年8月27日周二 16:00写道:
> >>
> >> On Wed, Aug 07, 2024 at 05:55:08PM GMT, yaozhenguo wrote:
> >> >When hotplug and hotunplug vhost-user-net device quickly.
> >>
> >> I'd replace the . with ,
> >>
> >> >qemu will crash. BT is as below:
> >> >
> >> >0  __pthread_kill_implementation () at /usr/lib64/libc.so.6
> >> >1  raise () at /usr/lib64/libc.so.6
> >> >2  abort () at /usr/lib64/libc.so.6
> >> >3  try_dequeue () at ../util/rcu.c:235
> >> >4  call_rcu_thread (opaque=opaque@entry=0x0) at ../util/rcu.c:288
> >> >5  qemu_thread_start (args=0x55b10d9ceaa0) at ../util/qemu-thread-posix.c:541
> >> >6  start_thread () at /usr/lib64/libc.so.6
> >> >7  clone3 () at /usr/lib64/libc.so.6
> >> >
> >> >1. device_del qmp process
> >> >
> >> >virtio_set_status
> >> >  vhost_dev_stop
> >> >    vhost_user_get_vring_base
> >> >      vhost_user_host_notifier_remove
> >> >
> >> >vhost_user_slave_handle_vring_host_notifier maybe called asynchronous after
> >>   ^
> >> Now it's called vhost_user_backend_handle_vring_host_notifier, I'd
> >> suggest to use the new name.
> >>
> >> >vhost_user_host_notifier_remove. vhost_user_host_notifier_remove will
> >> >not
> >> >all call_rcu because of notifier->addr is NULL at this time.
> >>
> >> s/all/call ?
> >>
> >Yes
> >> >
> >> >2. netdev_del qmp process
> >> >
> >> >vhost_user_cleanup
> >> >       vhost_user_host_notifier_remove
> >> >       g_free_rcu
> >> >
> >> >vhost_user_host_notifier_remove and g_free_rcu will sumbit same rcu_head
> >>
> >> s/sumbit/submit
> >>
> >Sorry about this mistake
>
> don't apologize, it can happen ;-)
>
> I usually use `--codespell` with ./scripts/checkpatch.pl
>
Great! I learned a new skill.

> >> >to rcu node list. rcu_call_count add twice but only one node is added.
> >> >rcu thread will abort when calling try_dequeue with node list is empty.
> >>
> >> What's not clear to me is how 1 and 2 are related, could you explain
> >> that?
> >>
> >Libvirt will send two QMP commands in succession when the network card
> >is hot-unplugged, which can help understand the problem. However, I
> >did not describe it clearly. Please see the detailed description at
> >the top.
>
> yep, now I got it.
>
> >> >Fix this by moving g_free(n) to vhost_user_host_notifier_free.
> >> >`
> >> >Fixes: 503e355465 ("virtio/vhost-user: dynamically assign VhostUserHostNotifiers")
> >> >Signed-off-by: yaozhenguo <yaozhenguo@jd.com>
> >> >---
> >> > hw/virtio/vhost-user.c         | 23 +++++++++++------------
> >> > include/hw/virtio/vhost-user.h |  1 +
> >> > 2 files changed, 12 insertions(+), 12 deletions(-)
> >> >
> >> >diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> >> >index 00561daa06..7ab37c0da2 100644
> >> >--- a/hw/virtio/vhost-user.c
> >> >+++ b/hw/virtio/vhost-user.c
> >> >@@ -1188,6 +1188,12 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
> >> >     assert(n && n->unmap_addr);
> >> >     munmap(n->unmap_addr, qemu_real_host_page_size());
> >> >     n->unmap_addr = NULL;
> >> >+    if (n->need_free) {
> >> >+        memory_region_transaction_begin();
> >> >+        object_unparent(OBJECT(&n->mr));
> >> >+        memory_region_transaction_commit();
> >> >+        g_free(n);
> >> >+    }
> >> > }
> >> >
> >> > /*
> >> >@@ -1195,7 +1201,7 @@ static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
> >> >  * under rcu.
> >> >  */
> >> > static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
> >> >-                                            VirtIODevice *vdev)
> >> >+                                            VirtIODevice *vdev, bool free)
> >> > {
> >> >     if (n->addr) {
> >> >         if (vdev) {
> >> >@@ -1204,6 +1210,7 @@ static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
> >> >         assert(!n->unmap_addr);
> >> >         n->unmap_addr = n->addr;
> >> >         n->addr = NULL;
> >> >+        n->need_free = free;
> >> >         call_rcu(n, vhost_user_host_notifier_free, rcu);
> >> >     }
> >> > }
> >> >@@ -1280,7 +1287,7 @@ static int vhost_user_get_vring_base(struct vhost_dev *dev,
> >> >
> >> >     VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
> >> >     if (n) {
> >> >-        vhost_user_host_notifier_remove(n, dev->vdev);
> >> >+        vhost_user_host_notifier_remove(n, dev->vdev, false);
> >> >     }
> >> >
> >> >     ret = vhost_user_write(dev, &msg, NULL, 0);
> >> >@@ -1562,7 +1569,7 @@ static int
> >> >vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
> >> >      * new mapped address.
> >> >      *)
> >> >     n = fetch_or_create_notifier(user, queue_idx);
> >> >-    vhost_user_host_notifier_remove(n, vdev);
> >> >+    vhost_user_host_notifier_remove(n, vdev, false);
> >> >
> >> >     if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
> >> >         return 0;
> >> >@@ -2737,13 +2744,7 @@ static void vhost_user_state_destroy(gpointer data)
> >> > {
> >> >     VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
> >> >     if (n) {
> >> >-        vhost_user_host_notifier_remove(n, NULL);
> >> >-        object_unparent(OBJECT(&n->mr));
> >> >-        /*
> >> >-         * We can't free until vhost_user_host_notifier_remove has
> >> >-         * done it's thing so schedule the free with RCU.
> >> >-         */
> >> >-        g_free_rcu(n, rcu);
> >> >+        vhost_user_host_notifier_remove(n, NULL, true);
> >>
> >> I'm not sure I understand the problem well, but could it be that now we
> >> don't see the problem anymore, but we have a memory leak?
> >>
> >> Here for example could it be the case that `n->addr` is NULL and
> >> therefore `vhost_user_host_notifier_free` with `n->need_free = true`
> >> will never be submitted?
> >>
> >Please see the detailed description at the top.
>
> I see it, but my question is still there.
>
> I mean, in vhost_user_host_notifier_free() should we call the new steps
> you added (object_unparent, g_free, etc.), also if `n->addr` is NULL ?
>
This is indeed a problem, there is a memory leak. I will fix it in the
next version.
> >> >     }
> >> > }
> >> >
> >> >@@ -2765,9 +2766,7 @@ void vhost_user_cleanup(VhostUserState *user)
> >> >     if (!user->chr) {
> >> >         return;
> >> >     }
> >> >-    memory_region_transaction_begin();
> >> >     user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers,
> >> >     true);
> >> >-    memory_region_transaction_commit();
> >>
> >> This is no longer necessary, because the `user->notifiers` free function
> >> no longer calls `object_unparent(OBJECT(&n->mr))`, right?
> >>
> >> Maybe it's worth mentioning in the commit description.
> >>
> >Yes, I originally thought so. But after reading the code carefully, I
> >found that my understanding was not correct.It is still needed here,
> >because the user->notifiers free function will call
> >virtio_queue_set_host_notifier_mr. This should also require
> >memory_region_transaction_commit. Is this correct?
>
> yeah, I think so. For examples virtio_pci_set_host_notifier_mr() is
> calling memory_region_add_subregion_overlap() or
> memory_region_del_subregion()
>
I will add it back in the next version.
> Thanks,
> Stefano
>
> >
> >> >     user->chr = NULL;
> >> > }
> >> >
> >> >diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
> >> >index 324cd8663a..a171f29e0b 100644
> >> >--- a/include/hw/virtio/vhost-user.h
> >> >+++ b/include/hw/virtio/vhost-user.h
> >> >@@ -54,6 +54,7 @@ typedef struct VhostUserHostNotifier {
> >> >     void *addr;
> >> >     void *unmap_addr;
> >> >     int idx;
> >> >+    bool need_free;
> >> > } VhostUserHostNotifier;
> >> >
> >> > /**
> >> >--
> >> >2.43.0
> >> >
> >>
> >
>
diff mbox series

Patch

diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 00561daa06..7ab37c0da2 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -1188,6 +1188,12 @@  static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
     assert(n && n->unmap_addr);
     munmap(n->unmap_addr, qemu_real_host_page_size());
     n->unmap_addr = NULL;
+    if (n->need_free) {
+        memory_region_transaction_begin();
+        object_unparent(OBJECT(&n->mr));
+        memory_region_transaction_commit();
+        g_free(n);
+    }
 }
 
 /*
@@ -1195,7 +1201,7 @@  static void vhost_user_host_notifier_free(VhostUserHostNotifier *n)
  * under rcu.
  */
 static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
-                                            VirtIODevice *vdev)
+                                            VirtIODevice *vdev, bool free)
 {
     if (n->addr) {
         if (vdev) {
@@ -1204,6 +1210,7 @@  static void vhost_user_host_notifier_remove(VhostUserHostNotifier *n,
         assert(!n->unmap_addr);
         n->unmap_addr = n->addr;
         n->addr = NULL;
+        n->need_free = free;
         call_rcu(n, vhost_user_host_notifier_free, rcu);
     }
 }
@@ -1280,7 +1287,7 @@  static int vhost_user_get_vring_base(struct vhost_dev *dev,
 
     VhostUserHostNotifier *n = fetch_notifier(u->user, ring->index);
     if (n) {
-        vhost_user_host_notifier_remove(n, dev->vdev);
+        vhost_user_host_notifier_remove(n, dev->vdev, false);
     }
 
     ret = vhost_user_write(dev, &msg, NULL, 0);
@@ -1562,7 +1569,7 @@  static int vhost_user_backend_handle_vring_host_notifier(struct vhost_dev *dev,
      * new mapped address.
      */
     n = fetch_or_create_notifier(user, queue_idx);
-    vhost_user_host_notifier_remove(n, vdev);
+    vhost_user_host_notifier_remove(n, vdev, false);
 
     if (area->u64 & VHOST_USER_VRING_NOFD_MASK) {
         return 0;
@@ -2737,13 +2744,7 @@  static void vhost_user_state_destroy(gpointer data)
 {
     VhostUserHostNotifier *n = (VhostUserHostNotifier *) data;
     if (n) {
-        vhost_user_host_notifier_remove(n, NULL);
-        object_unparent(OBJECT(&n->mr));
-        /*
-         * We can't free until vhost_user_host_notifier_remove has
-         * done it's thing so schedule the free with RCU.
-         */
-        g_free_rcu(n, rcu);
+        vhost_user_host_notifier_remove(n, NULL, true);
     }
 }
 
@@ -2765,9 +2766,7 @@  void vhost_user_cleanup(VhostUserState *user)
     if (!user->chr) {
         return;
     }
-    memory_region_transaction_begin();
     user->notifiers = (GPtrArray *) g_ptr_array_free(user->notifiers, true);
-    memory_region_transaction_commit();
     user->chr = NULL;
 }
 
diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
index 324cd8663a..a171f29e0b 100644
--- a/include/hw/virtio/vhost-user.h
+++ b/include/hw/virtio/vhost-user.h
@@ -54,6 +54,7 @@  typedef struct VhostUserHostNotifier {
     void *addr;
     void *unmap_addr;
     int idx;
+    bool need_free;
 } VhostUserHostNotifier;
 
 /**