
[4/7] virtio: don't read pending event on host notifier if disabled

Message ID 1648621997-22416-5-git-send-email-si-wei.liu@oracle.com (mailing list archive)
State New, archived
Series: vhost-vdpa multiqueue fixes

Commit Message

Si-Wei Liu March 30, 2022, 6:33 a.m. UTC
The previous commit prevents vhost-user and vhost-vdpa from using the
userland vq handler via disable_ioeventfd_handler. The same needs to be
done for host notifier cleanup too, as the virtio_queue_host_notifier_read
handler still tends to read pending events left behind on the ioeventfd and
attempts to handle outstanding kicks from the QEMU userland vq.

If the vq handler is not disabled on cleanup, it may lead to a SIGSEGV with
a recursive virtio_net_set_status call on the control vq:

0  0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6
1  0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6
2  0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6
3  0x00007f8ce3fec252 in  () at /lib64/libc.so.6
4  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:563
5  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:558
6  0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=<optimized out>) at ../hw/virtio/vhost.c:1557
7  0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false)
   at ../hw/virtio/virtio-pci.c:974
8  0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019
9  0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1)
   at ../hw/net/vhost_net.c:361
10 0x0000558f52d4e5e7 in virtio_net_set_status (status=<optimized out>, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289
11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370
12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945
13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=<optimized out>, val=<optimized out>, size=<optimized out>) at ../hw/virtio/virtio-pci.c:1292
14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=<optimized out>, size=1, shift=<optimized out>, mask=<optimized out>, attrs=...)
   at ../softmmu/memory.c:492
15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn=0x558f52d15cf0 <memory_region_write_accessor>, mr=0x558f568f19d0, attrs=...) at ../softmmu/memory.c:554
16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=<optimized out>, op=<optimized out>, attrs=attrs@entry=...)
   at ../softmmu/memory.c:1504
17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=<optimized out>, l=<optimized out>, mr=0x558f568f19d0) at ../../../include/qemu/host-utils.h:165
18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822
19 0x0000558f52d0b36b in address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>)
   at ../softmmu/physmem.c:2914
20 0x0000558f52d0b3da in address_space_rw (as=<optimized out>, addr=<optimized out>, attrs=...,
   attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>, is_write=<optimized out>) at ../softmmu/physmem.c:2924
21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903
22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49
23 0x0000558f52f9f04a in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:556
24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0
25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6

Fixes: 4023784 ("vhost-vdpa: multiqueue support")
Cc: Jason Wang <jasowang@redhat.com>
Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
---
 hw/virtio/virtio-bus.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
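
For context, the handler named in the commit message,
virtio_queue_host_notifier_read(), is essentially a test-and-clear of the
ioeventfd followed by a call into the QEMU userland vq handler, which is
what can still fire during cleanup. A simplified sketch (modeled on
hw/virtio/virtio.c; helper and field names may not match the exact QEMU
revision under discussion):

    /* Simplified sketch -- not the verbatim QEMU source. */
    void virtio_queue_host_notifier_read(EventNotifier *n)
    {
        VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

        if (event_notifier_test_and_clear(n)) {
            /* A kick is still pending on the ioeventfd: dispatch it to
             * the userland vq handler, even though vhost owns the queue. */
            virtio_queue_notify_vq(vq);
        }
    }

With the change below, this read is skipped on cleanup for devices that set
disable_ioeventfd_handler, leaving any pending kick for the vhost backend
to process on the next start.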

Comments

Jason Wang March 30, 2022, 9:14 a.m. UTC | #1
On Wed, Mar 30, 2022 at 2:33 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
> Previous commit prevents vhost-user and vhost-vdpa from using
> userland vq handler via disable_ioeventfd_handler. The same
> needs to be done for host notifier cleanup too, as the
> virtio_queue_host_notifier_read handler still tends to read
> pending event left behind on ioeventfd and attempts to handle
> outstanding kicks from QEMU userland vq.
>
> If vq handler is not disabled on cleanup, it may lead to sigsegv
> with recursive virtio_net_set_status call on the control vq:
>
> 0  0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6
> 1  0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6
> 2  0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6
> 3  0x00007f8ce3fec252 in  () at /lib64/libc.so.6
> 4  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:563
> 5  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:558
> 6  0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=<optimized out>) at ../hw/virtio/vhost.c:1557

I feel it's probably a bug elsewhere, e.g. when we fail to start
vhost-vDPA, it is QEMU's job to poll the host notifier and we will fall
back to the userspace vq handler.

Thanks

> 7  0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false)
>    at ../hw/virtio/virtio-pci.c:974
> 8  0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019
> 9  0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1)
>    at ../hw/net/vhost_net.c:361
> 10 0x0000558f52d4e5e7 in virtio_net_set_status (status=<optimized out>, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289
> 11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370
> 12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945
> 13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=<optimized out>, val=<optimized out>, size=<optimized out>) at ../hw/virtio/virtio-pci.c:1292
> 14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=<optimized out>, size=1, shift=<optimized out>, mask=<optimized out>, attrs=...)
>    at ../softmmu/memory.c:492
> 15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn=0x558f52d15cf0 <memory_region_write_accessor>, mr=0x558f568f19d0, attrs=...) at ../softmmu/memory.c:554
> 16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=<optimized out>, op=<optimized out>, attrs=attrs@entry=...)
>    at ../softmmu/memory.c:1504
> 17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=<optimized out>, l=<optimized out>, mr=0x558f568f19d0) at ../../../include/qemu/host-utils.h:165
> 18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822
> 19 0x0000558f52d0b36b in address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>)
>    at ../softmmu/physmem.c:2914
> 20 0x0000558f52d0b3da in address_space_rw (as=<optimized out>, addr=<optimized out>, attrs=...,
>    attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>, is_write=<optimized out>) at ../softmmu/physmem.c:2924
> 21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903
> 22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49
> 23 0x0000558f52f9f04a in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:556
> 24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0
> 25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6
>
> Fixes: 4023784 ("vhost-vdpa: multiqueue support")
> Cc: Jason Wang <jasowang@redhat.com>
> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> ---
>  hw/virtio/virtio-bus.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> index 0f69d1c..3159b58 100644
> --- a/hw/virtio/virtio-bus.c
> +++ b/hw/virtio/virtio-bus.c
> @@ -311,7 +311,8 @@ void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n)
>      /* Test and clear notifier after disabling event,
>       * in case poll callback didn't have time to run.
>       */
> -    virtio_queue_host_notifier_read(notifier);
> +    if (!vdev->disable_ioeventfd_handler)
> +        virtio_queue_host_notifier_read(notifier);
>      event_notifier_cleanup(notifier);
>  }
>
> --
> 1.8.3.1
>
Si-Wei Liu March 30, 2022, 4:40 p.m. UTC | #2
On 3/30/2022 2:14 AM, Jason Wang wrote:
> On Wed, Mar 30, 2022 at 2:33 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>> Previous commit prevents vhost-user and vhost-vdpa from using
>> userland vq handler via disable_ioeventfd_handler. The same
>> needs to be done for host notifier cleanup too, as the
>> virtio_queue_host_notifier_read handler still tends to read
>> pending event left behind on ioeventfd and attempts to handle
>> outstanding kicks from QEMU userland vq.
>>
>> If vq handler is not disabled on cleanup, it may lead to sigsegv
>> with recursive virtio_net_set_status call on the control vq:
>>
>> 0  0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6
>> 1  0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6
>> 2  0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6
>> 3  0x00007f8ce3fec252 in  () at /lib64/libc.so.6
>> 4  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:563
>> 5  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:558
>> 6  0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=<optimized out>) at ../hw/virtio/vhost.c:1557
> I feel it's probably a bug elsewhere e.g when we fail to start
> vhost-vDPA, it's the charge of the Qemu to poll host notifier and we
> will fallback to the userspace vq handler.
Apologies, an incorrect stack trace was pasted which actually came from 
patch #1. I will post a v2 with the corresponding one as below:

0  0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at ../hw/core/qdev.c:376
1  0x000055f800c68ad8 in virtio_bus_device_iommu_enabled (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331
2  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>) at ../hw/virtio/vhost.c:318
3  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>, buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at ../hw/virtio/vhost.c:336
4  0x000055f800d71867 in vhost_virtqueue_stop (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590, vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241
5  0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839
6  0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315
7  0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) at ../hw/net/vhost_net.c:423
8  0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
9  0x000055f800d4e628 in virtio_net_set_status (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370
10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=<optimized out>, iov=<optimized out>, cmd=0 '\000', n=0x55f8044ec590) at ../hw/net/virtio-net.c:1408
11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590, vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452
12 0x000055f800d69f37 in virtio_queue_host_notifier_read (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331
13 0x000055f800d69f37 in virtio_queue_host_notifier_read (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575
14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier (bus=<optimized out>, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312
15 0x000055f800d73106 in vhost_dev_disable_notifiers (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590) at ../../../include/hw/virtio/virtio-bus.h:35
16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0, dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316
17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590, ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7, cvq=cvq@entry=1) at ../hw/net/vhost_net.c:423
18 0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>, n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590, status=15 '\017') at ../hw/net/virtio-net.c:370
20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590, val=<optimized out>) at ../hw/virtio/virtio.c:1945
21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false, state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333
22 0x000055f800d04e7a in do_vm_stop (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false) at ../softmmu/cpus.c:262
23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280
24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812
25 0x000055f800ad5b13 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at ../softmmu/main.c:51

From the trace, the pending read only occurs in the stop path. The
recursive virtio_net_set_status from virtio_net_handle_ctrl doesn't make
sense to me. I'm not sure I understand why we need to handle a pending host
notification in the userland vq; can you elaborate?

Thanks,
-Siwei

>
> Thanks
>
>> 7  0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false)
>>     at ../hw/virtio/virtio-pci.c:974
>> 8  0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019
>> 9  0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1)
>>     at ../hw/net/vhost_net.c:361
>> 10 0x0000558f52d4e5e7 in virtio_net_set_status (status=<optimized out>, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289
>> 11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370
>> 12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945
>> 13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=<optimized out>, val=<optimized out>, size=<optimized out>) at ../hw/virtio/virtio-pci.c:1292
>> 14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=<optimized out>, size=1, shift=<optimized out>, mask=<optimized out>, attrs=...)
>>     at ../softmmu/memory.c:492
>> 15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn=0x558f52d15cf0 <memory_region_write_accessor>, mr=0x558f568f19d0, attrs=...) at ../softmmu/memory.c:554
>> 16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=<optimized out>, op=<optimized out>, attrs=attrs@entry=...)
>>     at ../softmmu/memory.c:1504
>> 17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=<optimized out>, l=<optimized out>, mr=0x558f568f19d0) at ../../../include/qemu/host-utils.h:165
>> 18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822
>> 19 0x0000558f52d0b36b in address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>)
>>     at ../softmmu/physmem.c:2914
>> 20 0x0000558f52d0b3da in address_space_rw (as=<optimized out>, addr=<optimized out>, attrs=...,
>>     attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>, is_write=<optimized out>) at ../softmmu/physmem.c:2924
>> 21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903
>> 22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49
>> 23 0x0000558f52f9f04a in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:556
>> 24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0
>> 25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6
>>
>> Fixes: 4023784 ("vhost-vdpa: multiqueue support")
>> Cc: Jason Wang <jasowang@redhat.com>
>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>> ---
>>   hw/virtio/virtio-bus.c | 3 ++-
>>   1 file changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
>> index 0f69d1c..3159b58 100644
>> --- a/hw/virtio/virtio-bus.c
>> +++ b/hw/virtio/virtio-bus.c
>> @@ -311,7 +311,8 @@ void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n)
>>       /* Test and clear notifier after disabling event,
>>        * in case poll callback didn't have time to run.
>>        */
>> -    virtio_queue_host_notifier_read(notifier);
>> +    if (!vdev->disable_ioeventfd_handler)
>> +        virtio_queue_host_notifier_read(notifier);
>>       event_notifier_cleanup(notifier);
>>   }
>>
>> --
>> 1.8.3.1
>>
Jason Wang March 31, 2022, 8:36 a.m. UTC | #3
On Thu, Mar 31, 2022 at 12:41 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 3/30/2022 2:14 AM, Jason Wang wrote:
> > On Wed, Mar 30, 2022 at 2:33 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >> Previous commit prevents vhost-user and vhost-vdpa from using
> >> userland vq handler via disable_ioeventfd_handler. The same
> >> needs to be done for host notifier cleanup too, as the
> >> virtio_queue_host_notifier_read handler still tends to read
> >> pending event left behind on ioeventfd and attempts to handle
> >> outstanding kicks from QEMU userland vq.
> >>
> >> If vq handler is not disabled on cleanup, it may lead to sigsegv
> >> with recursive virtio_net_set_status call on the control vq:
> >>
> >> 0  0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6
> >> 1  0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6
> >> 2  0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6
> >> 3  0x00007f8ce3fec252 in  () at /lib64/libc.so.6
> >> 4  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:563
> >> 5  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:558
> >> 6  0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=<optimized out>) at ../hw/virtio/vhost.c:1557
> > I feel it's probably a bug elsewhere e.g when we fail to start
> > vhost-vDPA, it's the charge of the Qemu to poll host notifier and we
> > will fallback to the userspace vq handler.
> Apologies, an incorrect stack trace was pasted which actually came from
> patch #1. I will post a v2 with the corresponding one as below:
>
> 0  0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at
> ../hw/core/qdev.c:376
> 1  0x000055f800c68ad8 in virtio_bus_device_iommu_enabled
> (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331
> 2  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>) at
> ../hw/virtio/vhost.c:318
> 3  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>,
> buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at
> ../hw/virtio/vhost.c:336
> 4  0x000055f800d71867 in vhost_virtqueue_stop
> (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590,
> vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241
> 5  0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30,
> vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839
> 6  0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30,
> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315
> 7  0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590,
> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
> cvq=cvq@entry=1)
>     at ../hw/net/vhost_net.c:423
> 8  0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>,
> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
> 9  0x000055f800d4e628 in virtio_net_set_status
> (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at
> ../hw/net/virtio-net.c:370

I don't understand why virtio_net_handle_ctrl() calls virtio_net_set_status()...

> 10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=<optimized
> out>, iov=<optimized out>, cmd=0 '\000', n=0x55f8044ec590) at
> ../hw/net/virtio-net.c:1408
> 11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590,
> vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452
> 12 0x000055f800d69f37 in virtio_queue_host_notifier_read
> (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331
> 13 0x000055f800d69f37 in virtio_queue_host_notifier_read
> (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575
> 14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier
> (bus=<optimized out>, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312
> 15 0x000055f800d73106 in vhost_dev_disable_notifiers
> (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590)
>     at ../../../include/hw/virtio/virtio-bus.h:35
> 16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0,
> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316
> 17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590,
> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
> cvq=cvq@entry=1)
>     at ../hw/net/vhost_net.c:423
> 18 0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>,
> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
> 19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590,
> status=15 '\017') at ../hw/net/virtio-net.c:370
> 20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590,
> val=<optimized out>) at ../hw/virtio/virtio.c:1945
> 21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false,
> state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333
> 22 0x000055f800d04e7a in do_vm_stop
> (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false)
> at ../softmmu/cpus.c:262
> 23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280
> 24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812
> 25 0x000055f800ad5b13 in main (argc=<optimized out>, argv=<optimized
> out>, envp=<optimized out>) at ../softmmu/main.c:51
>
>  From the trace pending read only occurs in stop path. The recursive
> virtio_net_set_status from virtio_net_handle_ctrl doesn't make sense to me.

Yes, we need to figure this out to know the root cause.

The code should work for the case when vhost-vDPA fails to start.

> Not sure I got the reason why we need to handle pending host
> notification in userland vq, can you elaborate?

Because vhost-vDPA fails to start, we will "fall back" to a dummy userspace.

Thanks

>
> Thanks,
> -Siwei
>
> >
> > Thanks
> >
> >> 7  0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false)
> >>     at ../hw/virtio/virtio-pci.c:974
> >> 8  0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019
> >> 9  0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1)
> >>     at ../hw/net/vhost_net.c:361
> >> 10 0x0000558f52d4e5e7 in virtio_net_set_status (status=<optimized out>, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289
> >> 11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370
> >> 12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945
> >> 13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=<optimized out>, val=<optimized out>, size=<optimized out>) at ../hw/virtio/virtio-pci.c:1292
> >> 14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=<optimized out>, size=1, shift=<optimized out>, mask=<optimized out>, attrs=...)
> >>     at ../softmmu/memory.c:492
> >> 15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn=0x558f52d15cf0 <memory_region_write_accessor>, mr=0x558f568f19d0, attrs=...) at ../softmmu/memory.c:554
> >> 16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=<optimized out>, op=<optimized out>, attrs=attrs@entry=...)
> >>     at ../softmmu/memory.c:1504
> >> 17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=<optimized out>, l=<optimized out>, mr=0x558f568f19d0) at ../../../include/qemu/host-utils.h:165
> >> 18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822
> >> 19 0x0000558f52d0b36b in address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>)
> >>     at ../softmmu/physmem.c:2914
> >> 20 0x0000558f52d0b3da in address_space_rw (as=<optimized out>, addr=<optimized out>, attrs=...,
> >>     attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>, is_write=<optimized out>) at ../softmmu/physmem.c:2924
> >> 21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903
> >> 22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49
> >> 23 0x0000558f52f9f04a in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:556
> >> 24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0
> >> 25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6
> >>
> >> Fixes: 4023784 ("vhost-vdpa: multiqueue support")
> >> Cc: Jason Wang <jasowang@redhat.com>
> >> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> >> ---
> >>   hw/virtio/virtio-bus.c | 3 ++-
> >>   1 file changed, 2 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> >> index 0f69d1c..3159b58 100644
> >> --- a/hw/virtio/virtio-bus.c
> >> +++ b/hw/virtio/virtio-bus.c
> >> @@ -311,7 +311,8 @@ void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n)
> >>       /* Test and clear notifier after disabling event,
> >>        * in case poll callback didn't have time to run.
> >>        */
> >> -    virtio_queue_host_notifier_read(notifier);
> >> +    if (!vdev->disable_ioeventfd_handler)
> >> +        virtio_queue_host_notifier_read(notifier);
> >>       event_notifier_cleanup(notifier);
> >>   }
> >>
> >> --
> >> 1.8.3.1
> >>
>
Si-Wei Liu April 1, 2022, 8:37 p.m. UTC | #4
On 3/31/2022 1:36 AM, Jason Wang wrote:
> On Thu, Mar 31, 2022 at 12:41 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>
>>
>> On 3/30/2022 2:14 AM, Jason Wang wrote:
>>> On Wed, Mar 30, 2022 at 2:33 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>> Previous commit prevents vhost-user and vhost-vdpa from using
>>>> userland vq handler via disable_ioeventfd_handler. The same
>>>> needs to be done for host notifier cleanup too, as the
>>>> virtio_queue_host_notifier_read handler still tends to read
>>>> pending event left behind on ioeventfd and attempts to handle
>>>> outstanding kicks from QEMU userland vq.
>>>>
>>>> If vq handler is not disabled on cleanup, it may lead to sigsegv
>>>> with recursive virtio_net_set_status call on the control vq:
>>>>
>>>> 0  0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6
>>>> 1  0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6
>>>> 2  0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6
>>>> 3  0x00007f8ce3fec252 in  () at /lib64/libc.so.6
>>>> 4  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:563
>>>> 5  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:558
>>>> 6  0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=<optimized out>) at ../hw/virtio/vhost.c:1557
>>> I feel it's probably a bug elsewhere e.g when we fail to start
>>> vhost-vDPA, it's the charge of the Qemu to poll host notifier and we
>>> will fallback to the userspace vq handler.
>> Apologies, an incorrect stack trace was pasted which actually came from
>> patch #1. I will post a v2 with the corresponding one as below:
>>
>> 0  0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at
>> ../hw/core/qdev.c:376
>> 1  0x000055f800c68ad8 in virtio_bus_device_iommu_enabled
>> (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331
>> 2  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>) at
>> ../hw/virtio/vhost.c:318
>> 3  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>,
>> buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at
>> ../hw/virtio/vhost.c:336
>> 4  0x000055f800d71867 in vhost_virtqueue_stop
>> (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590,
>> vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241
>> 5  0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30,
>> vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839
>> 6  0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30,
>> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315
>> 7  0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590,
>> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
>> cvq=cvq@entry=1)
>>      at ../hw/net/vhost_net.c:423
>> 8  0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>,
>> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
>> 9  0x000055f800d4e628 in virtio_net_set_status
>> (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at
>> ../hw/net/virtio-net.c:370
> I don't understand why virtio_net_handle_ctrl() call virtio_net_set_stauts()...
The pending request left over on the ctrl vq was a VIRTIO_NET_CTRL_MQ 
command, i.e. in virtio_net_handle_mq():

1413     n->curr_queue_pairs = queue_pairs;
1414     /* stop the backend before changing the number of queue_pairs to avoid handling a
1415      * disabled queue */
1416     virtio_net_set_status(vdev, vdev->status);
1417     virtio_net_set_queue_pairs(n);

Note that before the vdpa multiqueue support, there was never a vhost_dev 
exposed for the ctrl_vq, i.e. there's no host notifier set up for the 
ctrl_vq on vhost_kernel, as it is emulated in QEMU software.
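
For reference, here is a hedged sketch of the surrounding
virtio_net_handle_mq() logic in hw/net/virtio-net.c that the excerpt above
is taken from; the validity checks are elided and names are approximate, so
they may not match the exact revision quoted:

    static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                    struct iovec *iov, unsigned int iov_cnt)
    {
        VirtIODevice *vdev = VIRTIO_DEVICE(n);
        struct virtio_net_ctrl_mq mq;
        uint16_t queue_pairs;

        if (iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)) != sizeof(mq)) {
            return VIRTIO_NET_ERR;
        }
        queue_pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

        /* ... range checks on queue_pairs elided ... */

        n->curr_queue_pairs = queue_pairs;
        /* Stopping the backend before changing the number of queue_pairs
         * re-enters virtio_net_set_status() -- the recursion seen in the
         * trace above when this kick is drained from the cleanup path. */
        virtio_net_set_status(vdev, vdev->status);
        virtio_net_set_queue_pairs(n);

        return VIRTIO_NET_OK;
    }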

>
>> 10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=<optimized
>> out>, iov=<optimized out>, cmd=0 '\000', n=0x55f8044ec590) at
>> ../hw/net/virtio-net.c:1408
>> 11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590,
>> vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452
>> 12 0x000055f800d69f37 in virtio_queue_host_notifier_read
>> (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331
>> 13 0x000055f800d69f37 in virtio_queue_host_notifier_read
>> (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575
>> 14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier
>> (bus=<optimized out>, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312
>> 15 0x000055f800d73106 in vhost_dev_disable_notifiers
>> (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590)
>>      at ../../../include/hw/virtio/virtio-bus.h:35
>> 16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0,
>> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316
>> 17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590,
>> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
>> cvq=cvq@entry=1)
>>      at ../hw/net/vhost_net.c:423
>> 18 0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>,
>> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
>> 19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590,
>> status=15 '\017') at ../hw/net/virtio-net.c:370
>> 20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590,
>> val=<optimized out>) at ../hw/virtio/virtio.c:1945
>> 21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false,
>> state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333
>> 22 0x000055f800d04e7a in do_vm_stop
>> (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false)
>> at ../softmmu/cpus.c:262
>> 23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280
>> 24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812
>> 25 0x000055f800ad5b13 in main (argc=<optimized out>, argv=<optimized
>> out>, envp=<optimized out>) at ../softmmu/main.c:51
>>
>>   From the trace pending read only occurs in stop path. The recursive
>> virtio_net_set_status from virtio_net_handle_ctrl doesn't make sense to me.
> Yes, we need to figure this out to know the root cause.
I think it has something to do with the virtqueue unready issue that the 
vhost_reset_device() refactoring series attempts to fix. If that is fixed, 
we should not see this SIGSEGV with mlx5_vdpa. However, I guess we both 
agreed that the vq_unready support would need a new uAPI (some flag) to be 
defined, hence this fix applies to the situation where the uAPI doesn't 
exist in the kernel or vq_unready is not supported by the vdpa vendor driver.

>
> The code should work for the case when vhost-vdp fails to start.
Unlike the other datapath queues for net vdpa, the events left behind in 
the control queue can't be processed by userspace, since unlike 
vhost-kernel we don't have a fallback path in userspace. Ignoring the 
pending event and letting vhost-vdpa process it on resume/start is perhaps 
the best thing to do; see the sketch below. This is even true for the 
datapath queues of vdpa devices other than network.
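
To make that concrete, a sketch of virtio_bus_cleanup_host_notifier() with
the guard from this patch applied (reconstructed from the hunk in the
patch; disable_ioeventfd_handler comes from an earlier patch in this
series, and the surrounding lines may differ slightly from the actual tree):

    void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n)
    {
        VirtIODevice *vdev = virtio_bus_get_device(bus);
        VirtQueue *vq = virtio_get_queue(vdev, n);
        EventNotifier *notifier = virtio_queue_get_host_notifier(vq);

        /* Test and clear notifier after disabling event,
         * in case poll callback didn't have time to run.
         */
        if (!vdev->disable_ioeventfd_handler) {
            virtio_queue_host_notifier_read(notifier);
        }
        event_notifier_cleanup(notifier);
    }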

>
>> Not sure I got the reason why we need to handle pending host
>> notification in userland vq, can you elaborate?
> Because vhost-vDPA fails to start, we will "fallback" to a dummy userspace.
Is the dummy userspace working or operational? What's the use case of 
this "fallback" dummy if all the guest user gets is a busted NIC? I think 
this is very different from the vhost-kernel case, in that once vhost 
fails we can fall back to userspace to emulate the network through the 
tap fd in a good way. However, there's no equivalent yet for 
vhost-vdpa...

Thanks,
-Siwei

>
> Thanks
>
>> Thanks,
>> -Siwei
>>
>>> Thanks
>>>
>>>> 7  0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false)
>>>>      at ../hw/virtio/virtio-pci.c:974
>>>> 8  0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019
>>>> 9  0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1)
>>>>      at ../hw/net/vhost_net.c:361
>>>> 10 0x0000558f52d4e5e7 in virtio_net_set_status (status=<optimized out>, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289
>>>> 11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370
>>>> 12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945
>>>> 13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=<optimized out>, val=<optimized out>, size=<optimized out>) at ../hw/virtio/virtio-pci.c:1292
>>>> 14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=<optimized out>, size=1, shift=<optimized out>, mask=<optimized out>, attrs=...)
>>>>      at ../softmmu/memory.c:492
>>>> 15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn=0x558f52d15cf0 <memory_region_write_accessor>, mr=0x558f568f19d0, attrs=...) at ../softmmu/memory.c:554
>>>> 16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=<optimized out>, op=<optimized out>, attrs=attrs@entry=...)
>>>>      at ../softmmu/memory.c:1504
>>>> 17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=<optimized out>, l=<optimized out>, mr=0x558f568f19d0) at ../../../include/qemu/host-utils.h:165
>>>> 18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822
>>>> 19 0x0000558f52d0b36b in address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>)
>>>>      at ../softmmu/physmem.c:2914
>>>> 20 0x0000558f52d0b3da in address_space_rw (as=<optimized out>, addr=<optimized out>, attrs=...,
>>>>      attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>, is_write=<optimized out>) at ../softmmu/physmem.c:2924
>>>> 21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903
>>>> 22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49
>>>> 23 0x0000558f52f9f04a in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:556
>>>> 24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0
>>>> 25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6
>>>>
>>>> Fixes: 4023784 ("vhost-vdpa: multiqueue support")
>>>> Cc: Jason Wang <jasowang@redhat.com>
>>>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>>>> ---
>>>>    hw/virtio/virtio-bus.c | 3 ++-
>>>>    1 file changed, 2 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
>>>> index 0f69d1c..3159b58 100644
>>>> --- a/hw/virtio/virtio-bus.c
>>>> +++ b/hw/virtio/virtio-bus.c
>>>> @@ -311,7 +311,8 @@ void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n)
>>>>        /* Test and clear notifier after disabling event,
>>>>         * in case poll callback didn't have time to run.
>>>>         */
>>>> -    virtio_queue_host_notifier_read(notifier);
>>>> +    if (!vdev->disable_ioeventfd_handler)
>>>> +        virtio_queue_host_notifier_read(notifier);
>>>>        event_notifier_cleanup(notifier);
>>>>    }
>>>>
>>>> --
>>>> 1.8.3.1
>>>>
Jason Wang April 2, 2022, 2 a.m. UTC | #5
On Sat, Apr 2, 2022 at 4:37 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 3/31/2022 1:36 AM, Jason Wang wrote:
> > On Thu, Mar 31, 2022 at 12:41 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>
> >>
> >> On 3/30/2022 2:14 AM, Jason Wang wrote:
> >>> On Wed, Mar 30, 2022 at 2:33 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
> >>>> Previous commit prevents vhost-user and vhost-vdpa from using
> >>>> userland vq handler via disable_ioeventfd_handler. The same
> >>>> needs to be done for host notifier cleanup too, as the
> >>>> virtio_queue_host_notifier_read handler still tends to read
> >>>> pending event left behind on ioeventfd and attempts to handle
> >>>> outstanding kicks from QEMU userland vq.
> >>>>
> >>>> If vq handler is not disabled on cleanup, it may lead to sigsegv
> >>>> with recursive virtio_net_set_status call on the control vq:
> >>>>
> >>>> 0  0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6
> >>>> 1  0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6
> >>>> 2  0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6
> >>>> 3  0x00007f8ce3fec252 in  () at /lib64/libc.so.6
> >>>> 4  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:563
> >>>> 5  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:558
> >>>> 6  0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=<optimized out>) at ../hw/virtio/vhost.c:1557
> >>> I feel it's probably a bug elsewhere e.g when we fail to start
> >>> vhost-vDPA, it's the charge of the Qemu to poll host notifier and we
> >>> will fallback to the userspace vq handler.
> >> Apologies, an incorrect stack trace was pasted which actually came from
> >> patch #1. I will post a v2 with the corresponding one as below:
> >>
> >> 0  0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at
> >> ../hw/core/qdev.c:376
> >> 1  0x000055f800c68ad8 in virtio_bus_device_iommu_enabled
> >> (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331
> >> 2  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>) at
> >> ../hw/virtio/vhost.c:318
> >> 3  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>,
> >> buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at
> >> ../hw/virtio/vhost.c:336
> >> 4  0x000055f800d71867 in vhost_virtqueue_stop
> >> (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590,
> >> vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241
> >> 5  0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30,
> >> vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839
> >> 6  0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30,
> >> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315
> >> 7  0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590,
> >> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
> >> cvq=cvq@entry=1)
> >>      at ../hw/net/vhost_net.c:423
> >> 8  0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>,
> >> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
> >> 9  0x000055f800d4e628 in virtio_net_set_status
> >> (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at
> >> ../hw/net/virtio-net.c:370
> > I don't understand why virtio_net_handle_ctrl() call virtio_net_set_stauts()...
> The pending request left over on the ctrl vq was a VIRTIO_NET_CTRL_MQ
> command, i.e. in virtio_net_handle_mq():

Completely forgot that the code was actually written by me :\

>
> 1413     n->curr_queue_pairs = queue_pairs;
> 1414     /* stop the backend before changing the number of queue_pairs
> to avoid handling a
> 1415      * disabled queue */
> 1416     virtio_net_set_status(vdev, vdev->status);
> 1417     virtio_net_set_queue_pairs(n);
>
> Noted before the vdpa multiqueue support, there was never a vhost_dev
> for ctrl_vq exposed, i.e. there's no host notifier set up for the
> ctrl_vq on vhost_kernel as it is emulated in QEMU software.
>
> >
> >> 10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=<optimized
> >> out>, iov=<optimized out>, cmd=0 '\000', n=0x55f8044ec590) at
> >> ../hw/net/virtio-net.c:1408
> >> 11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590,
> >> vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452
> >> 12 0x000055f800d69f37 in virtio_queue_host_notifier_read
> >> (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331
> >> 13 0x000055f800d69f37 in virtio_queue_host_notifier_read
> >> (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575
> >> 14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier
> >> (bus=<optimized out>, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312
> >> 15 0x000055f800d73106 in vhost_dev_disable_notifiers
> >> (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590)
> >>      at ../../../include/hw/virtio/virtio-bus.h:35
> >> 16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0,
> >> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316
> >> 17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590,
> >> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
> >> cvq=cvq@entry=1)
> >>      at ../hw/net/vhost_net.c:423
> >> 18 0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>,
> >> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
> >> 19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590,
> >> status=15 '\017') at ../hw/net/virtio-net.c:370
> >> 20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590,
> >> val=<optimized out>) at ../hw/virtio/virtio.c:1945
> >> 21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false,
> >> state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333
> >> 22 0x000055f800d04e7a in do_vm_stop
> >> (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false)
> >> at ../softmmu/cpus.c:262
> >> 23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280
> >> 24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812
> >> 25 0x000055f800ad5b13 in main (argc=<optimized out>, argv=<optimized
> >> out>, envp=<optimized out>) at ../softmmu/main.c:51
> >>
> >>   From the trace pending read only occurs in stop path. The recursive
> >> virtio_net_set_status from virtio_net_handle_ctrl doesn't make sense to me.
> > Yes, we need to figure this out to know the root cause.
> I think it has something to do with the virtqueue unready issue that the
> vhost_reset_device() refactoring series attempt to fix. If that is fixed
> we should not see this sigsegv with mlx5_vdpa. However I guess we both
> agreed that the vq_unready support would need new uAPI (some flag) to
> define, hence this fix applies to the situation where uAPI doesn't exist
> on the kernel or the vq_unready is not supported by vdpa vendor driver.
>

Yes.

> >
> > The code should work for the case when vhost-vdp fails to start.
> Unlike the other datapath queues for net vdpa, the events left behind in
> the control queue can't be processed by the userspace, as unlike
> vhost-kernel we don't have a fallback path in the userspace.

So that's the question, we should have a safe fallback.

> To ignore
> the pending event and let vhost vdpa process it on resume/start is
> perhaps the best thing to do. This is even true for datapath queues for
> other vdpa devices than of network.
>
> >
> >> Not sure I got the reason why we need to handle pending host
> >> notification in userland vq, can you elaborate?
> > Because vhost-vDPA fails to start, we will "fallback" to a dummy userspace.
> Is the dummy userspace working or operational? What's the use case of
> this "fallback" dummy if what guest user can get is a busted NIC?

The problem is we can't do better in this case now. Such a fallback (e.g.
for vhost-user) has been used for years. Or do you have any better
ideas?

The two approaches don't differ too much:

1) a dummy fallback, which can handle even the cvq

and

2) disabling host notifiers

Especially considering that 2) requires more changes.

> I
> think this is very different from the vhost-kernel case in that once
> vhost fails, we can fallback to userspace to emulate the network through
> the tap fd in a good way. However, there's no equivalent yet for
> vhost-vdpa...
>

As said previously, technically we can have a vhost-vDPA network backend
as a fallback (as was done for vhost-user).

Thanks

> Thanks,
> -Siwei
>
> >
> > Thanks
> >
> >> Thanks,
> >> -Siwei
> >>
> >>> Thanks
> >>>
> >>>> 7  0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false)
> >>>>      at ../hw/virtio/virtio-pci.c:974
> >>>> 8  0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019
> >>>> 9  0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1)
> >>>>      at ../hw/net/vhost_net.c:361
> >>>> 10 0x0000558f52d4e5e7 in virtio_net_set_status (status=<optimized out>, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289
> >>>> 11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370
> >>>> 12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945
> >>>> 13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=<optimized out>, val=<optimized out>, size=<optimized out>) at ../hw/virtio/virtio-pci.c:1292
> >>>> 14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=<optimized out>, size=1, shift=<optimized out>, mask=<optimized out>, attrs=...)
> >>>>      at ../softmmu/memory.c:492
> >>>> 15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn=0x558f52d15cf0 <memory_region_write_accessor>, mr=0x558f568f19d0, attrs=...) at ../softmmu/memory.c:554
> >>>> 16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=<optimized out>, op=<optimized out>, attrs=attrs@entry=...)
> >>>>      at ../softmmu/memory.c:1504
> >>>> 17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=<optimized out>, l=<optimized out>, mr=0x558f568f19d0) at ../../../include/qemu/host-utils.h:165
> >>>> 18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822
> >>>> 19 0x0000558f52d0b36b in address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>)
> >>>>      at ../softmmu/physmem.c:2914
> >>>> 20 0x0000558f52d0b3da in address_space_rw (as=<optimized out>, addr=<optimized out>, attrs=...,
> >>>>      attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>, is_write=<optimized out>) at ../softmmu/physmem.c:2924
> >>>> 21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903
> >>>> 22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49
> >>>> 23 0x0000558f52f9f04a in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:556
> >>>> 24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0
> >>>> 25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6
> >>>>
> >>>> Fixes: 4023784 ("vhost-vdpa: multiqueue support")
> >>>> Cc: Jason Wang <jasowang@redhat.com>
> >>>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> >>>> ---
> >>>>    hw/virtio/virtio-bus.c | 3 ++-
> >>>>    1 file changed, 2 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> >>>> index 0f69d1c..3159b58 100644
> >>>> --- a/hw/virtio/virtio-bus.c
> >>>> +++ b/hw/virtio/virtio-bus.c
> >>>> @@ -311,7 +311,8 @@ void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n)
> >>>>        /* Test and clear notifier after disabling event,
> >>>>         * in case poll callback didn't have time to run.
> >>>>         */
> >>>> -    virtio_queue_host_notifier_read(notifier);
> >>>> +    if (!vdev->disable_ioeventfd_handler)
> >>>> +        virtio_queue_host_notifier_read(notifier);
> >>>>        event_notifier_cleanup(notifier);
> >>>>    }
> >>>>
> >>>> --
> >>>> 1.8.3.1
> >>>>
>
Si-Wei Liu April 5, 2022, 7:18 p.m. UTC | #6
On 4/1/2022 7:00 PM, Jason Wang wrote:
> On Sat, Apr 2, 2022 at 4:37 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>
>>
>> On 3/31/2022 1:36 AM, Jason Wang wrote:
>>> On Thu, Mar 31, 2022 at 12:41 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>>
>>>> On 3/30/2022 2:14 AM, Jason Wang wrote:
>>>>> On Wed, Mar 30, 2022 at 2:33 PM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>>>> Previous commit prevents vhost-user and vhost-vdpa from using
>>>>>> userland vq handler via disable_ioeventfd_handler. The same
>>>>>> needs to be done for host notifier cleanup too, as the
>>>>>> virtio_queue_host_notifier_read handler still tends to read
>>>>>> pending event left behind on ioeventfd and attempts to handle
>>>>>> outstanding kicks from QEMU userland vq.
>>>>>>
>>>>>> If vq handler is not disabled on cleanup, it may lead to sigsegv
>>>>>> with recursive virtio_net_set_status call on the control vq:
>>>>>>
>>>>>> 0  0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6
>>>>>> 1  0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6
>>>>>> 2  0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6
>>>>>> 3  0x00007f8ce3fec252 in  () at /lib64/libc.so.6
>>>>>> 4  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:563
>>>>>> 5  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:558
>>>>>> 6  0x0000558f52d7329a in vhost_virtqueue_mask (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=<optimized out>) at ../hw/virtio/vhost.c:1557
>>>>> I feel it's probably a bug elsewhere e.g when we fail to start
>>>>> vhost-vDPA, it's the charge of the Qemu to poll host notifier and we
>>>>> will fallback to the userspace vq handler.
>>>> Apologies, an incorrect stack trace was pasted which actually came from
>>>> patch #1. I will post a v2 with the corresponding one as below:
>>>>
>>>> 0  0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at
>>>> ../hw/core/qdev.c:376
>>>> 1  0x000055f800c68ad8 in virtio_bus_device_iommu_enabled
>>>> (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331
>>>> 2  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>) at
>>>> ../hw/virtio/vhost.c:318
>>>> 3  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>,
>>>> buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at
>>>> ../hw/virtio/vhost.c:336
>>>> 4  0x000055f800d71867 in vhost_virtqueue_stop
>>>> (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590,
>>>> vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241
>>>> 5  0x000055f800d7406c in vhost_dev_stop (hdev=hdev@entry=0x55f8037ccc30,
>>>> vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839
>>>> 6  0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30,
>>>> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315
>>>> 7  0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590,
>>>> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
>>>> cvq=cvq@entry=1)
>>>>       at ../hw/net/vhost_net.c:423
>>>> 8  0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>,
>>>> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
>>>> 9  0x000055f800d4e628 in virtio_net_set_status
>>>> (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at
>>>> ../hw/net/virtio-net.c:370
>>> I don't understand why virtio_net_handle_ctrl() call virtio_net_set_stauts()...
>> The pending request left over on the ctrl vq was a VIRTIO_NET_CTRL_MQ
>> command, i.e. in virtio_net_handle_mq():
> Completely forget that the code was actually written by me :\
>
>> 1413     n->curr_queue_pairs = queue_pairs;
>> 1414     /* stop the backend before changing the number of queue_pairs
>> to avoid handling a
>> 1415      * disabled queue */
>> 1416     virtio_net_set_status(vdev, vdev->status);
>> 1417     virtio_net_set_queue_pairs(n);
>>
>> Note that before the vdpa multiqueue support, there was never a vhost_dev
>> exposed for the ctrl_vq, i.e. no host notifier was set up for the ctrl_vq
>> on vhost-kernel, as it is emulated in QEMU software.
>>
>>>> 10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=<optimized
>>>> out>, iov=<optimized out>, cmd=0 '\000', n=0x55f8044ec590) at
>>>> ../hw/net/virtio-net.c:1408
>>>> 11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590,
>>>> vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452
>>>> 12 0x000055f800d69f37 in virtio_queue_host_notifier_read
>>>> (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331
>>>> 13 0x000055f800d69f37 in virtio_queue_host_notifier_read
>>>> (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575
>>>> 14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier
>>>> (bus=<optimized out>, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312
>>>> 15 0x000055f800d73106 in vhost_dev_disable_notifiers
>>>> (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590)
>>>>       at ../../../include/hw/virtio/virtio-bus.h:35
>>>> 16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0,
>>>> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316
>>>> 17 0x000055f800bf0678 in vhost_net_stop (dev=dev@entry=0x55f8044ec590,
>>>> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
>>>> cvq=cvq@entry=1)
>>>>       at ../hw/net/vhost_net.c:423
>>>> 18 0x000055f800d4e628 in virtio_net_set_status (status=<optimized out>,
>>>> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
>>>> 19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590,
>>>> status=15 '\017') at ../hw/net/virtio-net.c:370
>>>> 20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590,
>>>> val=<optimized out>) at ../hw/virtio/virtio.c:1945
>>>> 21 0x000055f800d11d9d in vm_state_notify (running=running@entry=false,
>>>> state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333
>>>> 22 0x000055f800d04e7a in do_vm_stop
>>>> (state=state@entry=RUN_STATE_SHUTDOWN, send_stop=send_stop@entry=false)
>>>> at ../softmmu/cpus.c:262
>>>> 23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280
>>>> 24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812
>>>> 25 0x000055f800ad5b13 in main (argc=<optimized out>, argv=<optimized
>>>> out>, envp=<optimized out>) at ../softmmu/main.c:51
>>>>
>>>> From the trace, the pending read only occurs in the stop path. The
>>>> recursive virtio_net_set_status from virtio_net_handle_ctrl doesn't
>>>> make sense to me.
>>> Yes, we need to figure this out to know the root cause.
>> I think it has something to do with the virtqueue unready issue that the
>> vhost_reset_device() refactoring series attempts to fix. If that is fixed
>> we should not see this sigsegv with mlx5_vdpa. However I guess we both
>> agreed that the vq_unready support would need a new uAPI (some flag) to
>> define, hence this fix applies to the situation where the uAPI doesn't
>> exist in the kernel or vq_unready is not supported by the vdpa vendor
>> driver.
>>
> Yes.
>
>>> The code should work for the case when vhost-vDPA fails to start.
>> Unlike the other datapath queues for net vdpa, the events left behind in
>> the control queue can't be processed by userspace, as unlike vhost-kernel
>> we don't have a userspace fallback path.
> So that's the question, we should have a safe fallback.
>
>> To ignore
>> the pending event and let vhost-vdpa process it on resume/start is
>> perhaps the best thing to do. This is even true for the datapath
>> queues of vdpa devices other than network.
>>
>>>> Not sure I got the reason why we need to handle pending host
>>>> notification in userland vq, can you elaborate?
>>> Because vhost-vDPA fails to start, we will "fall back" to a dummy userspace.
>> Is the dummy userspace working or operational? What's the use case of
>> this "fallback" dummy if all the guest user can get is a busted NIC?
> The problem is we can't do better in this case now. Such a fallback
> (e.g. for vhost-user) has been used for years. Or do you have any better
> ideas?
In my opinion, if vhost-vdpa or vhost-user fails to start, maybe we
should try to disable the device via virtio_error(), which sets broken
to true and sets NEEDS_RESET in case of VERSION_1. That way the device
won't move forward any further, and the guest may get an indication via
the config interrupt that something has gone wrong underneath. If device
reset is well supported there, the guest driver would retry. This can at
least give the backend a chance to recover from an intermittent error.
The worst case would be the device resetting repeatedly, for which we
could introduce a tunable to throttle the rate if resets occur too
often. Did this ever get considered before?
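
Roughly what I have in mind, based on the existing vhost start error
path in virtio-net (an untested sketch; the exact call site and the
error message are illustrative, and whether to keep the userspace
fallback at all is a separate question):

    r = vhost_net_start(vdev, n->nic->ncs, queue_pairs, cvq);
    if (r < 0) {
        error_report("unable to start vhost net: %d: "
                     "falling back on userspace virtio", -r);
        n->vhost_started = 0;
        /* sketch: mark the device broken instead of (or on top of)
         * the dummy userspace fallback; virtio_error() sets
         * vdev->broken and, for VERSION_1 devices, NEEDS_RESET plus
         * a config interrupt */
        virtio_error(vdev, "vhost backend failed to start (%d)", -r);
    }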

Note that the dummy userspace can't handle any control vq command
effectively once the vhost backend fails; for example, how does it
handle guest offload, rx mode, MAC or VLAN filter changes without
sending the request down to the backend? This could easily leave the
guest with inconsistent state if we were somehow able to resume the
virtqueue without a reset. Even then, I suspect a device reset is
eventually still needed on some other part, depending on the specific
failure. It looks to me that, at least for vhost-vdpa, the safest
fallback so far is to ignore the pending event on the ctrl_vq and stop
the device from moving forward in case of backend start failure.

>
> The two approaches don't differ too much:
>
> 1) dummy fallback which can do even cvq
>
> and
>
> 2) disable host notifiers
>
> Especially considering 2) requires more changes.
I'm not sure 2) really needs more changes; it seems to me it would take
more unwarranted changes to make the dummy fallback work for the cvq.
And supposing we can fall back to disabling the device via
virtio_error(), we wouldn't need to change any cvq code at all?

On the other hand, the specific code path this patch tries to fix is
not due to a failure to start the vhost-vdpa backend, but rather a
control flow flaw in the stop path caused by the lack of a VQ stop
uAPI. Dummy fallback or host notifier aside, considering it currently
sits in the stop path followed by a reset, I feel it should be pretty
safe to just ignore the pending event on the control vq rather than
process it prematurely in userspace. What do you think? I can live
without the host notifier handler change for sure.
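
For the cleanup path, a minimal sketch of what I mean (untested; it
assumes the disable_ioeventfd_handler flag introduced earlier in this
series): drop the stale kick instead of dispatching it to the userspace
vq handler, and let the vhost backend pick up the avail ring on the
next start:

    void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n)
    {
        VirtIODevice *vdev = virtio_bus_get_device(bus);
        EventNotifier *notifier =
            virtio_queue_get_host_notifier(virtio_get_queue(vdev, n));

        if (vdev->disable_ioeventfd_handler) {
            /* clear the pending event without running the userspace
             * vq handler */
            event_notifier_test_and_clear(notifier);
        } else {
            /* Test and clear notifier after disabling event,
             * in case poll callback didn't have time to run.
             */
            virtio_queue_host_notifier_read(notifier);
        }
        event_notifier_cleanup(notifier);
    }

This keeps the behaviour for emulated backends unchanged while avoiding
the recursive virtio_net_set_status on the vhost-vdpa ctrl vq.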

>
>> I
>> think this is very different from the vhost-kernel case in that once
>> vhost fails, we can fallback to userspace to emulate the network through
>> the tap fd in a good way. However, there's no equivalent yet for
>> vhost-vdpa...
>>
> As said previously, technically we can have vhost-vDPA network backend
> as a fallback.
But this isn't working yet. And how do you envision the datapath would
work given that we don't have a fallback tap fd?

-Siwei


>   (So did for vhost-user).
>
> Thanks
>
>> Thanks,
>> -Siwei
>>
>>> Thanks
>>>
>>>> Thanks,
>>>> -Siwei
>>>>
>>>>> Thanks
>>>>>
>>>>>> 7  0x0000558f52c6b89a in virtio_pci_set_guest_notifier (d=d@entry=0x558f568f0f60, n=n@entry=2, assign=assign@entry=true, with_irqfd=with_irqfd@entry=false)
>>>>>>       at ../hw/virtio/virtio-pci.c:974
>>>>>> 8  0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers (d=0x558f568f0f60, nvqs=3, assign=true) at ../hw/virtio/virtio-pci.c:1019
>>>>>> 9  0x0000558f52bf091d in vhost_net_start (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1)
>>>>>>       at ../hw/net/vhost_net.c:361
>>>>>> 10 0x0000558f52d4e5e7 in virtio_net_set_status (status=<optimized out>, n=0x558f568f91f0) at ../hw/net/virtio-net.c:289
>>>>>> 11 0x0000558f52d4e5e7 in virtio_net_set_status (vdev=0x558f568f91f0, status=15 '\017') at ../hw/net/virtio-net.c:370
>>>>>> 12 0x0000558f52d6c4b2 in virtio_set_status (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at ../hw/virtio/virtio.c:1945
>>>>>> 13 0x0000558f52c69eff in virtio_pci_common_write (opaque=0x558f568f0f60, addr=<optimized out>, val=<optimized out>, size=<optimized out>) at ../hw/virtio/virtio-pci.c:1292
>>>>>> 14 0x0000558f52d15d6e in memory_region_write_accessor (mr=0x558f568f19d0, addr=20, value=<optimized out>, size=1, shift=<optimized out>, mask=<optimized out>, attrs=...)
>>>>>>       at ../softmmu/memory.c:492
>>>>>> 15 0x0000558f52d127de in access_with_adjusted_size (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, size=size@entry=1, access_size_min=<optimized out>, access_size_max=<optimized out>, access_fn=0x558f52d15cf0 <memory_region_write_accessor>, mr=0x558f568f19d0, attrs=...) at ../softmmu/memory.c:554
>>>>>> 16 0x0000558f52d157ef in memory_region_dispatch_write (mr=mr@entry=0x558f568f19d0, addr=20, data=<optimized out>, op=<optimized out>, attrs=attrs@entry=...)
>>>>>>       at ../softmmu/memory.c:1504
>>>>>> 17 0x0000558f52d078e7 in flatview_write_continue (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, addr1=<optimized out>, l=<optimized out>, mr=0x558f568f19d0) at ../../../include/qemu/host-utils.h:165
>>>>>> 18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at ../softmmu/physmem.c:2822
>>>>>> 19 0x0000558f52d0b36b in address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>)
>>>>>>       at ../softmmu/physmem.c:2914
>>>>>> 20 0x0000558f52d0b3da in address_space_rw (as=<optimized out>, addr=<optimized out>, attrs=...,
>>>>>>       attrs@entry=..., buf=buf@entry=0x7f8ce6300028, len=<optimized out>, is_write=<optimized out>) at ../softmmu/physmem.c:2924
>>>>>> 21 0x0000558f52dced09 in kvm_cpu_exec (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903
>>>>>> 22 0x0000558f52dcfabd in kvm_vcpu_thread_fn (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49
>>>>>> 23 0x0000558f52f9f04a in qemu_thread_start (args=<optimized out>) at ../util/qemu-thread-posix.c:556
>>>>>> 24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0
>>>>>> 25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6
>>>>>>
>>>>>> Fixes: 4023784 ("vhost-vdpa: multiqueue support")
>>>>>> Cc: Jason Wang <jasowang@redhat.com>
>>>>>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>>>>>> ---
>>>>>>     hw/virtio/virtio-bus.c | 3 ++-
>>>>>>     1 file changed, 2 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
>>>>>> index 0f69d1c..3159b58 100644
>>>>>> --- a/hw/virtio/virtio-bus.c
>>>>>> +++ b/hw/virtio/virtio-bus.c
>>>>>> @@ -311,7 +311,8 @@ void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n)
>>>>>>         /* Test and clear notifier after disabling event,
>>>>>>          * in case poll callback didn't have time to run.
>>>>>>          */
>>>>>> -    virtio_queue_host_notifier_read(notifier);
>>>>>> +    if (!vdev->disable_ioeventfd_handler)
>>>>>> +        virtio_queue_host_notifier_read(notifier);
>>>>>>         event_notifier_cleanup(notifier);
>>>>>>     }
>>>>>>
>>>>>> --
>>>>>> 1.8.3.1
>>>>>>
Jason Wang April 7, 2022, 7:05 a.m. UTC | #7
在 2022/4/6 上午3:18, Si-Wei Liu 写道:
>
>
> On 4/1/2022 7:00 PM, Jason Wang wrote:
>> On Sat, Apr 2, 2022 at 4:37 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>>>
>>>
>>> On 3/31/2022 1:36 AM, Jason Wang wrote:
>>>> On Thu, Mar 31, 2022 at 12:41 AM Si-Wei Liu <si-wei.liu@oracle.com> 
>>>> wrote:
>>>>>
>>>>> On 3/30/2022 2:14 AM, Jason Wang wrote:
>>>>>> On Wed, Mar 30, 2022 at 2:33 PM Si-Wei Liu 
>>>>>> <si-wei.liu@oracle.com> wrote:
>>>>>>> Previous commit prevents vhost-user and vhost-vdpa from using
>>>>>>> userland vq handler via disable_ioeventfd_handler. The same
>>>>>>> needs to be done for host notifier cleanup too, as the
>>>>>>> virtio_queue_host_notifier_read handler still tends to read
>>>>>>> pending event left behind on ioeventfd and attempts to handle
>>>>>>> outstanding kicks from QEMU userland vq.
>>>>>>>
>>>>>>> If vq handler is not disabled on cleanup, it may lead to sigsegv
>>>>>>> with recursive virtio_net_set_status call on the control vq:
>>>>>>>
>>>>>>> 0  0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6
>>>>>>> 1  0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6
>>>>>>> 2  0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6
>>>>>>> 3  0x00007f8ce3fec252 in  () at /lib64/libc.so.6
>>>>>>> 4  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized 
>>>>>>> out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:563
>>>>>>> 5  0x0000558f52d79421 in vhost_vdpa_get_vq_index (dev=<optimized 
>>>>>>> out>, idx=<optimized out>) at ../hw/virtio/vhost-vdpa.c:558
>>>>>>> 6  0x0000558f52d7329a in vhost_virtqueue_mask 
>>>>>>> (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=<optimized 
>>>>>>> out>) at ../hw/virtio/vhost.c:1557
>>>>>> I feel it's probably a bug elsewhere e.g when we fail to start
>>>>>> vhost-vDPA, it's the charge of the Qemu to poll host notifier and we
>>>>>> will fallback to the userspace vq handler.
>>>>> Apologies, an incorrect stack trace was pasted which actually came 
>>>>> from
>>>>> patch #1. I will post a v2 with the corresponding one as below:
>>>>>
>>>>> 0  0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at
>>>>> ../hw/core/qdev.c:376
>>>>> 1  0x000055f800c68ad8 in virtio_bus_device_iommu_enabled
>>>>> (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331
>>>>> 2  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>) at
>>>>> ../hw/virtio/vhost.c:318
>>>>> 3  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>,
>>>>> buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at
>>>>> ../hw/virtio/vhost.c:336
>>>>> 4  0x000055f800d71867 in vhost_virtqueue_stop
>>>>> (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590,
>>>>> vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241
>>>>> 5  0x000055f800d7406c in vhost_dev_stop 
>>>>> (hdev=hdev@entry=0x55f8037ccc30,
>>>>> vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839
>>>>> 6  0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30,
>>>>> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315
>>>>> 7  0x000055f800bf0678 in vhost_net_stop 
>>>>> (dev=dev@entry=0x55f8044ec590,
>>>>> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
>>>>> cvq=cvq@entry=1)
>>>>>       at ../hw/net/vhost_net.c:423
>>>>> 8  0x000055f800d4e628 in virtio_net_set_status (status=<optimized 
>>>>> out>,
>>>>> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
>>>>> 9  0x000055f800d4e628 in virtio_net_set_status
>>>>> (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at
>>>>> ../hw/net/virtio-net.c:370
>>>> I don't understand why virtio_net_handle_ctrl() call 
>>>> virtio_net_set_stauts()...
>>> The pending request left over on the ctrl vq was a VIRTIO_NET_CTRL_MQ
>>> command, i.e. in virtio_net_handle_mq():
>> Completely forget that the code was actually written by me :\
>>
>>> 1413     n->curr_queue_pairs = queue_pairs;
>>> 1414     /* stop the backend before changing the number of queue_pairs
>>> to avoid handling a
>>> 1415      * disabled queue */
>>> 1416     virtio_net_set_status(vdev, vdev->status);
>>> 1417     virtio_net_set_queue_pairs(n);
>>>
>>> Noted before the vdpa multiqueue support, there was never a vhost_dev
>>> for ctrl_vq exposed, i.e. there's no host notifier set up for the
>>> ctrl_vq on vhost_kernel as it is emulated in QEMU software.
>>>
>>>>> 10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=<optimized
>>>>> out>, iov=<optimized out>, cmd=0 '\000', n=0x55f8044ec590) at
>>>>> ../hw/net/virtio-net.c:1408
>>>>> 11 0x000055f800d534d8 in virtio_net_handle_ctrl (vdev=0x55f8044ec590,
>>>>> vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452
>>>>> 12 0x000055f800d69f37 in virtio_queue_host_notifier_read
>>>>> (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331
>>>>> 13 0x000055f800d69f37 in virtio_queue_host_notifier_read
>>>>> (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575
>>>>> 14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier
>>>>> (bus=<optimized out>, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312
>>>>> 15 0x000055f800d73106 in vhost_dev_disable_notifiers
>>>>> (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590)
>>>>>       at ../../../include/hw/virtio/virtio-bus.h:35
>>>>> 16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0,
>>>>> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316
>>>>> 17 0x000055f800bf0678 in vhost_net_stop 
>>>>> (dev=dev@entry=0x55f8044ec590,
>>>>> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
>>>>> cvq=cvq@entry=1)
>>>>>       at ../hw/net/vhost_net.c:423
>>>>> 18 0x000055f800d4e628 in virtio_net_set_status (status=<optimized 
>>>>> out>,
>>>>> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
>>>>> 19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590,
>>>>> status=15 '\017') at ../hw/net/virtio-net.c:370
>>>>> 20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590,
>>>>> val=<optimized out>) at ../hw/virtio/virtio.c:1945
>>>>> 21 0x000055f800d11d9d in vm_state_notify 
>>>>> (running=running@entry=false,
>>>>> state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333
>>>>> 22 0x000055f800d04e7a in do_vm_stop
>>>>> (state=state@entry=RUN_STATE_SHUTDOWN, 
>>>>> send_stop=send_stop@entry=false)
>>>>> at ../softmmu/cpus.c:262
>>>>> 23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280
>>>>> 24 0x000055f800d126af in qemu_cleanup () at ../softmmu/runstate.c:812
>>>>> 25 0x000055f800ad5b13 in main (argc=<optimized out>, argv=<optimized
>>>>> out>, envp=<optimized out>) at ../softmmu/main.c:51
>>>>>
>>>>>    From the trace pending read only occurs in stop path. The 
>>>>> recursive
>>>>> virtio_net_set_status from virtio_net_handle_ctrl doesn't make 
>>>>> sense to me.
>>>> Yes, we need to figure this out to know the root cause.
>>> I think it has something to do with the virtqueue unready issue that 
>>> the
>>> vhost_reset_device() refactoring series attempt to fix. If that is 
>>> fixed
>>> we should not see this sigsegv with mlx5_vdpa. However I guess we both
>>> agreed that the vq_unready support would need new uAPI (some flag) to
>>> define, hence this fix applies to the situation where uAPI doesn't 
>>> exist
>>> on the kernel or the vq_unready is not supported by vdpa vendor driver.
>>>
>> Yes.
>>
>>>> The code should work for the case when vhost-vdp fails to start.
>>> Unlike the other datapath queues for net vdpa, the events left 
>>> behind in
>>> the control queue can't be processed by the userspace, as unlike
>>> vhost-kernel we don't have a fallback path in the userspace.
>> So that's the question, we should have a safe fallback.
>>
>>> To ignore
>>> the pending event and let vhost vdpa process it on resume/start is
>>> perhaps the best thing to do. This is even true for datapath queues for
>>> other vdpa devices than of network.
>>>
>>>>> Not sure I got the reason why we need to handle pending host
>>>>> notification in userland vq, can you elaborate?
>>>> Because vhost-vDPA fails to start, we will "fallback" to a dummy 
>>>> userspace.
>>> Is the dummy userspace working or operational? What's the use case of
>>> this "fallback" dummy if what guest user can get is a busted NIC?
>> The problem is we can't do better in this case now. Such fallack (e.g
>> for vhost-user) has been used for years. Or do you have any better
>> ideas?
> In my opinion if vhost-vdpa or vhost-user fails to start, maybe we 
> should try to disable the device via virtio_error(), which would set 
> broken to true and set NEEDS_RESET in case of VERSION_1. That way the 
> device won't move forward further and the guest may get the indication 
> via config interrupt that something had gone wrong underneath. If 
> device reset is well supported there the guest driver would retry.


Note that NEEDS_RESET handling is not implemented in the current Linux drivers.


> This can at least give the backend some chance to recover if running 
> into intermittent error. The worst result would be the device keeps 
> resetting repeatedly, for which we may introduce tunable to control 
> the rate if seeing reset occurs too often.. Did this ever get 
> considered before?


I don't know, but we've managed to survive with such a fallback for
years. We can do this, but can virtio_error() fix the issue you describe
here?


>
> Noted that the dummy userspace can't handle any control vq command 
> effectively once the vhost backend fails, for e.g. how does it handle 
> those guest offload, rx mode, MAC or VLAN filter changes without 
> sending the request down to the backend? 


It should be no different from real hardware: the device simply
malfunctions. The driver can detect this in many ways. E.g. in the past
I suggested implementing a device watchdog for virtio-net; the prototype
was posted but for some reason it stalled. Maybe we can consider
continuing that work.


> This could easily get inconsistent state to the guest if somehow we 
> are able to resume the virtqueue without a reset. Even so, I suspect 
> the device reset eventually is still needed on the other part, which 
> is subject to the specific failure. It looks to me at least for 
> vhost-vdpa, it might be the safest fallback so far to ignore pending 
> event in ctrl_vq, and disable the device from moving forward in case 
> of backend start failure.


I don't get it here; if we fail to start vhost-vdpa, QEMU should do a
safe rewind, otherwise it would be a bug.


>
>>
>> It doesn't differ too much of the two approaches:
>>
>> 1) dummy fallback which can do even cvq
>>
>> and
>>
>> 2) disable host notifiers
>>
>> Especially consider 2) requires more changes.
> I'm not clear if 2) really needs more changes, as it seems to me that 
> it would take more unwarranted changes to make dummy fallback to work 
> on cvq? And suppose we can fallback to disabling device via 
> virtio_error(), we don't even need to change any code on cvq?


So let me explain my points:

1) we use the dummy receive path as a fallback, as vhost-user does

2) the code should safely fall back to that, otherwise there could be a
bug elsewhere

3) if we think the dummy fallback doesn't make sense, we can improve it,
but we need to figure out why we can crash in 2), since the code could
be used in other paths.


>
> On the other hand, for the specific code path this patch tries to fix, 
> it is not due to failure to start vhost-vdpa backend, but more of a 
> control flow flaw in the stop path due to lack of VQ stop uAPI. Let 
> alone dummy or host notifier, considering currently it's in the stop 
> path followed by a reset, I feel it should be pretty safe to just 
> ignore the pending event on the control vq rather than process it 
> prematurely in userspace. What do you think? I can leave without the 
> host notifier handler change for sure.


I wonder how vhost-user deals with this.


>
>>
>>> I
>>> think this is very different from the vhost-kernel case in that once
>>> vhost fails, we can fallback to userspace to emulate the network 
>>> through
>>> the tap fd in a good way. However, there's no equivalent yet for
>>> vhost-vdpa...
>>>
>> As said previously, technically we can have vhost-vDPA network backend
>> as a fallback.
> But this is working as yet. And how do you envision the datapath may 
> work given that we don't have a fallback tap fd?


I mean we can treat vhost-vdpa as a kind of general networking backend
that could be used by all NIC models like e1000e. Then we could use that
as a fallback.

But I'm not sure it's worth the bother.

Thanks


>
> -Siwei
>
>
>>   (So did for vhost-user).
>>
>> Thanks
>>
>>> Thanks,
>>> -Siwei
>>>
>>>> Thanks
>>>>
>>>>> Thanks,
>>>>> -Siwei
>>>>>
>>>>>> Thanks
>>>>>>
>>>>>>> 7  0x0000558f52c6b89a in virtio_pci_set_guest_notifier 
>>>>>>> (d=d@entry=0x558f568f0f60, n=n@entry=2, 
>>>>>>> assign=assign@entry=true, with_irqfd=with_irqfd@entry=false)
>>>>>>>       at ../hw/virtio/virtio-pci.c:974
>>>>>>> 8  0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers 
>>>>>>> (d=0x558f568f0f60, nvqs=3, assign=true) at 
>>>>>>> ../hw/virtio/virtio-pci.c:1019
>>>>>>> 9  0x0000558f52bf091d in vhost_net_start 
>>>>>>> (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, 
>>>>>>> data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1)
>>>>>>>       at ../hw/net/vhost_net.c:361
>>>>>>> 10 0x0000558f52d4e5e7 in virtio_net_set_status 
>>>>>>> (status=<optimized out>, n=0x558f568f91f0) at 
>>>>>>> ../hw/net/virtio-net.c:289
>>>>>>> 11 0x0000558f52d4e5e7 in virtio_net_set_status 
>>>>>>> (vdev=0x558f568f91f0, status=15 '\017') at 
>>>>>>> ../hw/net/virtio-net.c:370
>>>>>>> 12 0x0000558f52d6c4b2 in virtio_set_status 
>>>>>>> (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at 
>>>>>>> ../hw/virtio/virtio.c:1945
>>>>>>> 13 0x0000558f52c69eff in virtio_pci_common_write 
>>>>>>> (opaque=0x558f568f0f60, addr=<optimized out>, val=<optimized 
>>>>>>> out>, size=<optimized out>) at ../hw/virtio/virtio-pci.c:1292
>>>>>>> 14 0x0000558f52d15d6e in memory_region_write_accessor 
>>>>>>> (mr=0x558f568f19d0, addr=20, value=<optimized out>, size=1, 
>>>>>>> shift=<optimized out>, mask=<optimized out>, attrs=...)
>>>>>>>       at ../softmmu/memory.c:492
>>>>>>> 15 0x0000558f52d127de in access_with_adjusted_size 
>>>>>>> (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, 
>>>>>>> size=size@entry=1, access_size_min=<optimized out>, 
>>>>>>> access_size_max=<optimized out>, access_fn=0x558f52d15cf0 
>>>>>>> <memory_region_write_accessor>, mr=0x558f568f19d0, attrs=...) at 
>>>>>>> ../softmmu/memory.c:554
>>>>>>> 16 0x0000558f52d157ef in memory_region_dispatch_write 
>>>>>>> (mr=mr@entry=0x558f568f19d0, addr=20, data=<optimized out>, 
>>>>>>> op=<optimized out>, attrs=attrs@entry=...)
>>>>>>>       at ../softmmu/memory.c:1504
>>>>>>> 17 0x0000558f52d078e7 in flatview_write_continue 
>>>>>>> (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, 
>>>>>>> attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, 
>>>>>>> addr1=<optimized out>, l=<optimized out>, mr=0x558f568f19d0) at 
>>>>>>> ../../../include/qemu/host-utils.h:165
>>>>>>> 18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, 
>>>>>>> addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at 
>>>>>>> ../softmmu/physmem.c:2822
>>>>>>> 19 0x0000558f52d0b36b in address_space_write (as=<optimized 
>>>>>>> out>, addr=<optimized out>, attrs=..., 
>>>>>>> buf=buf@entry=0x7f8ce6300028, len=<optimized out>)
>>>>>>>       at ../softmmu/physmem.c:2914
>>>>>>> 20 0x0000558f52d0b3da in address_space_rw (as=<optimized out>, 
>>>>>>> addr=<optimized out>, attrs=...,
>>>>>>>       attrs@entry=..., buf=buf@entry=0x7f8ce6300028, 
>>>>>>> len=<optimized out>, is_write=<optimized out>) at 
>>>>>>> ../softmmu/physmem.c:2924
>>>>>>> 21 0x0000558f52dced09 in kvm_cpu_exec 
>>>>>>> (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903
>>>>>>> 22 0x0000558f52dcfabd in kvm_vcpu_thread_fn 
>>>>>>> (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49
>>>>>>> 23 0x0000558f52f9f04a in qemu_thread_start (args=<optimized 
>>>>>>> out>) at ../util/qemu-thread-posix.c:556
>>>>>>> 24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0
>>>>>>> 25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6
>>>>>>>
>>>>>>> Fixes: 4023784 ("vhost-vdpa: multiqueue support")
>>>>>>> Cc: Jason Wang <jasowang@redhat.com>
>>>>>>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>>>>>>> ---
>>>>>>>     hw/virtio/virtio-bus.c | 3 ++-
>>>>>>>     1 file changed, 2 insertions(+), 1 deletion(-)
>>>>>>>
>>>>>>> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
>>>>>>> index 0f69d1c..3159b58 100644
>>>>>>> --- a/hw/virtio/virtio-bus.c
>>>>>>> +++ b/hw/virtio/virtio-bus.c
>>>>>>> @@ -311,7 +311,8 @@ void 
>>>>>>> virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n)
>>>>>>>         /* Test and clear notifier after disabling event,
>>>>>>>          * in case poll callback didn't have time to run.
>>>>>>>          */
>>>>>>> -    virtio_queue_host_notifier_read(notifier);
>>>>>>> +    if (!vdev->disable_ioeventfd_handler)
>>>>>>> +        virtio_queue_host_notifier_read(notifier);
>>>>>>>         event_notifier_cleanup(notifier);
>>>>>>>     }
>>>>>>>
>>>>>>> -- 
>>>>>>> 1.8.3.1
>>>>>>>
>
Si-Wei Liu April 8, 2022, 1:02 a.m. UTC | #8
On 4/7/2022 12:05 AM, Jason Wang wrote:
>
> 在 2022/4/6 上午3:18, Si-Wei Liu 写道:
>>
>>
>> On 4/1/2022 7:00 PM, Jason Wang wrote:
>>> On Sat, Apr 2, 2022 at 4:37 AM Si-Wei Liu <si-wei.liu@oracle.com> 
>>> wrote:
>>>>
>>>>
>>>> On 3/31/2022 1:36 AM, Jason Wang wrote:
>>>>> On Thu, Mar 31, 2022 at 12:41 AM Si-Wei Liu 
>>>>> <si-wei.liu@oracle.com> wrote:
>>>>>>
>>>>>> On 3/30/2022 2:14 AM, Jason Wang wrote:
>>>>>>> On Wed, Mar 30, 2022 at 2:33 PM Si-Wei Liu 
>>>>>>> <si-wei.liu@oracle.com> wrote:
>>>>>>>> Previous commit prevents vhost-user and vhost-vdpa from using
>>>>>>>> userland vq handler via disable_ioeventfd_handler. The same
>>>>>>>> needs to be done for host notifier cleanup too, as the
>>>>>>>> virtio_queue_host_notifier_read handler still tends to read
>>>>>>>> pending event left behind on ioeventfd and attempts to handle
>>>>>>>> outstanding kicks from QEMU userland vq.
>>>>>>>>
>>>>>>>> If vq handler is not disabled on cleanup, it may lead to sigsegv
>>>>>>>> with recursive virtio_net_set_status call on the control vq:
>>>>>>>>
>>>>>>>> 0  0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6
>>>>>>>> 1  0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6
>>>>>>>> 2  0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6
>>>>>>>> 3  0x00007f8ce3fec252 in  () at /lib64/libc.so.6
>>>>>>>> 4  0x0000558f52d79421 in vhost_vdpa_get_vq_index 
>>>>>>>> (dev=<optimized out>, idx=<optimized out>) at 
>>>>>>>> ../hw/virtio/vhost-vdpa.c:563
>>>>>>>> 5  0x0000558f52d79421 in vhost_vdpa_get_vq_index 
>>>>>>>> (dev=<optimized out>, idx=<optimized out>) at 
>>>>>>>> ../hw/virtio/vhost-vdpa.c:558
>>>>>>>> 6  0x0000558f52d7329a in vhost_virtqueue_mask 
>>>>>>>> (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=<optimized 
>>>>>>>> out>) at ../hw/virtio/vhost.c:1557
>>>>>>> I feel it's probably a bug elsewhere e.g when we fail to start
>>>>>>> vhost-vDPA, it's the charge of the Qemu to poll host notifier 
>>>>>>> and we
>>>>>>> will fallback to the userspace vq handler.
>>>>>> Apologies, an incorrect stack trace was pasted which actually 
>>>>>> came from
>>>>>> patch #1. I will post a v2 with the corresponding one as below:
>>>>>>
>>>>>> 0  0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at
>>>>>> ../hw/core/qdev.c:376
>>>>>> 1  0x000055f800c68ad8 in virtio_bus_device_iommu_enabled
>>>>>> (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331
>>>>>> 2  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>) at
>>>>>> ../hw/virtio/vhost.c:318
>>>>>> 3  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>,
>>>>>> buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at
>>>>>> ../hw/virtio/vhost.c:336
>>>>>> 4  0x000055f800d71867 in vhost_virtqueue_stop
>>>>>> (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590,
>>>>>> vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241
>>>>>> 5  0x000055f800d7406c in vhost_dev_stop 
>>>>>> (hdev=hdev@entry=0x55f8037ccc30,
>>>>>> vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839
>>>>>> 6  0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30,
>>>>>> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315
>>>>>> 7  0x000055f800bf0678 in vhost_net_stop 
>>>>>> (dev=dev@entry=0x55f8044ec590,
>>>>>> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
>>>>>> cvq=cvq@entry=1)
>>>>>>       at ../hw/net/vhost_net.c:423
>>>>>> 8  0x000055f800d4e628 in virtio_net_set_status (status=<optimized 
>>>>>> out>,
>>>>>> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
>>>>>> 9  0x000055f800d4e628 in virtio_net_set_status
>>>>>> (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at
>>>>>> ../hw/net/virtio-net.c:370
>>>>> I don't understand why virtio_net_handle_ctrl() call 
>>>>> virtio_net_set_stauts()...
>>>> The pending request left over on the ctrl vq was a VIRTIO_NET_CTRL_MQ
>>>> command, i.e. in virtio_net_handle_mq():
>>> Completely forget that the code was actually written by me :\
>>>
>>>> 1413     n->curr_queue_pairs = queue_pairs;
>>>> 1414     /* stop the backend before changing the number of queue_pairs
>>>> to avoid handling a
>>>> 1415      * disabled queue */
>>>> 1416     virtio_net_set_status(vdev, vdev->status);
>>>> 1417     virtio_net_set_queue_pairs(n);
>>>>
>>>> Noted before the vdpa multiqueue support, there was never a vhost_dev
>>>> for ctrl_vq exposed, i.e. there's no host notifier set up for the
>>>> ctrl_vq on vhost_kernel as it is emulated in QEMU software.
>>>>
>>>>>> 10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=<optimized
>>>>>> out>, iov=<optimized out>, cmd=0 '\000', n=0x55f8044ec590) at
>>>>>> ../hw/net/virtio-net.c:1408
>>>>>> 11 0x000055f800d534d8 in virtio_net_handle_ctrl 
>>>>>> (vdev=0x55f8044ec590,
>>>>>> vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452
>>>>>> 12 0x000055f800d69f37 in virtio_queue_host_notifier_read
>>>>>> (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331
>>>>>> 13 0x000055f800d69f37 in virtio_queue_host_notifier_read
>>>>>> (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575
>>>>>> 14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier
>>>>>> (bus=<optimized out>, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312
>>>>>> 15 0x000055f800d73106 in vhost_dev_disable_notifiers
>>>>>> (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590)
>>>>>>       at ../../../include/hw/virtio/virtio-bus.h:35
>>>>>> 16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0,
>>>>>> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316
>>>>>> 17 0x000055f800bf0678 in vhost_net_stop 
>>>>>> (dev=dev@entry=0x55f8044ec590,
>>>>>> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
>>>>>> cvq=cvq@entry=1)
>>>>>>       at ../hw/net/vhost_net.c:423
>>>>>> 18 0x000055f800d4e628 in virtio_net_set_status (status=<optimized 
>>>>>> out>,
>>>>>> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
>>>>>> 19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590,
>>>>>> status=15 '\017') at ../hw/net/virtio-net.c:370
>>>>>> 20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590,
>>>>>> val=<optimized out>) at ../hw/virtio/virtio.c:1945
>>>>>> 21 0x000055f800d11d9d in vm_state_notify 
>>>>>> (running=running@entry=false,
>>>>>> state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333
>>>>>> 22 0x000055f800d04e7a in do_vm_stop
>>>>>> (state=state@entry=RUN_STATE_SHUTDOWN, 
>>>>>> send_stop=send_stop@entry=false)
>>>>>> at ../softmmu/cpus.c:262
>>>>>> 23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280
>>>>>> 24 0x000055f800d126af in qemu_cleanup () at 
>>>>>> ../softmmu/runstate.c:812
>>>>>> 25 0x000055f800ad5b13 in main (argc=<optimized out>, argv=<optimized
>>>>>> out>, envp=<optimized out>) at ../softmmu/main.c:51
>>>>>>
>>>>>>    From the trace pending read only occurs in stop path. The 
>>>>>> recursive
>>>>>> virtio_net_set_status from virtio_net_handle_ctrl doesn't make 
>>>>>> sense to me.
>>>>> Yes, we need to figure this out to know the root cause.
>>>> I think it has something to do with the virtqueue unready issue 
>>>> that the
>>>> vhost_reset_device() refactoring series attempt to fix. If that is 
>>>> fixed
>>>> we should not see this sigsegv with mlx5_vdpa. However I guess we both
>>>> agreed that the vq_unready support would need new uAPI (some flag) to
>>>> define, hence this fix applies to the situation where uAPI doesn't 
>>>> exist
>>>> on the kernel or the vq_unready is not supported by vdpa vendor 
>>>> driver.
>>>>
>>> Yes.
>>>
>>>>> The code should work for the case when vhost-vdp fails to start.
>>>> Unlike the other datapath queues for net vdpa, the events left 
>>>> behind in
>>>> the control queue can't be processed by the userspace, as unlike
>>>> vhost-kernel we don't have a fallback path in the userspace.
>>> So that's the question, we should have a safe fallback.
>>>
>>>> To ignore
>>>> the pending event and let vhost vdpa process it on resume/start is
>>>> perhaps the best thing to do. This is even true for datapath queues 
>>>> for
>>>> other vdpa devices than of network.
>>>>
>>>>>> Not sure I got the reason why we need to handle pending host
>>>>>> notification in userland vq, can you elaborate?
>>>>> Because vhost-vDPA fails to start, we will "fallback" to a dummy 
>>>>> userspace.
>>>> Is the dummy userspace working or operational? What's the use case of
>>>> this "fallback" dummy if what guest user can get is a busted NIC?
>>> The problem is we can't do better in this case now. Such fallack (e.g
>>> for vhost-user) has been used for years. Or do you have any better
>>> ideas?
>> In my opinion if vhost-vdpa or vhost-user fails to start, maybe we 
>> should try to disable the device via virtio_error(), which would set 
>> broken to true and set NEEDS_RESET in case of VERSION_1. That way the 
>> device won't move forward further and the guest may get the 
>> indication via config interrupt that something had gone wrong 
>> underneath. If device reset is well supported there the guest driver 
>> would retry.
>
>
> Note that the NEEDS_RESET is not implemented in the current Linux 
> drivers.
Yes, I am aware of that. I think the point of setting NEEDS_RESET is to
stop the device from moving forward: when it comes to a start failure,
the vhost backend is already bogged down or in a bogus state, unable to
move further. It's also the standardized way to explicitly inform the
guest of a failure on the device side, although the corresponding
NEEDS_RESET handling hasn't been implemented in any Linux driver yet.
Of course, alternatively the guest can figure it out implicitly via a
watchdog timer, as you indicated below.

>
>
>> This can at least give the backend some chance to recover if running 
>> into intermittent error. The worst result would be the device keeps 
>> resetting repeatedly, for which we may introduce tunable to control 
>> the rate if seeing reset occurs too often.. Did this ever get 
>> considered before?
>
>
> I don't know, but we manage to survive with such fallback for years. 
I wonder how the vhost-user client may restart in this case, i.e. when
running into a transient backend failure. I haven't checked the code
yet; do you mean there's no error recovery (or at least error detection)
implemented in the vhost-user client, e.g. DPDK? Or does it just try to
reconnect if the socket connection gets closed, but never care about a
vhost-user backend start failure?

> We can do this, but can virtio_error() fix the issue you describe here?
It doesn't fix the sigsegv issue, for certain. Actually the issue I ran
into has little to do with error handling. But assuming virtio_error()
in the start error path, we can just live without falling back to the
dummy userspace or handling any request (as all vqs are effectively
stopped/disabled). That is exactly consistent with the handling in the
stop (success) path: ignore pending events on the host notifier. In
other words, it doesn't necessarily have to assume the existence of the
dummy userspace fallback, which IMHO does nothing more than marking
NEEDS_RESET with virtio_error() would. On the contrary, if there's ever
a good use case for the dummy userspace (which I might not be aware of),
I'd think the fallback to userspace emulation would also be needed in
the stop path. But I doubt the need for adding such complex code without
seeing a convincing case.

>
>
>>
>> Noted that the dummy userspace can't handle any control vq command 
>> effectively once the vhost backend fails, for e.g. how does it handle 
>> those guest offload, rx mode, MAC or VLAN filter changes without 
>> sending the request down to the backend? 
>
>
> It should be no difference compared to the real hardware. The device 
> is just malfunction. The driver can detect this in many ways. E.g in 
> the past I suggest to implement the device watchdog for virtio-net, 
> the prototype is posted but for some reason it was stalled. Maybe we 
> can consider to continue the work.
Would you mind pointing me to the thread? What was the blocker then?

I feel it might be nice to consider NEEDS_RESET handling for guest 
drivers as it is more relevant here.

>
>
>> This could easily get inconsistent state to the guest if somehow we 
>> are able to resume the virtqueue without a reset. Even so, I suspect 
>> the device reset eventually is still needed on the other part, which 
>> is subject to the specific failure. It looks to me at least for 
>> vhost-vdpa, it might be the safest fallback so far to ignore pending 
>> event in ctrl_vq, and disable the device from moving forward in case 
>> of backend start failure.
>
>
> I don't get here, if we fail to start vhost-vdpa, the Qemu should do a 
> safe rewind otherwise it would be a bug.
In an ideal world, yes, QEMU should back off to where it was. However,
I worry that not all of the operations have a symmetric undo op; for
example, there's no unready op for vq_ready(), and reset_owner()
internally contains a device reset to undo the effects of set_owner().
It would be easier to just reset as a safe fallback in this case.

>
>
>>
>>>
>>> It doesn't differ too much of the two approaches:
>>>
>>> 1) dummy fallback which can do even cvq
>>>
>>> and
>>>
>>> 2) disable host notifiers
>>>
>>> Especially consider 2) requires more changes.
>> I'm not clear if 2) really needs more changes, as it seems to me that 
>> it would take more unwarranted changes to make dummy fallback to work 
>> on cvq? And suppose we can fallback to disabling device via 
>> virtio_error(), we don't even need to change any code on cvq?
>
>
> So let me explain my points:
>
> 1) we use dummy receive as a fallback as vhost-user
>
> 2) the code should safely fallback to that otherwise it could be a bug 
> elsewhere
>
> 3) if we think the dummy fallback doesn't make sense, we can improve, 
> but we need to figure out why we can crash for 2) since the code could 
> be used in other path.
I think we may either ignore the pending requests left behind on the
vhost host notifier or even flush the queue in the stop path, since by
the time we reach this point all of the data vqs have been effectively
stopped via vhost_dev_stop() and vhost_dev_disable_notifiers(). It looks
like that's what the dummy fallback does for the data vqs too, i.e.
receive_disabled is set until the queues for the dummy backend are
eventually flushed when the device is fully stopped.
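
For the flush flavor, something along these lines in the stop path
might be enough (purely illustrative and untested; the ctrl vq index
arithmetic is an assumption, and so is whether completing the stale
commands with len 0 is acceptable to the guest driver):

    /* sketch: return leftover ctrl vq requests to the guest without
     * processing them in userspace, once the backend is stopped */
    VirtQueue *cvq = virtio_get_queue(vdev, n->max_queue_pairs * 2);

    if (virtqueue_drop_all(cvq)) {
        virtio_notify(vdev, cvq);
    }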

What "could be used in other path" is the key question to answer in my 
opinion. Without knowing the (potential) use cases, it'd be hard to 
imagine what level of emulation needs to be done. I hope we don't have 
to involve complex code change to emulate changing the no. of queues 
when it's known all the heavy lifting done earlier will be effectively 
destroyed with a follow-up reset in the stop path.

As said, I'm fine with not touching the dummy fallback part, but at 
least we should figure out a simple way to fix the vhost-vdpa control vq 
issue.

>
>
>>
>> On the other hand, for the specific code path this patch tries to 
>> fix, it is not due to failure to start vhost-vdpa backend, but more 
>> of a control flow flaw in the stop path due to lack of VQ stop uAPI. 
>> Let alone dummy or host notifier, considering currently it's in the 
>> stop path followed by a reset, I feel it should be pretty safe to 
>> just ignore the pending event on the control vq rather than process 
>> it prematurely in userspace. What do you think? I can leave without 
>> the host notifier handler change for sure.
>
>
> I wonder how vhost-user deal with this.
vhost-user doesn't expose a host notifier for the control vq, so this
path is not even involved. All requests on the control vq are handled by
the emulated virtio_net_handle_ctrl handler in the QEMU process.

>
>
>>
>>>
>>>> I
>>>> think this is very different from the vhost-kernel case in that once
>>>> vhost fails, we can fallback to userspace to emulate the network 
>>>> through
>>>> the tap fd in a good way. However, there's no equivalent yet for
>>>> vhost-vdpa...
>>>>
>>> As said previously, technically we can have vhost-vDPA network backend
>>> as a fallback.
>> But this is working as yet. And how do you envision the datapath may 
>> work given that we don't have a fallback tap fd?
>
>
> I mean we can treat vhost-vdpa as a kind of general networking backend 
> that could be used by all NIC model like e1000e. Then we can use that 
> as a fallback.
>
> But I'm not sure it's worth to bother.
Well, perhaps that's another story. I think to support that it would 
need more code refactoring than just the ioeventfd handler change alone...

Thanks,
-Siwei

>
> Thanks
>
>
>>
>> -Siwei
>>
>>
>>>   (So did for vhost-user).
>>>
>>> Thanks
>>>
>>>> Thanks,
>>>> -Siwei
>>>>
>>>>> Thanks
>>>>>
>>>>>> Thanks,
>>>>>> -Siwei
>>>>>>
>>>>>>> Thanks
>>>>>>>
>>>>>>>> 7  0x0000558f52c6b89a in virtio_pci_set_guest_notifier 
>>>>>>>> (d=d@entry=0x558f568f0f60, n=n@entry=2, 
>>>>>>>> assign=assign@entry=true, with_irqfd=with_irqfd@entry=false)
>>>>>>>>       at ../hw/virtio/virtio-pci.c:974
>>>>>>>> 8  0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers 
>>>>>>>> (d=0x558f568f0f60, nvqs=3, assign=true) at 
>>>>>>>> ../hw/virtio/virtio-pci.c:1019
>>>>>>>> 9  0x0000558f52bf091d in vhost_net_start 
>>>>>>>> (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0, 
>>>>>>>> data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1)
>>>>>>>>       at ../hw/net/vhost_net.c:361
>>>>>>>> 10 0x0000558f52d4e5e7 in virtio_net_set_status 
>>>>>>>> (status=<optimized out>, n=0x558f568f91f0) at 
>>>>>>>> ../hw/net/virtio-net.c:289
>>>>>>>> 11 0x0000558f52d4e5e7 in virtio_net_set_status 
>>>>>>>> (vdev=0x558f568f91f0, status=15 '\017') at 
>>>>>>>> ../hw/net/virtio-net.c:370
>>>>>>>> 12 0x0000558f52d6c4b2 in virtio_set_status 
>>>>>>>> (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at 
>>>>>>>> ../hw/virtio/virtio.c:1945
>>>>>>>> 13 0x0000558f52c69eff in virtio_pci_common_write 
>>>>>>>> (opaque=0x558f568f0f60, addr=<optimized out>, val=<optimized 
>>>>>>>> out>, size=<optimized out>) at ../hw/virtio/virtio-pci.c:1292
>>>>>>>> 14 0x0000558f52d15d6e in memory_region_write_accessor 
>>>>>>>> (mr=0x558f568f19d0, addr=20, value=<optimized out>, size=1, 
>>>>>>>> shift=<optimized out>, mask=<optimized out>, attrs=...)
>>>>>>>>       at ../softmmu/memory.c:492
>>>>>>>> 15 0x0000558f52d127de in access_with_adjusted_size 
>>>>>>>> (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748, 
>>>>>>>> size=size@entry=1, access_size_min=<optimized out>, 
>>>>>>>> access_size_max=<optimized out>, access_fn=0x558f52d15cf0 
>>>>>>>> <memory_region_write_accessor>, mr=0x558f568f19d0, attrs=...) 
>>>>>>>> at ../softmmu/memory.c:554
>>>>>>>> 16 0x0000558f52d157ef in memory_region_dispatch_write 
>>>>>>>> (mr=mr@entry=0x558f568f19d0, addr=20, data=<optimized out>, 
>>>>>>>> op=<optimized out>, attrs=attrs@entry=...)
>>>>>>>>       at ../softmmu/memory.c:1504
>>>>>>>> 17 0x0000558f52d078e7 in flatview_write_continue 
>>>>>>>> (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124, 
>>>>>>>> attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1, 
>>>>>>>> addr1=<optimized out>, l=<optimized out>, mr=0x558f568f19d0) at 
>>>>>>>> ../../../include/qemu/host-utils.h:165
>>>>>>>> 18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90, 
>>>>>>>> addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at 
>>>>>>>> ../softmmu/physmem.c:2822
>>>>>>>> 19 0x0000558f52d0b36b in address_space_write (as=<optimized 
>>>>>>>> out>, addr=<optimized out>, attrs=..., 
>>>>>>>> buf=buf@entry=0x7f8ce6300028, len=<optimized out>)
>>>>>>>>       at ../softmmu/physmem.c:2914
>>>>>>>> 20 0x0000558f52d0b3da in address_space_rw (as=<optimized out>, 
>>>>>>>> addr=<optimized out>, attrs=...,
>>>>>>>>       attrs@entry=..., buf=buf@entry=0x7f8ce6300028, 
>>>>>>>> len=<optimized out>, is_write=<optimized out>) at 
>>>>>>>> ../softmmu/physmem.c:2924
>>>>>>>> 21 0x0000558f52dced09 in kvm_cpu_exec 
>>>>>>>> (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903
>>>>>>>> 22 0x0000558f52dcfabd in kvm_vcpu_thread_fn 
>>>>>>>> (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49
>>>>>>>> 23 0x0000558f52f9f04a in qemu_thread_start (args=<optimized 
>>>>>>>> out>) at ../util/qemu-thread-posix.c:556
>>>>>>>> 24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0
>>>>>>>> 25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6
>>>>>>>>
>>>>>>>> Fixes: 4023784 ("vhost-vdpa: multiqueue support")
>>>>>>>> Cc: Jason Wang <jasowang@redhat.com>
>>>>>>>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
>>>>>>>> ---
>>>>>>>>     hw/virtio/virtio-bus.c | 3 ++-
>>>>>>>>     1 file changed, 2 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
>>>>>>>> index 0f69d1c..3159b58 100644
>>>>>>>> --- a/hw/virtio/virtio-bus.c
>>>>>>>> +++ b/hw/virtio/virtio-bus.c
>>>>>>>> @@ -311,7 +311,8 @@ void 
>>>>>>>> virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n)
>>>>>>>>         /* Test and clear notifier after disabling event,
>>>>>>>>          * in case poll callback didn't have time to run.
>>>>>>>>          */
>>>>>>>> -    virtio_queue_host_notifier_read(notifier);
>>>>>>>> +    if (!vdev->disable_ioeventfd_handler)
>>>>>>>> +        virtio_queue_host_notifier_read(notifier);
>>>>>>>>         event_notifier_cleanup(notifier);
>>>>>>>>     }
>>>>>>>>
>>>>>>>> -- 
>>>>>>>> 1.8.3.1
>>>>>>>>
>>
>
Jason Wang April 11, 2022, 8:49 a.m. UTC | #9
On Fri, Apr 8, 2022 at 9:02 AM Si-Wei Liu <si-wei.liu@oracle.com> wrote:
>
>
>
> On 4/7/2022 12:05 AM, Jason Wang wrote:
> >
> > 在 2022/4/6 上午3:18, Si-Wei Liu 写道:
> >>
> >>
> >> On 4/1/2022 7:00 PM, Jason Wang wrote:
> >>> On Sat, Apr 2, 2022 at 4:37 AM Si-Wei Liu <si-wei.liu@oracle.com>
> >>> wrote:
> >>>>
> >>>>
> >>>> On 3/31/2022 1:36 AM, Jason Wang wrote:
> >>>>> On Thu, Mar 31, 2022 at 12:41 AM Si-Wei Liu
> >>>>> <si-wei.liu@oracle.com> wrote:
> >>>>>>
> >>>>>> On 3/30/2022 2:14 AM, Jason Wang wrote:
> >>>>>>> On Wed, Mar 30, 2022 at 2:33 PM Si-Wei Liu
> >>>>>>> <si-wei.liu@oracle.com> wrote:
> >>>>>>>> Previous commit prevents vhost-user and vhost-vdpa from using
> >>>>>>>> userland vq handler via disable_ioeventfd_handler. The same
> >>>>>>>> needs to be done for host notifier cleanup too, as the
> >>>>>>>> virtio_queue_host_notifier_read handler still tends to read
> >>>>>>>> pending event left behind on ioeventfd and attempts to handle
> >>>>>>>> outstanding kicks from QEMU userland vq.
> >>>>>>>>
> >>>>>>>> If vq handler is not disabled on cleanup, it may lead to sigsegv
> >>>>>>>> with recursive virtio_net_set_status call on the control vq:
> >>>>>>>>
> >>>>>>>> 0  0x00007f8ce3ff3387 in raise () at /lib64/libc.so.6
> >>>>>>>> 1  0x00007f8ce3ff4a78 in abort () at /lib64/libc.so.6
> >>>>>>>> 2  0x00007f8ce3fec1a6 in __assert_fail_base () at /lib64/libc.so.6
> >>>>>>>> 3  0x00007f8ce3fec252 in  () at /lib64/libc.so.6
> >>>>>>>> 4  0x0000558f52d79421 in vhost_vdpa_get_vq_index
> >>>>>>>> (dev=<optimized out>, idx=<optimized out>) at
> >>>>>>>> ../hw/virtio/vhost-vdpa.c:563
> >>>>>>>> 5  0x0000558f52d79421 in vhost_vdpa_get_vq_index
> >>>>>>>> (dev=<optimized out>, idx=<optimized out>) at
> >>>>>>>> ../hw/virtio/vhost-vdpa.c:558
> >>>>>>>> 6  0x0000558f52d7329a in vhost_virtqueue_mask
> >>>>>>>> (hdev=0x558f55c01800, vdev=0x558f568f91f0, n=2, mask=<optimized
> >>>>>>>> out>) at ../hw/virtio/vhost.c:1557
> >>>>>>> I feel it's probably a bug elsewhere e.g when we fail to start
> >>>>>>> vhost-vDPA, it's the charge of the Qemu to poll host notifier
> >>>>>>> and we
> >>>>>>> will fallback to the userspace vq handler.
> >>>>>> Apologies, an incorrect stack trace was pasted which actually
> >>>>>> came from
> >>>>>> patch #1. I will post a v2 with the corresponding one as below:
> >>>>>>
> >>>>>> 0  0x000055f800df1780 in qdev_get_parent_bus (dev=0x0) at
> >>>>>> ../hw/core/qdev.c:376
> >>>>>> 1  0x000055f800c68ad8 in virtio_bus_device_iommu_enabled
> >>>>>> (vdev=vdev@entry=0x0) at ../hw/virtio/virtio-bus.c:331
> >>>>>> 2  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>) at
> >>>>>> ../hw/virtio/vhost.c:318
> >>>>>> 3  0x000055f800d70d7f in vhost_memory_unmap (dev=<optimized out>,
> >>>>>> buffer=0x7fc19bec5240, len=2052, is_write=1, access_len=2052) at
> >>>>>> ../hw/virtio/vhost.c:336
> >>>>>> 4  0x000055f800d71867 in vhost_virtqueue_stop
> >>>>>> (dev=dev@entry=0x55f8037ccc30, vdev=vdev@entry=0x55f8044ec590,
> >>>>>> vq=0x55f8037cceb0, idx=0) at ../hw/virtio/vhost.c:1241
> >>>>>> 5  0x000055f800d7406c in vhost_dev_stop
> >>>>>> (hdev=hdev@entry=0x55f8037ccc30,
> >>>>>> vdev=vdev@entry=0x55f8044ec590) at ../hw/virtio/vhost.c:1839
> >>>>>> 6  0x000055f800bf00a7 in vhost_net_stop_one (net=0x55f8037ccc30,
> >>>>>> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:315
> >>>>>> 7  0x000055f800bf0678 in vhost_net_stop
> >>>>>> (dev=dev@entry=0x55f8044ec590,
> >>>>>> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
> >>>>>> cvq=cvq@entry=1)
> >>>>>>       at ../hw/net/vhost_net.c:423
> >>>>>> 8  0x000055f800d4e628 in virtio_net_set_status (status=<optimized
> >>>>>> out>,
> >>>>>> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
> >>>>>> 9  0x000055f800d4e628 in virtio_net_set_status
> >>>>>> (vdev=vdev@entry=0x55f8044ec590, status=15 '\017') at
> >>>>>> ../hw/net/virtio-net.c:370
> >>>>> I don't understand why virtio_net_handle_ctrl() call
> >>>>> virtio_net_set_stauts()...
> >>>> The pending request left over on the ctrl vq was a VIRTIO_NET_CTRL_MQ
> >>>> command, i.e. in virtio_net_handle_mq():
> >>> Completely forget that the code was actually written by me :\
> >>>
> >>>> 1413     n->curr_queue_pairs = queue_pairs;
> >>>> 1414     /* stop the backend before changing the number of queue_pairs
> >>>> to avoid handling a
> >>>> 1415      * disabled queue */
> >>>> 1416     virtio_net_set_status(vdev, vdev->status);
> >>>> 1417     virtio_net_set_queue_pairs(n);
> >>>>
> >>>> Noted before the vdpa multiqueue support, there was never a vhost_dev
> >>>> for ctrl_vq exposed, i.e. there's no host notifier set up for the
> >>>> ctrl_vq on vhost_kernel as it is emulated in QEMU software.
> >>>>
> >>>>>> 10 0x000055f800d534d8 in virtio_net_handle_ctrl (iov_cnt=<optimized
> >>>>>> out>, iov=<optimized out>, cmd=0 '\000', n=0x55f8044ec590) at
> >>>>>> ../hw/net/virtio-net.c:1408
> >>>>>> 11 0x000055f800d534d8 in virtio_net_handle_ctrl
> >>>>>> (vdev=0x55f8044ec590,
> >>>>>> vq=0x7fc1a7e888d0) at ../hw/net/virtio-net.c:1452
> >>>>>> 12 0x000055f800d69f37 in virtio_queue_host_notifier_read
> >>>>>> (vq=0x7fc1a7e888d0) at ../hw/virtio/virtio.c:2331
> >>>>>> 13 0x000055f800d69f37 in virtio_queue_host_notifier_read
> >>>>>> (n=n@entry=0x7fc1a7e8894c) at ../hw/virtio/virtio.c:3575
> >>>>>> 14 0x000055f800c688e6 in virtio_bus_cleanup_host_notifier
> >>>>>> (bus=<optimized out>, n=n@entry=14) at ../hw/virtio/virtio-bus.c:312
> >>>>>> 15 0x000055f800d73106 in vhost_dev_disable_notifiers
> >>>>>> (hdev=hdev@entry=0x55f8035b51b0, vdev=vdev@entry=0x55f8044ec590)
> >>>>>>       at ../../../include/hw/virtio/virtio-bus.h:35
> >>>>>> 16 0x000055f800bf00b2 in vhost_net_stop_one (net=0x55f8035b51b0,
> >>>>>> dev=0x55f8044ec590) at ../hw/net/vhost_net.c:316
> >>>>>> 17 0x000055f800bf0678 in vhost_net_stop
> >>>>>> (dev=dev@entry=0x55f8044ec590,
> >>>>>> ncs=0x55f80452bae0, data_queue_pairs=data_queue_pairs@entry=7,
> >>>>>> cvq=cvq@entry=1)
> >>>>>>       at ../hw/net/vhost_net.c:423
> >>>>>> 18 0x000055f800d4e628 in virtio_net_set_status (status=<optimized
> >>>>>> out>,
> >>>>>> n=0x55f8044ec590) at ../hw/net/virtio-net.c:296
> >>>>>> 19 0x000055f800d4e628 in virtio_net_set_status (vdev=0x55f8044ec590,
> >>>>>> status=15 '\017') at ../hw/net/virtio-net.c:370
> >>>>>> 20 0x000055f800d6c4b2 in virtio_set_status (vdev=0x55f8044ec590,
> >>>>>> val=<optimized out>) at ../hw/virtio/virtio.c:1945
> >>>>>> 21 0x000055f800d11d9d in vm_state_notify
> >>>>>> (running=running@entry=false,
> >>>>>> state=state@entry=RUN_STATE_SHUTDOWN) at ../softmmu/runstate.c:333
> >>>>>> 22 0x000055f800d04e7a in do_vm_stop
> >>>>>> (state=state@entry=RUN_STATE_SHUTDOWN,
> >>>>>> send_stop=send_stop@entry=false)
> >>>>>> at ../softmmu/cpus.c:262
> >>>>>> 23 0x000055f800d04e99 in vm_shutdown () at ../softmmu/cpus.c:280
> >>>>>> 24 0x000055f800d126af in qemu_cleanup () at
> >>>>>> ../softmmu/runstate.c:812
> >>>>>> 25 0x000055f800ad5b13 in main (argc=<optimized out>, argv=<optimized
> >>>>>> out>, envp=<optimized out>) at ../softmmu/main.c:51
> >>>>>>
> >>>>>> From the trace, the pending read only occurs in the stop path.
> >>>>>> The recursive virtio_net_set_status from virtio_net_handle_ctrl
> >>>>>> doesn't make sense to me.
> >>>>> Yes, we need to figure this out to know the root cause.
> >>>> I think it has something to do with the virtqueue unready issue
> >>>> that the vhost_reset_device() refactoring series attempts to fix. If
> >>>> that is fixed we should not see this sigsegv with mlx5_vdpa. However,
> >>>> I guess we both agreed that the vq_unready support would need a new
> >>>> uAPI (some flag) to define, hence this fix applies to the situation
> >>>> where the uAPI doesn't exist in the kernel or vq_unready is not
> >>>> supported by the vdpa vendor driver.
> >>>>
> >>> Yes.
> >>>
> >>>>> The code should work for the case when vhost-vdpa fails to start.
> >>>> Unlike the other datapath queues for net vdpa, the events left
> >>>> behind in the control queue can't be processed by userspace, as
> >>>> unlike vhost-kernel we don't have a userspace fallback path.
> >>> So that's the question, we should have a safe fallback.
> >>>
> >>>> To ignore the pending event and let vhost-vdpa process it on
> >>>> resume/start is perhaps the best thing to do. This is even true for
> >>>> the datapath queues of vdpa devices other than network ones.
> >>>>
> >>>>>> Not sure I got the reason why we need to handle pending host
> >>>>>> notification in userland vq, can you elaborate?
> >>>>> Because vhost-vDPA fails to start, we will "fallback" to a dummy
> >>>>> userspace.
> >>>> Is the dummy userspace working or operational? What's the use case
> >>>> of this "fallback" dummy if all the guest user gets is a busted NIC?
> >>> The problem is we can't do better in this case now. Such a fallback
> >>> (e.g. for vhost-user) has been used for years. Or do you have any
> >>> better ideas?
> >> In my opinion if vhost-vdpa or vhost-user fails to start, maybe we
> >> should try to disable the device via virtio_error(), which would set
> >> broken to true and set NEEDS_RESET in case of VERSION_1. That way the
> >> device won't move forward further and the guest may get the
> >> indication via config interrupt that something had gone wrong
> >> underneath. If device reset is well supported there the guest driver
> >> would retry.
> >
> >
> > Note that the NEEDS_RESET is not implemented in the current Linux
> > drivers.
> Yes, I am aware of that. I think the point of setting NEEDS_RESET is to
> stop the device from moving forward, as when it comes to a start failure
> the vhost backend is already bogged down or in a bogus state, unable to
> move further. And it's the standardized way to explicitly inform the
> guest of a failure on the device side, although the corresponding
> NEEDS_RESET handling hasn't been implemented in any Linux driver yet. Of
> course, alternatively the guest can figure it out itself implicitly via a
> watchdog timer, as you indicated below.

Right, but I think the guest-side handling is something that is nice to
have but not a must, since we don't always trust the device.
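
A minimal sketch of the virtio_error() idea discussed above, assuming the
failure is surfaced from the vhost start path (the helper name, call site
and errno convention are only illustrative, not part of this series):

/* Illustrative only: on vhost backend start failure, mark the device
 * broken instead of servicing requests with the dummy userspace
 * handlers. virtio_error() sets vdev->broken and, once VERSION_1 has
 * been negotiated, raises VIRTIO_CONFIG_S_NEEDS_RESET together with a
 * config interrupt so the guest has a chance to notice and reset.
 */
#include "qemu/osdep.h"
#include "hw/virtio/virtio.h"

static void handle_vhost_start_failure(VirtIODevice *vdev, int err)
{
    /* err is assumed to be a negative errno from the vhost backend */
    virtio_error(vdev, "vhost backend failed to start: %s",
                 strerror(-err));
}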

>
> >
> >
> >> This can at least give the backend some chance to recover if it runs
> >> into an intermittent error. The worst case would be the device
> >> resetting repeatedly, for which we could introduce a tunable to limit
> >> the rate if resets occur too often. Did this ever get considered
> >> before?
> >
> >
> > I don't know, but we have managed to survive with such a fallback for years.
> I wonder how the vhost-user client may restart in this case, i.e. when
> running into a transient backend failure. I haven't checked the code yet;
> do you mean there's no error recovery (or at least error detection)
> implemented in the vhost-user client, e.g. for DPDK? Or does it just try
> to reconnect if the socket connection gets closed, but never care about a
> vhost-user backend start failure?

I think we may just get the "fallback to userspace" every time we want to
fall back.

>
> > We can do this, but can virtio_error() fix the issue you describe here?
> It certainly doesn't fix the sigsegv issue. Actually the issue I ran
> into has little to do with error handling, but with virtio_error()
> assumed in the start error path we could just live without falling back
> to the dummy userspace or handling any request (as all vqs are
> effectively stopped/disabled). That is exactly consistent with the
> handling in the stop (success) path: ignore the pending event on the
> host notifier. In other words, it doesn't necessarily have to assume the
> existence of the dummy userspace fallback, which IMHO does nothing more
> than marking NEEDS_RESET with virtio_error() would. On the contrary, if
> there's ever a good use case for the dummy userspace (which I might not
> be aware of), I'd think the fallback to userspace emulation would even
> be needed for the stop path. But I doubt the need for adding such
> complex code without seeing a convincing case.

Ok, I get you. I think virtio_error() could be done in another series.
We can fix the crash issue first and then do virtio_error().

>
> >
> >
> >>
> >> Note that the dummy userspace can't handle any control vq command
> >> effectively once the vhost backend fails; e.g. how does it handle
> >> guest offload, rx mode, MAC or VLAN filter changes without sending
> >> the request down to the backend?
> >
> >
> > It should be no different compared to real hardware. The device is
> > just malfunctioning. The driver can detect this in many ways. E.g. in
> > the past I suggested implementing a device watchdog for virtio-net; the
> > prototype was posted but for some reason it stalled. Maybe we can
> > consider continuing that work.
> Would you mind pointing me to the thread? What was the blocker then?

Something like

https://lore.kernel.org/netdev/20191122013636.1041-1-jcfaracco@gmail.com/

I don't think it has any blocker; it's probably just that nobody kept
working on it. A drawback is that it's only for net and only for TX. To
have a more general detection of a buggy device, we need a lot of work in
other places, e.g. to detect a stall in virtio_cread():

https://patchwork.kernel.org/project/qemu-devel/patch/20190611172032.19143-1-lvivier@redhat.com/
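
For reference, a rough sketch of what the guest-side TX watchdog idea
amounts to (this is not the patch from the link above; the handler body,
ops table and recovery policy are placeholders, and the two-argument
ndo_tx_timeout form is the one used by recent kernels):

#include <linux/netdevice.h>

/* Placeholder handler: the core netdev watchdog invokes this when a TX
 * queue has made no progress within dev->watchdog_timeo. */
static void virtnet_tx_timeout(struct net_device *dev, unsigned int txqueue)
{
    netdev_warn(dev, "TX queue %u appears stalled\n", txqueue);
    /* Recovery policy (e.g. scheduling a device reset) is up to the
     * driver and intentionally left out of this sketch. */
}

static const struct net_device_ops virtnet_netdev_ops = {
    .ndo_tx_timeout = virtnet_tx_timeout,
    /* other ops elided */
};

/* At probe time, something like:
 *     dev->netdev_ops = &virtnet_netdev_ops;
 *     dev->watchdog_timeo = 5 * HZ;
 */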

>
> I feel it might be nice to consider NEEDS_RESET handling for guest
> drivers as it is more relevant here.

Yes, but notice that they're a little bit different:

1) NEEDS_RESET, the device knows something is wrong and it can't be
recovered without a reset
2) watchdog, the device doesn't know there is a bug inside itself

2) looks more robust at first glance.

>
> >
> >
> >> This could easily leave inconsistent state visible to the guest if
> >> somehow we are able to resume the virtqueue without a reset. Even so,
> >> I suspect a device reset is eventually still needed on the other side,
> >> depending on the specific failure. It looks to me that, at least for
> >> vhost-vdpa, the safest fallback so far is to ignore the pending event
> >> on the ctrl_vq and prevent the device from moving forward in case of a
> >> backend start failure.
> >
> >
> > I don't get it here; if we fail to start vhost-vdpa, QEMU should do a
> > safe rewind, otherwise it would be a bug.
> In an ideal world, yes, QEMU should back off to where it was. However, I
> worry that not all of the operations have a symmetric undo op; e.g.
> there's no unready op for vq_ready(), and reset_owner() internally
> contains a device reset to undo what set_owner() does.

Note that as we discussed in another thread, the reset_owner() is
really confusing and may lead to a buggy device.

Actually, I don't see any real issues caused by the rewind you mentioned
above if we fall back to a device that doesn't send or receive anything.

> It would be easier to just reset as a safe fallback in this case.

The problem is that we need something that works for old guests. A reset
here may break existing virtio drivers since it would be noticed by the
guest.

>
> >
> >
> >>
> >>>
> >>> It doesn't differ too much of the two approaches:
> >>>
> >>> 1) dummy fallback which can do even cvq
> >>>
> >>> and
> >>>
> >>> 2) disable host notifiers
> >>>
> >>> Especially considering that 2) requires more changes.
> >> I'm not clear that 2) really needs more changes; it seems to me it
> >> would take more unwarranted changes to make the dummy fallback work on
> >> the cvq. And supposing we can fall back to disabling the device via
> >> virtio_error(), we wouldn't even need to change any code on the cvq?
> >
> >
> > So let me explain my points:
> >
> > 1) we use the dummy receive as a fallback, as vhost-user does
> >
> > 2) the code should safely fall back to that, otherwise it could be a bug
> > elsewhere
> >
> > 3) if we think the dummy fallback doesn't make sense, we can improve it,
> > but we need to figure out why we crash in 2) since the code could be
> > used in other paths.
> I think we may either ignore the pending request left behind on the vhost
> host notifier or even flush the queue in the stop path, since by the time
> we reach this point all of the data vqs have been effectively stopped via
> vhost_dev_stop() and vhost_dev_disable_notifiers(). It looks like that's
> what the dummy fallback does on the data vqs too, i.e. receive_disabled
> is set until the queues for the dummy backend are eventually flushed when
> the device is fully stopped.
>
> What "could be used in other paths" is the key question to answer in my
> opinion. Without knowing the (potential) use cases, it's hard to tell
> what level of emulation needs to be done. I hope we don't have to involve
> complex code changes to emulate changing the number of queues when it's
> known all the heavy lifting done earlier will be effectively destroyed by
> a follow-up reset in the stop path.

That's my feeling as well.

>
> As said, I'm fine with not touching the dummy fallback part, but at
> least we should figure out a simple way to fix the vhost-vdpa control vq
> issue.

Right, that's my point; we can do any other things on top (since we need
a fix for -stable, which should not be intrusive). For the case of the
control vq, I think we should make it as simple as falling back to letting
QEMU poll the ioeventfd, so it can safely fall back to the userspace
virtio_net_handle_ctrl()?
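
To make that concrete, a sketch of the suggested fallback (the wrapper
name is hypothetical; this is essentially the
virtio_queue_host_notifier_read() path that the patch below skips when
ioeventfd handling is disabled):

#include "qemu/osdep.h"
#include "qemu/event_notifier.h"
#include "hw/virtio/virtio.h"

/* Consume a pending kick left on the ctrl vq host notifier and let the
 * registered userspace handler (virtio_net_handle_ctrl() for the
 * virtio-net control queue) process it inside QEMU. */
static void cvq_fallback_to_userspace(VirtQueue *vq)
{
    EventNotifier *n = virtio_queue_get_host_notifier(vq);

    /* Tests and clears the notifier; if it was set, dispatches to the
     * virtqueue's handle_output callback. */
    virtio_queue_host_notifier_read(n);
}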

>
> >
> >
> >>
> >> On the other hand, for the specific code path this patch tries to
> >> fix, it is not due to a failure to start the vhost-vdpa backend, but
> >> more of a control flow flaw in the stop path due to the lack of a VQ
> >> stop uAPI. Dummy or host notifier aside, considering it's currently in
> >> the stop path followed by a reset, I feel it should be pretty safe to
> >> just ignore the pending event on the control vq rather than process it
> >> prematurely in userspace. What do you think? I can live without the
> >> host notifier handler change for sure.
> >
> >
> > I wonder how vhost-user deals with this.
> vhost-user doesn't expose a host notifier for the control vq, so this
> path is not even involved. All requests on the control vq are handled by
> the emulated virtio_net_handle_ctrl handler in the QEMU process.

Right, but what I meant is that the cvq should not differ from the data
vqs in this case (letting QEMU poll the ioeventfd and use
virtio_net_handle_ctrl()).

>
> >
> >
> >>
> >>>
> >>>> I think this is very different from the vhost-kernel case, in that
> >>>> once vhost fails we can fall back to userspace to emulate the network
> >>>> through the tap fd in a good way. However, there's no equivalent yet
> >>>> for vhost-vdpa...
> >>>>
> >>> As said previously, technically we can have vhost-vDPA network backend
> >>> as a fallback.
> >> But this isn't working as yet. And how do you envision the datapath
> >> would work given that we don't have a fallback tap fd?
> >
> >
> > I mean we can treat vhost-vdpa as a kind of general networking backend
> > that could be used by all NIC models like e1000e. Then we can use that
> > as a fallback.
> >
> > But I'm not sure it's worth the bother.
> Well, perhaps that's another story. I think to support that it would
> need more code refactoring than just the ioeventfd handler change alone...

Right.

Thanks

>
> Thanks,
> -Siwei
>
> >
> > Thanks
> >
> >
> >>
> >> -Siwei
> >>
> >>
> >>>   (The same was done for vhost-user.)
> >>>
> >>> Thanks
> >>>
> >>>> Thanks,
> >>>> -Siwei
> >>>>
> >>>>> Thanks
> >>>>>
> >>>>>> Thanks,
> >>>>>> -Siwei
> >>>>>>
> >>>>>>> Thanks
> >>>>>>>
> >>>>>>>> 7  0x0000558f52c6b89a in virtio_pci_set_guest_notifier
> >>>>>>>> (d=d@entry=0x558f568f0f60, n=n@entry=2,
> >>>>>>>> assign=assign@entry=true, with_irqfd=with_irqfd@entry=false)
> >>>>>>>>       at ../hw/virtio/virtio-pci.c:974
> >>>>>>>> 8  0x0000558f52c6c0d8 in virtio_pci_set_guest_notifiers
> >>>>>>>> (d=0x558f568f0f60, nvqs=3, assign=true) at
> >>>>>>>> ../hw/virtio/virtio-pci.c:1019
> >>>>>>>> 9  0x0000558f52bf091d in vhost_net_start
> >>>>>>>> (dev=dev@entry=0x558f568f91f0, ncs=0x558f56937cd0,
> >>>>>>>> data_queue_pairs=data_queue_pairs@entry=1, cvq=cvq@entry=1)
> >>>>>>>>       at ../hw/net/vhost_net.c:361
> >>>>>>>> 10 0x0000558f52d4e5e7 in virtio_net_set_status
> >>>>>>>> (status=<optimized out>, n=0x558f568f91f0) at
> >>>>>>>> ../hw/net/virtio-net.c:289
> >>>>>>>> 11 0x0000558f52d4e5e7 in virtio_net_set_status
> >>>>>>>> (vdev=0x558f568f91f0, status=15 '\017') at
> >>>>>>>> ../hw/net/virtio-net.c:370
> >>>>>>>> 12 0x0000558f52d6c4b2 in virtio_set_status
> >>>>>>>> (vdev=vdev@entry=0x558f568f91f0, val=val@entry=15 '\017') at
> >>>>>>>> ../hw/virtio/virtio.c:1945
> >>>>>>>> 13 0x0000558f52c69eff in virtio_pci_common_write
> >>>>>>>> (opaque=0x558f568f0f60, addr=<optimized out>, val=<optimized
> >>>>>>>> out>, size=<optimized out>) at ../hw/virtio/virtio-pci.c:1292
> >>>>>>>> 14 0x0000558f52d15d6e in memory_region_write_accessor
> >>>>>>>> (mr=0x558f568f19d0, addr=20, value=<optimized out>, size=1,
> >>>>>>>> shift=<optimized out>, mask=<optimized out>, attrs=...)
> >>>>>>>>       at ../softmmu/memory.c:492
> >>>>>>>> 15 0x0000558f52d127de in access_with_adjusted_size
> >>>>>>>> (addr=addr@entry=20, value=value@entry=0x7f8cdbffe748,
> >>>>>>>> size=size@entry=1, access_size_min=<optimized out>,
> >>>>>>>> access_size_max=<optimized out>, access_fn=0x558f52d15cf0
> >>>>>>>> <memory_region_write_accessor>, mr=0x558f568f19d0, attrs=...)
> >>>>>>>> at ../softmmu/memory.c:554
> >>>>>>>> 16 0x0000558f52d157ef in memory_region_dispatch_write
> >>>>>>>> (mr=mr@entry=0x558f568f19d0, addr=20, data=<optimized out>,
> >>>>>>>> op=<optimized out>, attrs=attrs@entry=...)
> >>>>>>>>       at ../softmmu/memory.c:1504
> >>>>>>>> 17 0x0000558f52d078e7 in flatview_write_continue
> >>>>>>>> (fv=fv@entry=0x7f8accbc3b90, addr=addr@entry=103079215124,
> >>>>>>>> attrs=..., ptr=ptr@entry=0x7f8ce6300028, len=len@entry=1,
> >>>>>>>> addr1=<optimized out>, l=<optimized out>, mr=0x558f568f19d0) at
> >>>>>>>> ../../../include/qemu/host-utils.h:165
> >>>>>>>> 18 0x0000558f52d07b06 in flatview_write (fv=0x7f8accbc3b90,
> >>>>>>>> addr=103079215124, attrs=..., buf=0x7f8ce6300028, len=1) at
> >>>>>>>> ../softmmu/physmem.c:2822
> >>>>>>>> 19 0x0000558f52d0b36b in address_space_write (as=<optimized
> >>>>>>>> out>, addr=<optimized out>, attrs=...,
> >>>>>>>> buf=buf@entry=0x7f8ce6300028, len=<optimized out>)
> >>>>>>>>       at ../softmmu/physmem.c:2914
> >>>>>>>> 20 0x0000558f52d0b3da in address_space_rw (as=<optimized out>,
> >>>>>>>> addr=<optimized out>, attrs=...,
> >>>>>>>>       attrs@entry=..., buf=buf@entry=0x7f8ce6300028,
> >>>>>>>> len=<optimized out>, is_write=<optimized out>) at
> >>>>>>>> ../softmmu/physmem.c:2924
> >>>>>>>> 21 0x0000558f52dced09 in kvm_cpu_exec
> >>>>>>>> (cpu=cpu@entry=0x558f55c2da60) at ../accel/kvm/kvm-all.c:2903
> >>>>>>>> 22 0x0000558f52dcfabd in kvm_vcpu_thread_fn
> >>>>>>>> (arg=arg@entry=0x558f55c2da60) at ../accel/kvm/kvm-accel-ops.c:49
> >>>>>>>> 23 0x0000558f52f9f04a in qemu_thread_start (args=<optimized
> >>>>>>>> out>) at ../util/qemu-thread-posix.c:556
> >>>>>>>> 24 0x00007f8ce4392ea5 in start_thread () at /lib64/libpthread.so.0
> >>>>>>>> 25 0x00007f8ce40bb9fd in clone () at /lib64/libc.so.6
> >>>>>>>>
> >>>>>>>> Fixes: 4023784 ("vhost-vdpa: multiqueue support")
> >>>>>>>> Cc: Jason Wang <jasowang@redhat.com>
> >>>>>>>> Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
> >>>>>>>> ---
> >>>>>>>>     hw/virtio/virtio-bus.c | 3 ++-
> >>>>>>>>     1 file changed, 2 insertions(+), 1 deletion(-)
> >>>>>>>>
> >>>>>>>> diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
> >>>>>>>> index 0f69d1c..3159b58 100644
> >>>>>>>> --- a/hw/virtio/virtio-bus.c
> >>>>>>>> +++ b/hw/virtio/virtio-bus.c
> >>>>>>>> @@ -311,7 +311,8 @@ void
> >>>>>>>> virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n)
> >>>>>>>>         /* Test and clear notifier after disabling event,
> >>>>>>>>          * in case poll callback didn't have time to run.
> >>>>>>>>          */
> >>>>>>>> -    virtio_queue_host_notifier_read(notifier);
> >>>>>>>> +    if (!vdev->disable_ioeventfd_handler)
> >>>>>>>> +        virtio_queue_host_notifier_read(notifier);
> >>>>>>>>         event_notifier_cleanup(notifier);
> >>>>>>>>     }
> >>>>>>>>
> >>>>>>>> --
> >>>>>>>> 1.8.3.1
> >>>>>>>>
> >>
> >
>

Patch

diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c
index 0f69d1c..3159b58 100644
--- a/hw/virtio/virtio-bus.c
+++ b/hw/virtio/virtio-bus.c
@@ -311,7 +311,8 @@  void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n)
     /* Test and clear notifier after disabling event,
      * in case poll callback didn't have time to run.
      */
-    virtio_queue_host_notifier_read(notifier);
+    if (!vdev->disable_ioeventfd_handler)
+        virtio_queue_host_notifier_read(notifier);
     event_notifier_cleanup(notifier);
 }