diff mbox

vhost-user: fix crash when chardev-remove

Message ID 215f62a9.521a.159913eedbc.Coremail.h158309@126.com (mailing list archive)
State New, archived
Headers show

Commit Message

黄淮 Jan. 12, 2017, 5:55 a.m. UTC
Hi 
I tested  on qemu-2.7.1 release version.


test case:
1. host run ovs-dpdk.  start  vhost-user mode vm
2. chardev-add socket,id=char-client-002-2,path=/usr/local/var/run/openvswitch/client-002-2,server=on

    netdev_add vhost-user,id=client-002-2, ,chardev=char-client-002-2,vhostforce=on

    device_add virtio-net-pci,netdev=client-002-2,mac=00:22:79:29:d2:6c,id=netdev-client-002-2 
   ... wait 10 s
    device_del  netdev-client-002-2
    netdev_del  client-002-2
    chardev-remove char-client-002-2
   
    ovs-vsctl del-port  client-002-2
(gdb) bt
#0  0x00007f80483265f7 in raise () from /lib64/libc.so.6
#1  0x00007f8048327ce8 in abort () from /lib64/libc.so.6
#2  0x00007f804831f566 in __assert_fail_base () from /lib64/libc.so.6
#3  0x00007f804831f612 in __assert_fail () from /lib64/libc.so.6
#4  0x00007f804b729bec in get_vhost_net (nc=<optimized out>) at /opt/cloud/contrib/qemu-2.7.1/hw/net/vhost_net.c:415
#5  0x00007f804b726f31 in virtio_net_vhost_status (status=0 '\000', n=0x7f804db841c0) at /opt/cloud/contrib/qemu-2.7.1/hw/net/virtio-net.c:121
#6  virtio_net_set_status (vdev=<optimized out>, status=<optimized out>) at /opt/cloud/contrib/qemu-2.7.1/hw/net/virtio-net.c:224
#7  0x00007f804b73ead6 in virtio_set_status (vdev=vdev@entry=0x7f804db841c0, val=val@entry=0 '\000') at /opt/cloud/contrib/qemu-2.7.1/hw/virtio/virtio.c:760
#8  0x00007f804b8f869c in virtio_ioport_write (val=0, addr=18, opaque=0x7f804db7be80) at hw/virtio/virtio-pci.c:400
#9  virtio_pci_config_write (opaque=0x7f804db7be80, addr=18, val=0, size=<optimized out>) at hw/virtio/virtio-pci.c:525
#10 0x00007f804b6fa0db in memory_region_write_accessor (mr=0x7f804db7c710, addr=18, value=<optimized out>, size=1, shift=<optimized out>, mask=<optimized out>, attrs=...)
    at /opt/cloud/contrib/qemu-2.7.1/memory.c:525
#11 0x00007f804b6f8079 in access_with_adjusted_size (addr=addr@entry=18, value=value@entry=0x7f7ffeffc958, size=size@entry=1, access_size_min=<optimized out>,
    access_size_max=<optimized out>, access=access@entry=0x7f804b6fa060 <memory_region_write_accessor>, mr=mr@entry=0x7f804db7c710, attrs=attrs@entry=...)
    at /opt/cloud/contrib/qemu-2.7.1/memory.c:591
#12 0x00007f804b6fc6f5 in memory_region_dispatch_write (mr=mr@entry=0x7f804db7c710, addr=addr@entry=18, data=0, size=size@entry=1, attrs=attrs@entry=...)
    at /opt/cloud/contrib/qemu-2.7.1/memory.c:1327
#13 0x00007f804b6b93bb in address_space_write_continue (mr=0x7f804db7c710, l=1, addr1=18, len=1, buf=0x7f804b4bc000 <Address 0x7f804b4bc000 out of bounds>, attrs=..., addr=4114,
    as=0x7f804bfaa3e0 <address_space_io>) at /opt/cloud/contrib/qemu-2.7.1/exec.c:2556
#14 address_space_write (as=<optimized out>, addr=<optimized out>, attrs=..., buf=<optimized out>, len=<optimized out>) at /opt/cloud/contrib/qemu-2.7.1/exec.c:2601
#15 0x00007f804b6b9a3d in address_space_rw (as=<optimized out>, addr=addr@entry=4114, attrs=..., attrs@entry=..., buf=<optimized out>, len=len@entry=1, is_write=is_write@entry=true)
    at /opt/cloud/contrib/qemu-2.7.1/exec.c:2703
#16 0x00007f804b6f6fd5 in kvm_handle_io (count=1, size=1, direction=<optimized out>, data=<optimized out>, attrs=..., port=4114) at /opt/cloud/contrib/qemu-2.7.1/kvm-all.c:1791
#17 kvm_cpu_exec (cpu=cpu@entry=0x7f804d8d1de0) at /opt/cloud/contrib/qemu-2.7.1/kvm-all.c:1955
#18 0x00007f804b6e4e76 in qemu_kvm_cpu_thread_fn (arg=0x7f804d8d1de0) at /opt/cloud/contrib/qemu-2.7.1/cpus.c:1078
#19 0x00007f80486b9dc5 in start_thread () from /lib64/libpthread.so.0
#20 0x00007f80483e728d in clone () from /lib64/libc.so.6


After fix this:
Hmm, the socket didn't send a CLOSED event on remove?
 

     qemu_purge_queued_packets(nc);
 }


@@ -192,7 +195,8 @@ static gboolean net_vhost_user_watch(GIOChannel *chan, GIOCondition cond,
 {
     VhostUserState *s = opaque;


-    qemu_chr_disconnect(s->chr);
+    if (s->chr)
+        qemu_chr_disconnect(s->chr);



that looks outdated,


which version of qemu did you tested and patched?


thanks



     return FALSE;
 }
--

Marc-André Lureau
diff mbox

Patch

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index f2d49ad..4037cf4 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -412,7 +412,6 @@  VHostNetState *get_vhost_net(NetClientState *nc)
         break;
     case NET_CLIENT_DRIVER_VHOST_USER:
         vhost_net = vhost_user_get_vhost_net(nc);
-        assert(vhost_net);


(gdb) bt
#0  qemu_chr_disconnect (chr=0x0) at qemu-char.c:4081
#1  0x00007fdb4f538cf0 in net_vhost_user_watch (chan=<optimized out>, cond=<optimized out>, opaque=<optimized out>) at net/vhost-user.c:195
#2  0x00007fdb4cd617aa in g_main_context_dispatch () from /lib64/libglib-2.0.so.0
#3  0x00007fdb4f5798f0 in glib_pollfds_poll () at main-loop.c:213
#4  os_host_main_loop_wait (timeout=<optimized out>) at main-loop.c:258
#5  main_loop_wait (nonblocking=<optimized out>) at main-loop.c:506
#6  0x00007fdb4f2dbfa7 in main_loop () at vl.c:1909
#7  main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4618




I think it`s because vhost-user client mode ,  2.7+ version new function(reconnect).   After qemu char-remove, the watch fd process didn`t stop. When ovs-dpdk remove port  and close watch fd,  qemu crashed.


Thanks
Huanghuai  



At 2017-01-11 23:02:26, "Marc-André Lureau" <marcandre.lureau@gmail.com> wrote:

Hi



On Wed, Jan 11, 2017 at 3:32 PM 黄淮 <h158309@126.com> wrote:

From: Huai Huang<h158309@126.com>





Could you describe a bit more the crash and provide a backtrace?

 
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index f2d49ad..4037cf4 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -412,7 +412,6 @@  VHostNetState *get_vhost_net(NetClientState *nc)
         break;
     case NET_CLIENT_DRIVER_VHOST_USER:
         vhost_net = vhost_user_get_vhost_net(nc);
-        assert(vhost_net);



This was recently added, in commit 1a5b68cee8a2b165ffd61b2e0641a4da3990f242.


How is it related?


I remember the rest of the vhost-user code expected get_vhost_net() to be non-null, did that change?



         break;
     default:
         break;
diff --git a/net/vhost-user.c b/net/vhost-user.c
index b0595f8..4e54478 100644
--- a/net/vhost-user.c
+++ b/net/vhost-user.c
@@ -160,7 +160,10 @@  static void vhost_user_cleanup(NetClientState *nc)
         qemu_chr_fe_release(s->chr);
         s->chr = NULL;
     }
-
+    if (s->watch) {
+        g_source_remove(s->watch);
+        s->watch = 0;
+    }