diff mbox series

[v2,1/2] vhost-user: Fix lost reconnect

Message ID 20230824074115.93897-2-fengli@smartx.com (mailing list archive)
State New, archived
Headers show
Series Fix vhost reconnect issues | expand

Commit Message

Li Feng Aug. 24, 2023, 7:41 a.m. UTC
When vhost-user is reconnecting to the backend and fails at get_features in
vhost_dev_init(), the reconnect fails and is never retriggered.

The reason is:
When vhost-user fails at get_features, vhost_dev_cleanup() is called
immediately.

vhost_dev_cleanup() calls 'memset(hdev, 0, sizeof(struct vhost_dev))', which
leaves vhost->vdev NULL.

The reconnect path is:
vhost_user_blk_event
   vhost_user_async_close(.. vhost_user_blk_disconnect ..)
     qemu_chr_fe_set_handlers <----- clear the notifier callback
       schedule vhost_user_async_close_bh

Because vhost->vdev is NULL, vhost_user_async_close_bh does not call
vhost_user_blk_disconnect, so the event fd callback is never reinstalled.

All vhost-user devices have this issue, including vhost-user-blk/scsi.

With this patch, if vhost->vdev is NULL, the fd callback is still
reinstalled.

Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling")

Signed-off-by: Li Feng <fengli@smartx.com>
---
 hw/block/vhost-user-blk.c      |  2 +-
 hw/scsi/vhost-user-scsi.c      |  3 ++-
 hw/virtio/vhost-user-gpio.c    |  2 +-
 hw/virtio/vhost-user.c         | 10 ++++++++--
 include/hw/virtio/vhost-user.h |  4 +++-
 5 files changed, 15 insertions(+), 6 deletions(-)

Comments

Raphael Norwitz Aug. 29, 2023, 10:11 p.m. UTC | #1
> On Aug 24, 2023, at 3:41 AM, Li Feng <fengli@smartx.com> wrote:
> 
> When the vhost-user is reconnecting to the backend, and if the vhost-user fails
> at the get_features in vhost_dev_init(), then the reconnect will fail
> and it will not be retriggered forever.
> 
> The reason is:
> When the vhost-user fails at get_features, the vhost_dev_cleanup will be called
> immediately.
> 
> vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'.
> 
> The reconnect path is:
> vhost_user_blk_event
>   vhost_user_async_close(.. vhost_user_blk_disconnect ..)
>     qemu_chr_fe_set_handlers <----- clear the notifier callback
>       schedule vhost_user_async_close_bh
> 
> The vhost->vdev is null, so the vhost_user_blk_disconnect will not be
> called, then the event fd callback will not be reinstalled.
> 
> All vhost-user devices have this issue, including vhost-user-blk/scsi.
> 
> With this patch, if the vdev->vdev is null, the fd callback will still
> be reinstalled.
> 
> Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling")
> 

A couple of NITs, otherwise LGTM

Reviewed-by: Raphael Norwitz <raphael.norwitz@nutanix.com>

> Signed-off-by: Li Feng <fengli@smartx.com>
> ---
> hw/block/vhost-user-blk.c      |  2 +-
> hw/scsi/vhost-user-scsi.c      |  3 ++-
> hw/virtio/vhost-user-gpio.c    |  2 +-
> hw/virtio/vhost-user.c         | 10 ++++++++--
> include/hw/virtio/vhost-user.h |  4 +++-
> 5 files changed, 15 insertions(+), 6 deletions(-)
> 
> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
> index 3c69fa47d5..95c758200d 100644
> --- a/hw/block/vhost-user-blk.c
> +++ b/hw/block/vhost-user-blk.c
> @@ -391,7 +391,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
>     case CHR_EVENT_CLOSED:
>         /* defer close until later to avoid circular close */
>         vhost_user_async_close(dev, &s->chardev, &s->dev,
> -                               vhost_user_blk_disconnect);
> +                               vhost_user_blk_disconnect, vhost_user_blk_event);
>         break;
>     case CHR_EVENT_BREAK:
>     case CHR_EVENT_MUX_IN:
> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
> index a7fa8e8df2..e931df9f5b 100644
> --- a/hw/scsi/vhost-user-scsi.c
> +++ b/hw/scsi/vhost-user-scsi.c
> @@ -236,7 +236,8 @@ static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event)
>     case CHR_EVENT_CLOSED:
>         /* defer close until later to avoid circular close */
>         vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
> -                               vhost_user_scsi_disconnect);
> +                               vhost_user_scsi_disconnect,
> +                               vhost_user_scsi_event);
>         break;
>     case CHR_EVENT_BREAK:
>     case CHR_EVENT_MUX_IN:
> diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c
> index d9979aa5db..04c2cc79f4 100644
> --- a/hw/virtio/vhost-user-gpio.c
> +++ b/hw/virtio/vhost-user-gpio.c
> @@ -283,7 +283,7 @@ static void vu_gpio_event(void *opaque, QEMUChrEvent event)
>     case CHR_EVENT_CLOSED:
>         /* defer close until later to avoid circular close */
>         vhost_user_async_close(dev, &gpio->chardev, &gpio->vhost_dev,
> -                               vu_gpio_disconnect);
> +                               vu_gpio_disconnect, vu_gpio_event);
>         break;
>     case CHR_EVENT_BREAK:
>     case CHR_EVENT_MUX_IN:
> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
> index 8dcf049d42..9540766dd3 100644
> --- a/hw/virtio/vhost-user.c
> +++ b/hw/virtio/vhost-user.c
> @@ -2643,6 +2643,7 @@ typedef struct {
>     DeviceState *dev;
>     CharBackend *cd;
>     struct vhost_dev *vhost;
> +    IOEventHandler *event_cb;
> } VhostAsyncCallback;
> 
> static void vhost_user_async_close_bh(void *opaque)
> @@ -2657,7 +2658,10 @@ static void vhost_user_async_close_bh(void *opaque)
>      */
>     if (vhost->vdev) {
>         data->cb(data->dev);
> -    }
> +    } else if (data->event_cb) {
> +        qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb,
> +                                 NULL, data->dev, NULL, true);
> +   }
> 
>     g_free(data);
> }
> @@ -2669,7 +2673,9 @@ static void vhost_user_async_close_bh(void *opaque)
>  */
> void vhost_user_async_close(DeviceState *d,
>                             CharBackend *chardev, struct vhost_dev *vhost,
> -                            vu_async_close_fn cb)
> +                            vu_async_close_fn cb,
> +                            IOEventHandler *event_cb

Nit: why the newline before the closing parenthesis?

> +                            )
> {
>     if (!runstate_check(RUN_STATE_SHUTDOWN)) {
>         /*
> diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
> index 191216a74f..5fdc711d4e 100644
> --- a/include/hw/virtio/vhost-user.h
> +++ b/include/hw/virtio/vhost-user.h
> @@ -84,6 +84,8 @@ typedef void (*vu_async_close_fn)(DeviceState *cb);
> 
> void vhost_user_async_close(DeviceState *d,
>                             CharBackend *chardev, struct vhost_dev *vhost,
> -                            vu_async_close_fn cb);
> +                            vu_async_close_fn cb,
> +                            IOEventHandler *event_cb

Nit: ditto - don’t think we need this newline before );

> +                            );
> 
> #endif
> -- 
> 2.41.0
>
Li Feng Aug. 30, 2023, 4:51 a.m. UTC | #2
> On 30 Aug 2023, at 6:11 AM, Raphael Norwitz <raphael.norwitz@nutanix.com> wrote:
> 
> 
> 
>> On Aug 24, 2023, at 3:41 AM, Li Feng <fengli@smartx.com> wrote:
>> 
>> When the vhost-user is reconnecting to the backend, and if the vhost-user fails
>> at the get_features in vhost_dev_init(), then the reconnect will fail
>> and it will not be retriggered forever.
>> 
>> The reason is:
>> When the vhost-user fails at get_features, the vhost_dev_cleanup will be called
>> immediately.
>> 
>> vhost_dev_cleanup calls 'memset(hdev, 0, sizeof(struct vhost_dev))'.
>> 
>> The reconnect path is:
>> vhost_user_blk_event
>>  vhost_user_async_close(.. vhost_user_blk_disconnect ..)
>>    qemu_chr_fe_set_handlers <----- clear the notifier callback
>>      schedule vhost_user_async_close_bh
>> 
>> The vhost->vdev is null, so the vhost_user_blk_disconnect will not be
>> called, then the event fd callback will not be reinstalled.
>> 
>> All vhost-user devices have this issue, including vhost-user-blk/scsi.
>> 
>> With this patch, if the vdev->vdev is null, the fd callback will still
>> be reinstalled.
>> 
>> Fixes: 71e076a07d ("hw/virtio: generalise CHR_EVENT_CLOSED handling")
>> 
> 
> A couple of NITs, otherwise LGTM
> 
> Reviewed-by: Raphael Norwitz <raphael.norwitz@nutanix.com <mailto:raphael.norwitz@nutanix.com>>
> 
>> Signed-off-by: Li Feng <fengli@smartx.com>
>> ---
>> hw/block/vhost-user-blk.c      |  2 +-
>> hw/scsi/vhost-user-scsi.c      |  3 ++-
>> hw/virtio/vhost-user-gpio.c    |  2 +-
>> hw/virtio/vhost-user.c         | 10 ++++++++--
>> include/hw/virtio/vhost-user.h |  4 +++-
>> 5 files changed, 15 insertions(+), 6 deletions(-)
>> 
>> diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
>> index 3c69fa47d5..95c758200d 100644
>> --- a/hw/block/vhost-user-blk.c
>> +++ b/hw/block/vhost-user-blk.c
>> @@ -391,7 +391,7 @@ static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
>>    case CHR_EVENT_CLOSED:
>>        /* defer close until later to avoid circular close */
>>        vhost_user_async_close(dev, &s->chardev, &s->dev,
>> -                               vhost_user_blk_disconnect);
>> +                               vhost_user_blk_disconnect, vhost_user_blk_event);
>>        break;
>>    case CHR_EVENT_BREAK:
>>    case CHR_EVENT_MUX_IN:
>> diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
>> index a7fa8e8df2..e931df9f5b 100644
>> --- a/hw/scsi/vhost-user-scsi.c
>> +++ b/hw/scsi/vhost-user-scsi.c
>> @@ -236,7 +236,8 @@ static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event)
>>    case CHR_EVENT_CLOSED:
>>        /* defer close until later to avoid circular close */
>>        vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
>> -                               vhost_user_scsi_disconnect);
>> +                               vhost_user_scsi_disconnect,
>> +                               vhost_user_scsi_event);
>>        break;
>>    case CHR_EVENT_BREAK:
>>    case CHR_EVENT_MUX_IN:
>> diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c
>> index d9979aa5db..04c2cc79f4 100644
>> --- a/hw/virtio/vhost-user-gpio.c
>> +++ b/hw/virtio/vhost-user-gpio.c
>> @@ -283,7 +283,7 @@ static void vu_gpio_event(void *opaque, QEMUChrEvent event)
>>    case CHR_EVENT_CLOSED:
>>        /* defer close until later to avoid circular close */
>>        vhost_user_async_close(dev, &gpio->chardev, &gpio->vhost_dev,
>> -                               vu_gpio_disconnect);
>> +                               vu_gpio_disconnect, vu_gpio_event);
>>        break;
>>    case CHR_EVENT_BREAK:
>>    case CHR_EVENT_MUX_IN:
>> diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
>> index 8dcf049d42..9540766dd3 100644
>> --- a/hw/virtio/vhost-user.c
>> +++ b/hw/virtio/vhost-user.c
>> @@ -2643,6 +2643,7 @@ typedef struct {
>>    DeviceState *dev;
>>    CharBackend *cd;
>>    struct vhost_dev *vhost;
>> +    IOEventHandler *event_cb;
>> } VhostAsyncCallback;
>> 
>> static void vhost_user_async_close_bh(void *opaque)
>> @@ -2657,7 +2658,10 @@ static void vhost_user_async_close_bh(void *opaque)
>>     */
>>    if (vhost->vdev) {
>>        data->cb(data->dev);
>> -    }
>> +    } else if (data->event_cb) {
>> +        qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb,
>> +                                 NULL, data->dev, NULL, true);
>> +   }
>> 
>>    g_free(data);
>> }
>> @@ -2669,7 +2673,9 @@ static void vhost_user_async_close_bh(void *opaque)
>> */
>> void vhost_user_async_close(DeviceState *d,
>>                            CharBackend *chardev, struct vhost_dev *vhost,
>> -                            vu_async_close_fn cb)
>> +                            vu_async_close_fn cb,
>> +                            IOEventHandler *event_cb
> 
> Nit: why the newline before the closing parenthesis?
Acked.

> 
>> +                            )
>> {
>>    if (!runstate_check(RUN_STATE_SHUTDOWN)) {
>>        /*
>> diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
>> index 191216a74f..5fdc711d4e 100644
>> --- a/include/hw/virtio/vhost-user.h
>> +++ b/include/hw/virtio/vhost-user.h
>> @@ -84,6 +84,8 @@ typedef void (*vu_async_close_fn)(DeviceState *cb);
>> 
>> void vhost_user_async_close(DeviceState *d,
>>                            CharBackend *chardev, struct vhost_dev *vhost,
>> -                            vu_async_close_fn cb);
>> +                            vu_async_close_fn cb,
>> +                            IOEventHandler *event_cb
> 
> Nit: ditto - don’t think we need this newline before );
Acked.
> 
>> +                            );
>> 
>> #endif
>> -- 
>> 2.41.0
diff mbox series

Patch

diff --git a/hw/block/vhost-user-blk.c b/hw/block/vhost-user-blk.c
index 3c69fa47d5..95c758200d 100644
--- a/hw/block/vhost-user-blk.c
+++ b/hw/block/vhost-user-blk.c
@@ -391,7 +391,7 @@  static void vhost_user_blk_event(void *opaque, QEMUChrEvent event)
     case CHR_EVENT_CLOSED:
         /* defer close until later to avoid circular close */
         vhost_user_async_close(dev, &s->chardev, &s->dev,
-                               vhost_user_blk_disconnect);
+                               vhost_user_blk_disconnect, vhost_user_blk_event);
         break;
     case CHR_EVENT_BREAK:
     case CHR_EVENT_MUX_IN:
diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c
index a7fa8e8df2..e931df9f5b 100644
--- a/hw/scsi/vhost-user-scsi.c
+++ b/hw/scsi/vhost-user-scsi.c
@@ -236,7 +236,8 @@  static void vhost_user_scsi_event(void *opaque, QEMUChrEvent event)
     case CHR_EVENT_CLOSED:
         /* defer close until later to avoid circular close */
         vhost_user_async_close(dev, &vs->conf.chardev, &vsc->dev,
-                               vhost_user_scsi_disconnect);
+                               vhost_user_scsi_disconnect,
+                               vhost_user_scsi_event);
         break;
     case CHR_EVENT_BREAK:
     case CHR_EVENT_MUX_IN:
diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c
index d9979aa5db..04c2cc79f4 100644
--- a/hw/virtio/vhost-user-gpio.c
+++ b/hw/virtio/vhost-user-gpio.c
@@ -283,7 +283,7 @@  static void vu_gpio_event(void *opaque, QEMUChrEvent event)
     case CHR_EVENT_CLOSED:
         /* defer close until later to avoid circular close */
         vhost_user_async_close(dev, &gpio->chardev, &gpio->vhost_dev,
-                               vu_gpio_disconnect);
+                               vu_gpio_disconnect, vu_gpio_event);
         break;
     case CHR_EVENT_BREAK:
     case CHR_EVENT_MUX_IN:
diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c
index 8dcf049d42..9540766dd3 100644
--- a/hw/virtio/vhost-user.c
+++ b/hw/virtio/vhost-user.c
@@ -2643,6 +2643,7 @@  typedef struct {
     DeviceState *dev;
     CharBackend *cd;
     struct vhost_dev *vhost;
+    IOEventHandler *event_cb;
 } VhostAsyncCallback;
 
 static void vhost_user_async_close_bh(void *opaque)
@@ -2657,7 +2658,10 @@  static void vhost_user_async_close_bh(void *opaque)
      */
     if (vhost->vdev) {
         data->cb(data->dev);
-    }
+    } else if (data->event_cb) {
+        qemu_chr_fe_set_handlers(data->cd, NULL, NULL, data->event_cb,
+                                 NULL, data->dev, NULL, true);
+   }
 
     g_free(data);
 }
@@ -2669,7 +2673,9 @@  static void vhost_user_async_close_bh(void *opaque)
  */
 void vhost_user_async_close(DeviceState *d,
                             CharBackend *chardev, struct vhost_dev *vhost,
-                            vu_async_close_fn cb)
+                            vu_async_close_fn cb,
+                            IOEventHandler *event_cb
+                            )
 {
     if (!runstate_check(RUN_STATE_SHUTDOWN)) {
         /*
diff --git a/include/hw/virtio/vhost-user.h b/include/hw/virtio/vhost-user.h
index 191216a74f..5fdc711d4e 100644
--- a/include/hw/virtio/vhost-user.h
+++ b/include/hw/virtio/vhost-user.h
@@ -84,6 +84,8 @@  typedef void (*vu_async_close_fn)(DeviceState *cb);
 
 void vhost_user_async_close(DeviceState *d,
                             CharBackend *chardev, struct vhost_dev *vhost,
-                            vu_async_close_fn cb);
+                            vu_async_close_fn cb,
+                            IOEventHandler *event_cb
+                            );
 
 #endif