diff mbox series

[v2] vdpa: reset the backend device in stage of stop last vhost device

Message ID 1648634561-12504-1-git-send-email-08005325@163.com (mailing list archive)
State New, archived
Headers show
Series [v2] vdpa: reset the backend device in stage of stop last vhost device | expand

Commit Message

08005325@163.com March 30, 2022, 10:02 a.m. UTC
From: Michael Qiu <qiudayu@archeros.com>

Currently, when VM poweroff, it will trigger vdpa
device(such as mlx bluefield2 VF) reset many times(with 1 datapath
queue pair and one control queue, triggered 3 times), this
leads to below issue:

vhost VQ 2 ring restore failed: -22: Invalid argument (22)

This because in vhost_net_stop(), it will stop all vhost device bind to
this virtio device, and in vhost_dev_stop(), qemu tries to stop the device
, then stop the queue: vhost_virtqueue_stop().

In vhost_dev_stop(), it resets the device, which clear some flags
in low level driver, and in next loop(stop other vhost backends),
qemu try to stop the queue corresponding to the vhost backend,
 the driver finds that the VQ is invalied, this is the root cause.

To solve the issue, vdpa should set vring unready, and
remove reset ops in device stop: vhost_dev_start(hdev, false).

and implement a new function vhost_dev_reset, only reset backend
device when the last vhost stop triggerd.

Signed-off-by: Michael Qiu<qiudayu@archeros.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
v2 --> v1:
   implement a new function vhost_dev_reset,
   reset the backend kernel device at last.

---
 hw/net/vhost_net.c        | 22 +++++++++++++++++++---
 hw/virtio/vhost-vdpa.c    |  8 ++++----
 hw/virtio/vhost.c         | 16 +++++++++++++++-
 include/hw/virtio/vhost.h |  1 +
 4 files changed, 39 insertions(+), 8 deletions(-)

Comments

Michael S. Tsirkin March 30, 2022, 10:52 a.m. UTC | #1
On Wed, Mar 30, 2022 at 06:02:41AM -0400, 08005325@163.com wrote:

It's an empty patch.
Si-Wei Liu March 31, 2022, 12:15 a.m. UTC | #2
On 3/30/2022 3:02 AM, 08005325@163.com wrote:
> From: Michael Qiu <qiudayu@archeros.com>
>
> Currently, when VM poweroff, it will trigger vdpa
> device(such as mlx bluefield2 VF) reset many times(with 1 datapath
> queue pair and one control queue, triggered 3 times), this
> leads to below issue:
>
> vhost VQ 2 ring restore failed: -22: Invalid argument (22)
>
> This because in vhost_net_stop(), it will stop all vhost device bind to
> this virtio device, and in vhost_dev_stop(), qemu tries to stop the device
> , then stop the queue: vhost_virtqueue_stop().
>
> In vhost_dev_stop(), it resets the device, which clear some flags
> in low level driver, and in next loop(stop other vhost backends),
> qemu try to stop the queue corresponding to the vhost backend,
>   the driver finds that the VQ is invalied, this is the root cause.
>
> To solve the issue, vdpa should set vring unready, and
> remove reset ops in device stop: vhost_dev_start(hdev, false).
>
> and implement a new function vhost_dev_reset, only reset backend
> device when the last vhost stop triggerd.
>
> Signed-off-by: Michael Qiu<qiudayu@archeros.com>
> Acked-by: Jason Wang <jasowang@redhat.com>
> ---
> v2 --> v1:
>     implement a new function vhost_dev_reset,
>     reset the backend kernel device at last.
>
> ---
>   hw/net/vhost_net.c        | 22 +++++++++++++++++++---
>   hw/virtio/vhost-vdpa.c    |  8 ++++----
>   hw/virtio/vhost.c         | 16 +++++++++++++++-
>   include/hw/virtio/vhost.h |  1 +
>   4 files changed, 39 insertions(+), 8 deletions(-)
>
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index 30379d2..3cdf6a4 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -299,7 +299,7 @@ fail_notifiers:
>   }
>   
>   static void vhost_net_stop_one(struct vhost_net *net,
> -                               VirtIODevice *dev)
> +                               VirtIODevice *dev, bool reset)
>   {
>       struct vhost_vring_file file = { .fd = -1 };
>   
> @@ -313,6 +313,11 @@ static void vhost_net_stop_one(struct vhost_net *net,
>           net->nc->info->poll(net->nc, true);
>       }
>       vhost_dev_stop(&net->dev, dev);
> +
> +    if (reset) {
> +        vhost_dev_reset(&net->dev);
> +    }
> +
>       vhost_dev_disable_notifiers(&net->dev, dev);
>   }
>   
> @@ -391,7 +396,12 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
>   err_start:
>       while (--i >= 0) {
>           peer = qemu_get_peer(ncs , i);
> -        vhost_net_stop_one(get_vhost_net(peer), dev);
> +
> +        if (i == 0) {
> +            vhost_net_stop_one(get_vhost_net(peer), dev, true);
> +        } else {
> +            vhost_net_stop_one(get_vhost_net(peer), dev, false);
> +        }
>       }
>       e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
>       if (e < 0) {
> @@ -420,7 +430,13 @@ void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
>           } else {
>               peer = qemu_get_peer(ncs, n->max_queue_pairs);
>           }
> -        vhost_net_stop_one(get_vhost_net(peer), dev);
> +
> +        /* We only reset backend device during the last vhost */
> +        if (i == nvhosts - 1) {
I wonder if there's any specific reason to position device reset in the 
for loop given that there's no virtqueue level reset? Wouldn't it be 
cleaner to reset the device at the end of vhost_net_stop() before 
return? you may use qemu_get_peer(ncs, 0) without hassle? Noted the 
vhost_ops->vhost_reset_device op is per device rather per vq.

> +            vhost_net_stop_one(get_vhost_net(peer), dev, true);
> +        } else {
> +            vhost_net_stop_one(get_vhost_net(peer), dev, false);
> +        }
>       }
>   
>       r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index c5ed7a3..d858b4f 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -719,14 +719,14 @@ static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
>       return idx;
>   }
>   
> -static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
> +static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev, unsigned int ready)
>   {
>       int i;
>       trace_vhost_vdpa_set_vring_ready(dev);
>       for (i = 0; i < dev->nvqs; ++i) {
>           struct vhost_vring_state state = {
>               .index = dev->vq_index + i,
> -            .num = 1,
> +            .num = ready,
>           };
>           vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
>       }
> @@ -1088,8 +1088,9 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
>           if (unlikely(!ok)) {
>               return -1;
>           }
> -        vhost_vdpa_set_vring_ready(dev);
> +        vhost_vdpa_set_vring_ready(dev, 1);
>       } else {
> +        vhost_vdpa_set_vring_ready(dev, 0);
>           ok = vhost_vdpa_svqs_stop(dev);
>           if (unlikely(!ok)) {
>               return -1;
> @@ -1105,7 +1106,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
>           memory_listener_register(&v->listener, &address_space_memory);
>           return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
>       } else {
> -        vhost_vdpa_reset_device(dev);
>           vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
>                                      VIRTIO_CONFIG_S_DRIVER);
Here's another issue (regression) got to address - the added 
S_ACKNOWLEDGE | S_DRIVER bits will be cleared right away by the 
follow-up reset in vhost_net_stop_one(... , true), which in turn will 
cause virtio fail to initialize e.g. vhost_vdpa_set_features() will fail 
to set VIRTIO_CONFIG_S_FEATURES_OK.

Ideally the status bit should be set whenever the corresponding status 
bit is set by virtio_net from virtio_net_vhost_status(), or practically 
it can be done at the very beginning of vhost_dev_start(), for e.g. the 
first call before vhost_dev_set_features(). For this purpose, you may 
consider adding another vhost_init_device op, which is symmetric to 
vhost_ops->vhost_reset_device in the vhost_net_stop() path.

Thanks,
-Siwei

>           memory_listener_unregister(&v->listener);
> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
> index b643f42..6d9b4a3 100644
> --- a/hw/virtio/vhost.c
> +++ b/hw/virtio/vhost.c
> @@ -1820,7 +1820,7 @@ fail_features:
>   void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
>   {
>       int i;
> -
> +    printf("vhost_dev_stop test\n");
>       /* should only be called after backend is connected */
>       assert(hdev->vhost_ops);
>   
> @@ -1854,3 +1854,17 @@ int vhost_net_set_backend(struct vhost_dev *hdev,
>   
>       return -ENOSYS;
>   }
> +
> +int vhost_dev_reset(struct vhost_dev *hdev)
> +{
> +    int ret = 0;
> +
> +    /* should only be called after backend is connected */
> +    assert(hdev->vhost_ops);
> +
> +    if (hdev->vhost_ops->vhost_reset_device) {
> +        ret = hdev->vhost_ops->vhost_reset_device(hdev);
> +    }
> +
> +    return ret;
> +}
> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> index 58a73e7..b8b7c20 100644
> --- a/include/hw/virtio/vhost.h
> +++ b/include/hw/virtio/vhost.h
> @@ -114,6 +114,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
>   void vhost_dev_cleanup(struct vhost_dev *hdev);
>   int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
>   void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
> +int vhost_dev_reset(struct vhost_dev *hdev);
>   int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
>   void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
>
Michael Qiu March 31, 2022, 1:39 a.m. UTC | #3
Michael,

Others has already received the patch, don't know why. Anyway, I will 
repost another version(V3).

Here is the V2 patch, see below:

From: Michael Qiu <qiudayu@archeros.com>

Currently, when VM poweroff, it will trigger vdpa
device(such as mlx bluefield2 VF) reset many times(with 1 datapath
queue pair and one control queue, triggered 3 times), this
leads to below issue:

vhost VQ 2 ring restore failed: -22: Invalid argument (22)

This because in vhost_net_stop(), it will stop all vhost device bind to
this virtio device, and in vhost_dev_stop(), qemu tries to stop the device
, then stop the queue: vhost_virtqueue_stop().

In vhost_dev_stop(), it resets the device, which clear some flags
in low level driver, and in next loop(stop other vhost backends),
qemu try to stop the queue corresponding to the vhost backend,
  the driver finds that the VQ is invalied, this is the root cause.

To solve the issue, vdpa should set vring unready, and
remove reset ops in device stop: vhost_dev_start(hdev, false).

and implement a new function vhost_dev_reset, only reset backend
device when the last vhost stop triggerd.

Signed-off-by: Michael Qiu<qiudayu@archeros.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
v2 --> v1:
    implement a new function vhost_dev_reset,
    reset the backend kernel device at last.

---
  hw/net/vhost_net.c        | 22 +++++++++++++++++++---
  hw/virtio/vhost-vdpa.c    |  8 ++++----
  hw/virtio/vhost.c         | 16 +++++++++++++++-
  include/hw/virtio/vhost.h |  1 +
  4 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 30379d2..3cdf6a4 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -299,7 +299,7 @@ fail_notifiers:
  }

  static void vhost_net_stop_one(struct vhost_net *net,
-                               VirtIODevice *dev)
+                               VirtIODevice *dev, bool reset)
  {
      struct vhost_vring_file file = { .fd = -1 };

@@ -313,6 +313,11 @@ static void vhost_net_stop_one(struct vhost_net *net,
          net->nc->info->poll(net->nc, true);
      }
      vhost_dev_stop(&net->dev, dev);
+
+    if (reset) {
+        vhost_dev_reset(&net->dev);
+    }
+
      vhost_dev_disable_notifiers(&net->dev, dev);
  }

@@ -391,7 +396,12 @@ int vhost_net_start(VirtIODevice *dev, 
NetClientState *ncs,
  err_start:
      while (--i >= 0) {
          peer = qemu_get_peer(ncs , i);
-        vhost_net_stop_one(get_vhost_net(peer), dev);
+
+        if (i == 0) {
+            vhost_net_stop_one(get_vhost_net(peer), dev, true);
+        } else {
+            vhost_net_stop_one(get_vhost_net(peer), dev, false);
+        }
      }
      e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
      if (e < 0) {
@@ -420,7 +430,13 @@ void vhost_net_stop(VirtIODevice *dev, 
NetClientState *ncs,
          } else {
              peer = qemu_get_peer(ncs, n->max_queue_pairs);
          }
-        vhost_net_stop_one(get_vhost_net(peer), dev);
+
+        /* We only reset backend device during the last vhost */
+        if (i == nvhosts - 1) {
+            vhost_net_stop_one(get_vhost_net(peer), dev, true);
+        } else {
+            vhost_net_stop_one(get_vhost_net(peer), dev, false);
+        }
      }

      r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index c5ed7a3..d858b4f 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -719,14 +719,14 @@ static int vhost_vdpa_get_vq_index(struct 
vhost_dev *dev, int idx)
      return idx;
  }

-static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
+static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev, unsigned 
int ready)
  {
      int i;
      trace_vhost_vdpa_set_vring_ready(dev);
      for (i = 0; i < dev->nvqs; ++i) {
          struct vhost_vring_state state = {
              .index = dev->vq_index + i,
-            .num = 1,
+            .num = ready,
          };
          vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
      }
@@ -1088,8 +1088,9 @@ static int vhost_vdpa_dev_start(struct vhost_dev 
*dev, bool started)
          if (unlikely(!ok)) {
              return -1;
          }
-        vhost_vdpa_set_vring_ready(dev);
+        vhost_vdpa_set_vring_ready(dev, 1);
      } else {
+        vhost_vdpa_set_vring_ready(dev, 0);
          ok = vhost_vdpa_svqs_stop(dev);
          if (unlikely(!ok)) {
              return -1;
@@ -1105,7 +1106,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev 
*dev, bool started)
          memory_listener_register(&v->listener, &address_space_memory);
          return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
      } else {
-        vhost_vdpa_reset_device(dev);
          vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                     VIRTIO_CONFIG_S_DRIVER);
          memory_listener_unregister(&v->listener);
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index b643f42..6d9b4a3 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1820,7 +1820,7 @@ fail_features:
  void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
  {
      int i;
-
+    printf("vhost_dev_stop test\n");
      /* should only be called after backend is connected */
      assert(hdev->vhost_ops);

@@ -1854,3 +1854,17 @@ int vhost_net_set_backend(struct vhost_dev *hdev,

      return -ENOSYS;
  }
+
+int vhost_dev_reset(struct vhost_dev *hdev)
+{
+    int ret = 0;
+
+    /* should only be called after backend is connected */
+    assert(hdev->vhost_ops);
+
+    if (hdev->vhost_ops->vhost_reset_device) {
+        ret = hdev->vhost_ops->vhost_reset_device(hdev);
+    }
+
+    return ret;
+}
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 58a73e7..b8b7c20 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -114,6 +114,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
  void vhost_dev_cleanup(struct vhost_dev *hdev);
  int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
  void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
+int vhost_dev_reset(struct vhost_dev *hdev);
  int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice 
*vdev);
  void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice 
*vdev);
Michael Qiu March 31, 2022, 4:01 a.m. UTC | #4
On 2022/3/31 8:15, Si-Wei Liu wrote:
> 
> 
> On 3/30/2022 3:02 AM, 08005325@163.com wrote:
>> From: Michael Qiu <qiudayu@archeros.com>
>>
>> Currently, when VM poweroff, it will trigger vdpa
>> device(such as mlx bluefield2 VF) reset many times(with 1 datapath
>> queue pair and one control queue, triggered 3 times), this
>> leads to below issue:
>>
>> vhost VQ 2 ring restore failed: -22: Invalid argument (22)
>>
>> This because in vhost_net_stop(), it will stop all vhost device bind to
>> this virtio device, and in vhost_dev_stop(), qemu tries to stop the 
>> device
>> , then stop the queue: vhost_virtqueue_stop().
>>
>> In vhost_dev_stop(), it resets the device, which clear some flags
>> in low level driver, and in next loop(stop other vhost backends),
>> qemu try to stop the queue corresponding to the vhost backend,
>>   the driver finds that the VQ is invalied, this is the root cause.
>>
>> To solve the issue, vdpa should set vring unready, and
>> remove reset ops in device stop: vhost_dev_start(hdev, false).
>>
>> and implement a new function vhost_dev_reset, only reset backend
>> device when the last vhost stop triggerd.
>>
>> Signed-off-by: Michael Qiu<qiudayu@archeros.com>
>> Acked-by: Jason Wang <jasowang@redhat.com>
>> ---
>> v2 --> v1:
>>     implement a new function vhost_dev_reset,
>>     reset the backend kernel device at last.
>>
>> ---
>>   hw/net/vhost_net.c        | 22 +++++++++++++++++++---
>>   hw/virtio/vhost-vdpa.c    |  8 ++++----
>>   hw/virtio/vhost.c         | 16 +++++++++++++++-
>>   include/hw/virtio/vhost.h |  1 +
>>   4 files changed, 39 insertions(+), 8 deletions(-)
>>
>> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
>> index 30379d2..3cdf6a4 100644
>> --- a/hw/net/vhost_net.c
>> +++ b/hw/net/vhost_net.c
>> @@ -299,7 +299,7 @@ fail_notifiers:
>>   }
>>   static void vhost_net_stop_one(struct vhost_net *net,
>> -                               VirtIODevice *dev)
>> +                               VirtIODevice *dev, bool reset)
>>   {
>>       struct vhost_vring_file file = { .fd = -1 };
>> @@ -313,6 +313,11 @@ static void vhost_net_stop_one(struct vhost_net 
>> *net,
>>           net->nc->info->poll(net->nc, true);
>>       }
>>       vhost_dev_stop(&net->dev, dev);
>> +
>> +    if (reset) {
>> +        vhost_dev_reset(&net->dev);
>> +    }
>> +
>>       vhost_dev_disable_notifiers(&net->dev, dev);
>>   }
>> @@ -391,7 +396,12 @@ int vhost_net_start(VirtIODevice *dev, 
>> NetClientState *ncs,
>>   err_start:
>>       while (--i >= 0) {
>>           peer = qemu_get_peer(ncs , i);
>> -        vhost_net_stop_one(get_vhost_net(peer), dev);
>> +
>> +        if (i == 0) {
>> +            vhost_net_stop_one(get_vhost_net(peer), dev, true);
>> +        } else {
>> +            vhost_net_stop_one(get_vhost_net(peer), dev, false);
>> +        }
>>       }
>>       e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
>>       if (e < 0) {
>> @@ -420,7 +430,13 @@ void vhost_net_stop(VirtIODevice *dev, 
>> NetClientState *ncs,
>>           } else {
>>               peer = qemu_get_peer(ncs, n->max_queue_pairs);
>>           }
>> -        vhost_net_stop_one(get_vhost_net(peer), dev);
>> +
>> +        /* We only reset backend device during the last vhost */
>> +        if (i == nvhosts - 1) {
> I wonder if there's any specific reason to position device reset in the 
> for loop given that there's no virtqueue level reset? Wouldn't it be 
> cleaner to reset the device at the end of vhost_net_stop() before 
> return? you may use qemu_get_peer(ncs, 0) without hassle? Noted the 
> vhost_ops->vhost_reset_device op is per device rather per vq.

OK, it's a good way to do reset at the end of vhost_net_stop(), I will 
change it in next version.

> 
>> +            vhost_net_stop_one(get_vhost_net(peer), dev, true);
>> +        } else {
>> +            vhost_net_stop_one(get_vhost_net(peer), dev, false);
>> +        }
>>       }
>>       r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
>> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
>> index c5ed7a3..d858b4f 100644
>> --- a/hw/virtio/vhost-vdpa.c
>> +++ b/hw/virtio/vhost-vdpa.c
>> @@ -719,14 +719,14 @@ static int vhost_vdpa_get_vq_index(struct 
>> vhost_dev *dev, int idx)
>>       return idx;
>>   }
>> -static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
>> +static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev, unsigned 
>> int ready)
>>   {
>>       int i;
>>       trace_vhost_vdpa_set_vring_ready(dev);
>>       for (i = 0; i < dev->nvqs; ++i) {
>>           struct vhost_vring_state state = {
>>               .index = dev->vq_index + i,
>> -            .num = 1,
>> +            .num = ready,
>>           };
>>           vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
>>       }
>> @@ -1088,8 +1088,9 @@ static int vhost_vdpa_dev_start(struct vhost_dev 
>> *dev, bool started)
>>           if (unlikely(!ok)) {
>>               return -1;
>>           }
>> -        vhost_vdpa_set_vring_ready(dev);
>> +        vhost_vdpa_set_vring_ready(dev, 1);
>>       } else {
>> +        vhost_vdpa_set_vring_ready(dev, 0);
>>           ok = vhost_vdpa_svqs_stop(dev);
>>           if (unlikely(!ok)) {
>>               return -1;
>> @@ -1105,7 +1106,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev 
>> *dev, bool started)
>>           memory_listener_register(&v->listener, &address_space_memory);
>>           return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
>>       } else {
>> -        vhost_vdpa_reset_device(dev);
>>           vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
>>                                      VIRTIO_CONFIG_S_DRIVER);
> Here's another issue (regression) got to address - the added 
> S_ACKNOWLEDGE | S_DRIVER bits will be cleared right away by the 
> follow-up reset in vhost_net_stop_one(... , true), which in turn will 
> cause virtio fail to initialize e.g. vhost_vdpa_set_features() will fail 
> to set VIRTIO_CONFIG_S_FEATURES_OK
> 
> Ideally the status bit should be set whenever the corresponding status 
> bit is set by virtio_net from virtio_net_vhost_status(), or practically 
> it can be done at the very beginning of vhost_dev_start(), for e.g. the 
> first call before vhost_dev_set_features(). For this purpose, you may 
> consider adding another vhost_init_device op, which is symmetric to 
> vhost_ops->vhost_reset_device in the vhost_net_stop() path.
> 

Seems only vdpa device need reset after stop, although virtio spec said 
need reset, but kernel doesn't reset, and if reset it has issue to 
reprobe virtio-net in guest, So we probely only add it after reset if it 
is VDPA device, for kernel and other datapath we just keep the same as 
before.

Thanks,
Michael

> Thanks,
> -Siwei
> 
>>           memory_listener_unregister(&v->listener);
>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>> index b643f42..6d9b4a3 100644
>> --- a/hw/virtio/vhost.c
>> +++ b/hw/virtio/vhost.c
>> @@ -1820,7 +1820,7 @@ fail_features:
>>   void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
>>   {
>>       int i;
>> -
>> +    printf("vhost_dev_stop test\n");
>>       /* should only be called after backend is connected */
>>       assert(hdev->vhost_ops);
>> @@ -1854,3 +1854,17 @@ int vhost_net_set_backend(struct vhost_dev *hdev,
>>       return -ENOSYS;
>>   }
>> +
>> +int vhost_dev_reset(struct vhost_dev *hdev)
>> +{
>> +    int ret = 0;
>> +
>> +    /* should only be called after backend is connected */
>> +    assert(hdev->vhost_ops);
>> +
>> +    if (hdev->vhost_ops->vhost_reset_device) {
>> +        ret = hdev->vhost_ops->vhost_reset_device(hdev);
>> +    }
>> +
>> +    return ret;
>> +}
>> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
>> index 58a73e7..b8b7c20 100644
>> --- a/include/hw/virtio/vhost.h
>> +++ b/include/hw/virtio/vhost.h
>> @@ -114,6 +114,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void 
>> *opaque,
>>   void vhost_dev_cleanup(struct vhost_dev *hdev);
>>   int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
>>   void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
>> +int vhost_dev_reset(struct vhost_dev *hdev);
>>   int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice 
>> *vdev);
>>   void vhost_dev_disable_notifiers(struct vhost_dev *hdev, 
>> VirtIODevice *vdev);
> 
>
Michael Qiu March 31, 2022, 4:02 a.m. UTC | #5
On 2022/3/31 8:15, Si-Wei Liu wrote:
> 
> 
> On 3/30/2022 3:02 AM, 08005325@163.com wrote:
>> From: Michael Qiu <qiudayu@archeros.com>
>>
>> Currently, when VM poweroff, it will trigger vdpa
>> device(such as mlx bluefield2 VF) reset many times(with 1 datapath
>> queue pair and one control queue, triggered 3 times), this
>> leads to below issue:
>>
>> vhost VQ 2 ring restore failed: -22: Invalid argument (22)
>>
>> This because in vhost_net_stop(), it will stop all vhost device bind to
>> this virtio device, and in vhost_dev_stop(), qemu tries to stop the 
>> device
>> , then stop the queue: vhost_virtqueue_stop().
>>
>> In vhost_dev_stop(), it resets the device, which clear some flags
>> in low level driver, and in next loop(stop other vhost backends),
>> qemu try to stop the queue corresponding to the vhost backend,
>>   the driver finds that the VQ is invalied, this is the root cause.
>>
>> To solve the issue, vdpa should set vring unready, and
>> remove reset ops in device stop: vhost_dev_start(hdev, false).
>>
>> and implement a new function vhost_dev_reset, only reset backend
>> device when the last vhost stop triggerd.
>>
>> Signed-off-by: Michael Qiu<qiudayu@archeros.com>
>> Acked-by: Jason Wang <jasowang@redhat.com>
>> ---
>> v2 --> v1:
>>     implement a new function vhost_dev_reset,
>>     reset the backend kernel device at last.
>>
>> ---
>>   hw/net/vhost_net.c        | 22 +++++++++++++++++++---
>>   hw/virtio/vhost-vdpa.c    |  8 ++++----
>>   hw/virtio/vhost.c         | 16 +++++++++++++++-
>>   include/hw/virtio/vhost.h |  1 +
>>   4 files changed, 39 insertions(+), 8 deletions(-)
>>
>> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
>> index 30379d2..3cdf6a4 100644
>> --- a/hw/net/vhost_net.c
>> +++ b/hw/net/vhost_net.c
>> @@ -299,7 +299,7 @@ fail_notifiers:
>>   }
>>   static void vhost_net_stop_one(struct vhost_net *net,
>> -                               VirtIODevice *dev)
>> +                               VirtIODevice *dev, bool reset)
>>   {
>>       struct vhost_vring_file file = { .fd = -1 };
>> @@ -313,6 +313,11 @@ static void vhost_net_stop_one(struct vhost_net 
>> *net,
>>           net->nc->info->poll(net->nc, true);
>>       }
>>       vhost_dev_stop(&net->dev, dev);
>> +
>> +    if (reset) {
>> +        vhost_dev_reset(&net->dev);
>> +    }
>> +
>>       vhost_dev_disable_notifiers(&net->dev, dev);
>>   }
>> @@ -391,7 +396,12 @@ int vhost_net_start(VirtIODevice *dev, 
>> NetClientState *ncs,
>>   err_start:
>>       while (--i >= 0) {
>>           peer = qemu_get_peer(ncs , i);
>> -        vhost_net_stop_one(get_vhost_net(peer), dev);
>> +
>> +        if (i == 0) {
>> +            vhost_net_stop_one(get_vhost_net(peer), dev, true);
>> +        } else {
>> +            vhost_net_stop_one(get_vhost_net(peer), dev, false);
>> +        }
>>       }
>>       e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
>>       if (e < 0) {
>> @@ -420,7 +430,13 @@ void vhost_net_stop(VirtIODevice *dev, 
>> NetClientState *ncs,
>>           } else {
>>               peer = qemu_get_peer(ncs, n->max_queue_pairs);
>>           }
>> -        vhost_net_stop_one(get_vhost_net(peer), dev);
>> +
>> +        /* We only reset backend device during the last vhost */
>> +        if (i == nvhosts - 1) {
> I wonder if there's any specific reason to position device reset in the 
> for loop given that there's no virtqueue level reset? Wouldn't it be 
> cleaner to reset the device at the end of vhost_net_stop() before 
> return? you may use qemu_get_peer(ncs, 0) without hassle? Noted the 
> vhost_ops->vhost_reset_device op is per device rather per vq.

OK, it's a good way to do reset at the end of vhost_net_stop(), I will 
change it in next version.

> 
>> +            vhost_net_stop_one(get_vhost_net(peer), dev, true);
>> +        } else {
>> +            vhost_net_stop_one(get_vhost_net(peer), dev, false);
>> +        }
>>       }
>>       r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
>> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
>> index c5ed7a3..d858b4f 100644
>> --- a/hw/virtio/vhost-vdpa.c
>> +++ b/hw/virtio/vhost-vdpa.c
>> @@ -719,14 +719,14 @@ static int vhost_vdpa_get_vq_index(struct 
>> vhost_dev *dev, int idx)
>>       return idx;
>>   }
>> -static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
>> +static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev, unsigned 
>> int ready)
>>   {
>>       int i;
>>       trace_vhost_vdpa_set_vring_ready(dev);
>>       for (i = 0; i < dev->nvqs; ++i) {
>>           struct vhost_vring_state state = {
>>               .index = dev->vq_index + i,
>> -            .num = 1,
>> +            .num = ready,
>>           };
>>           vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
>>       }
>> @@ -1088,8 +1088,9 @@ static int vhost_vdpa_dev_start(struct vhost_dev 
>> *dev, bool started)
>>           if (unlikely(!ok)) {
>>               return -1;
>>           }
>> -        vhost_vdpa_set_vring_ready(dev);
>> +        vhost_vdpa_set_vring_ready(dev, 1);
>>       } else {
>> +        vhost_vdpa_set_vring_ready(dev, 0);
>>           ok = vhost_vdpa_svqs_stop(dev);
>>           if (unlikely(!ok)) {
>>               return -1;
>> @@ -1105,7 +1106,6 @@ static int vhost_vdpa_dev_start(struct vhost_dev 
>> *dev, bool started)
>>           memory_listener_register(&v->listener, &address_space_memory);
>>           return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
>>       } else {
>> -        vhost_vdpa_reset_device(dev);
>>           vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
>>                                      VIRTIO_CONFIG_S_DRIVER);
> Here's another issue (regression) got to address - the added 
> S_ACKNOWLEDGE | S_DRIVER bits will be cleared right away by the 
> follow-up reset in vhost_net_stop_one(... , true), which in turn will 
> cause virtio fail to initialize e.g. vhost_vdpa_set_features() will fail 
> to set VIRTIO_CONFIG_S_FEATURES_OK
> 
> Ideally the status bit should be set whenever the corresponding status 
> bit is set by virtio_net from virtio_net_vhost_status(), or practically 
> it can be done at the very beginning of vhost_dev_start(), for e.g. the 
> first call before vhost_dev_set_features(). For this purpose, you may 
> consider adding another vhost_init_device op, which is symmetric to 
> vhost_ops->vhost_reset_device in the vhost_net_stop() path.
> 

Seems only vdpa device need reset after stop, although virtio spec said 
need reset, but kernel doesn't reset, and if reset it has issue to 
reprobe virtio-net in guest, So we probely only add it after reset if it 
is VDPA device, for kernel and other datapath we just keep the same as 
before.

Thanks,
Michael

> Thanks,
> -Siwei
> 
>>           memory_listener_unregister(&v->listener);
>> diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
>> index b643f42..6d9b4a3 100644
>> --- a/hw/virtio/vhost.c
>> +++ b/hw/virtio/vhost.c
>> @@ -1820,7 +1820,7 @@ fail_features:
>>   void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
>>   {
>>       int i;
>> -
>> +    printf("vhost_dev_stop test\n");
>>       /* should only be called after backend is connected */
>>       assert(hdev->vhost_ops);
>> @@ -1854,3 +1854,17 @@ int vhost_net_set_backend(struct vhost_dev *hdev,
>>       return -ENOSYS;
>>   }
>> +
>> +int vhost_dev_reset(struct vhost_dev *hdev)
>> +{
>> +    int ret = 0;
>> +
>> +    /* should only be called after backend is connected */
>> +    assert(hdev->vhost_ops);
>> +
>> +    if (hdev->vhost_ops->vhost_reset_device) {
>> +        ret = hdev->vhost_ops->vhost_reset_device(hdev);
>> +    }
>> +
>> +    return ret;
>> +}
>> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
>> index 58a73e7..b8b7c20 100644
>> --- a/include/hw/virtio/vhost.h
>> +++ b/include/hw/virtio/vhost.h
>> @@ -114,6 +114,7 @@ int vhost_dev_init(struct vhost_dev *hdev, void 
>> *opaque,
>>   void vhost_dev_cleanup(struct vhost_dev *hdev);
>>   int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
>>   void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
>> +int vhost_dev_reset(struct vhost_dev *hdev);
>>   int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice 
>> *vdev);
>>   void vhost_dev_disable_notifiers(struct vhost_dev *hdev, 
>> VirtIODevice *vdev);
> 
>
diff mbox series

Patch

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index 30379d2..3cdf6a4 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -299,7 +299,7 @@  fail_notifiers:
 }
 
 static void vhost_net_stop_one(struct vhost_net *net,
-                               VirtIODevice *dev)
+                               VirtIODevice *dev, bool reset)
 {
     struct vhost_vring_file file = { .fd = -1 };
 
@@ -313,6 +313,11 @@  static void vhost_net_stop_one(struct vhost_net *net,
         net->nc->info->poll(net->nc, true);
     }
     vhost_dev_stop(&net->dev, dev);
+
+    if (reset) {
+        vhost_dev_reset(&net->dev);
+    }
+
     vhost_dev_disable_notifiers(&net->dev, dev);
 }
 
@@ -391,7 +396,12 @@  int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
 err_start:
     while (--i >= 0) {
         peer = qemu_get_peer(ncs , i);
-        vhost_net_stop_one(get_vhost_net(peer), dev);
+
+        if (i == 0) {
+            vhost_net_stop_one(get_vhost_net(peer), dev, true);
+        } else {
+            vhost_net_stop_one(get_vhost_net(peer), dev, false);
+        }
     }
     e = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
     if (e < 0) {
@@ -420,7 +430,13 @@  void vhost_net_stop(VirtIODevice *dev, NetClientState *ncs,
         } else {
             peer = qemu_get_peer(ncs, n->max_queue_pairs);
         }
-        vhost_net_stop_one(get_vhost_net(peer), dev);
+
+        /* We only reset backend device during the last vhost */
+        if (i == nvhosts - 1) {
+            vhost_net_stop_one(get_vhost_net(peer), dev, true);
+        } else {
+            vhost_net_stop_one(get_vhost_net(peer), dev, false);
+        }
     }
 
     r = k->set_guest_notifiers(qbus->parent, total_notifiers, false);
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index c5ed7a3..d858b4f 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -719,14 +719,14 @@  static int vhost_vdpa_get_vq_index(struct vhost_dev *dev, int idx)
     return idx;
 }
 
-static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev)
+static int vhost_vdpa_set_vring_ready(struct vhost_dev *dev, unsigned int ready)
 {
     int i;
     trace_vhost_vdpa_set_vring_ready(dev);
     for (i = 0; i < dev->nvqs; ++i) {
         struct vhost_vring_state state = {
             .index = dev->vq_index + i,
-            .num = 1,
+            .num = ready,
         };
         vhost_vdpa_call(dev, VHOST_VDPA_SET_VRING_ENABLE, &state);
     }
@@ -1088,8 +1088,9 @@  static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
         if (unlikely(!ok)) {
             return -1;
         }
-        vhost_vdpa_set_vring_ready(dev);
+        vhost_vdpa_set_vring_ready(dev, 1);
     } else {
+        vhost_vdpa_set_vring_ready(dev, 0);
         ok = vhost_vdpa_svqs_stop(dev);
         if (unlikely(!ok)) {
             return -1;
@@ -1105,7 +1106,6 @@  static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
         memory_listener_register(&v->listener, &address_space_memory);
         return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
     } else {
-        vhost_vdpa_reset_device(dev);
         vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE |
                                    VIRTIO_CONFIG_S_DRIVER);
         memory_listener_unregister(&v->listener);
diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c
index b643f42..6d9b4a3 100644
--- a/hw/virtio/vhost.c
+++ b/hw/virtio/vhost.c
@@ -1820,7 +1820,7 @@  fail_features:
 void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
 {
     int i;
-
+    printf("vhost_dev_stop test\n");
     /* should only be called after backend is connected */
     assert(hdev->vhost_ops);
 
@@ -1854,3 +1854,17 @@  int vhost_net_set_backend(struct vhost_dev *hdev,
 
     return -ENOSYS;
 }
+
+int vhost_dev_reset(struct vhost_dev *hdev)
+{
+    int ret = 0;
+
+    /* should only be called after backend is connected */
+    assert(hdev->vhost_ops);
+
+    if (hdev->vhost_ops->vhost_reset_device) {
+        ret = hdev->vhost_ops->vhost_reset_device(hdev);
+    }
+
+    return ret;
+}
diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index 58a73e7..b8b7c20 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -114,6 +114,7 @@  int vhost_dev_init(struct vhost_dev *hdev, void *opaque,
 void vhost_dev_cleanup(struct vhost_dev *hdev);
 int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev);
 void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
+int vhost_dev_reset(struct vhost_dev *hdev);
 int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
 void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);