diff mbox series

vdpa-dev: Fix initialisation order to restore VDUSE compatibility

Message ID 20240202132521.32714-1-kwolf@redhat.com (mailing list archive)
State New, archived
Headers show
Series vdpa-dev: Fix initialisation order to restore VDUSE compatibility | expand

Commit Message

Kevin Wolf Feb. 2, 2024, 1:25 p.m. UTC
VDUSE requires that virtqueues are first enabled before the DRIVER_OK
status flag is set; with the current API of the kernel module, it is
impossible to enable the opposite order in our block export code because
userspace is not notified when a virtqueue is enabled.

This requirement also mathces the normal initialisation order as done by
the generic vhost code in QEMU. However, commit 6c482547 accidentally
changed the order for vdpa-dev and broke access to VDUSE devices with
this.

This changes vdpa-dev to use the normal order again and use the standard
vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
used with vdpa-dev again after this fix.

Cc: qemu-stable@nongnu.org
Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/virtio/vdpa-dev.c   |  5 +----
 hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
 2 files changed, 18 insertions(+), 4 deletions(-)

Comments

Stefano Garzarella Feb. 5, 2024, 10:51 a.m. UTC | #1
On Fri, Feb 02, 2024 at 02:25:21PM +0100, Kevin Wolf wrote:
>VDUSE requires that virtqueues are first enabled before the DRIVER_OK
>status flag is set; with the current API of the kernel module, it is
>impossible to enable the opposite order in our block export code because
>userspace is not notified when a virtqueue is enabled.

Yeah, IMHO the VDUSE protocol is missing a VDUSE_SET_VQ_READY message,
I'll start another thread about that, but in the meantime I agree that
we should fix QEMU since we need to work properly with old kernels as
well.

>
>This requirement also mathces the normal initialisation order as done by
>the generic vhost code in QEMU. However, commit 6c482547 accidentally
>changed the order for vdpa-dev and broke access to VDUSE devices with
>this.
>
>This changes vdpa-dev to use the normal order again and use the standard
>vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
>used with vdpa-dev again after this fix.

I like this approach and the patch LGTM, but I'm a bit worried about
this function in hw/net/vhost_net.c:

     int vhost_set_vring_enable(NetClientState *nc, int enable)
     {
         VHostNetState *net = get_vhost_net(nc);
         const VhostOps *vhost_ops = net->dev.vhost_ops;

         nc->vring_enable = enable;

         if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
             return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
         }

         return 0;
     }

@Eugenio, @Jason, should we change some things there if vhost-vdpa
implements the vhost_set_vring_enable callback?

Do you remember why we didn't implement it from the beginning?

Thanks,
Stefano

>
>Cc: qemu-stable@nongnu.org
>Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
>Signed-off-by: Kevin Wolf <kwolf@redhat.com>
>---
> hw/virtio/vdpa-dev.c   |  5 +----
> hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
> 2 files changed, 18 insertions(+), 4 deletions(-)
>
>diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
>index eb9ecea83b..13e87f06f6 100644
>--- a/hw/virtio/vdpa-dev.c
>+++ b/hw/virtio/vdpa-dev.c
>@@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp)
>
>     s->dev.acked_features = vdev->guest_features;
>
>-    ret = vhost_dev_start(&s->dev, vdev, false);
>+    ret = vhost_dev_start(&s->dev, vdev, true);
>     if (ret < 0) {
>         error_setg_errno(errp, -ret, "Error starting vhost");
>         goto err_guest_notifiers;
>     }
>-    for (i = 0; i < s->dev.nvqs; ++i) {
>-        vhost_vdpa_set_vring_ready(&s->vdpa, i);
>-    }
>     s->started = true;
>
>     /*
>diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
>index 3a43beb312..c4574d56c5 100644
>--- a/hw/virtio/vhost-vdpa.c
>+++ b/hw/virtio/vhost-vdpa.c
>@@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
>     return r;
> }
>
>+static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
>+{
>+    struct vhost_vdpa *v = dev->opaque;
>+    unsigned int i;
>+    int ret;
>+
>+    for (i = 0; i < dev->nvqs; ++i) {
>+        ret = vhost_vdpa_set_vring_ready(v, i);
>+        if (ret < 0) {
>+            return ret;
>+        }
>+    }
>+
>+    return 0;
>+}
>+
> static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
>                                        int fd)
> {
>@@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = {
>         .vhost_set_features = vhost_vdpa_set_features,
>         .vhost_reset_device = vhost_vdpa_reset_device,
>         .vhost_get_vq_index = vhost_vdpa_get_vq_index,
>+        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
>         .vhost_get_config  = vhost_vdpa_get_config,
>         .vhost_set_config = vhost_vdpa_set_config,
>         .vhost_requires_shm_log = NULL,
>-- 
>2.43.0
>
Eugenio Perez Martin Feb. 5, 2024, 12:22 p.m. UTC | #2
On Fri, Feb 2, 2024 at 2:25 PM Kevin Wolf <kwolf@redhat.com> wrote:
>
> VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> status flag is set; with the current API of the kernel module, it is
> impossible to enable the opposite order in our block export code because
> userspace is not notified when a virtqueue is enabled.
>
> This requirement also mathces the normal initialisation order as done by
> the generic vhost code in QEMU. However, commit 6c482547 accidentally
> changed the order for vdpa-dev and broke access to VDUSE devices with
> this.
>
> This changes vdpa-dev to use the normal order again and use the standard
> vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> used with vdpa-dev again after this fix.
>
> Cc: qemu-stable@nongnu.org
> Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  hw/virtio/vdpa-dev.c   |  5 +----
>  hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
>  2 files changed, 18 insertions(+), 4 deletions(-)
>
> diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> index eb9ecea83b..13e87f06f6 100644
> --- a/hw/virtio/vdpa-dev.c
> +++ b/hw/virtio/vdpa-dev.c
> @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp)
>
>      s->dev.acked_features = vdev->guest_features;
>
> -    ret = vhost_dev_start(&s->dev, vdev, false);
> +    ret = vhost_dev_start(&s->dev, vdev, true);
>      if (ret < 0) {
>          error_setg_errno(errp, -ret, "Error starting vhost");
>          goto err_guest_notifiers;
>      }
> -    for (i = 0; i < s->dev.nvqs; ++i) {
> -        vhost_vdpa_set_vring_ready(&s->vdpa, i);
> -    }
>      s->started = true;
>
>      /*
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 3a43beb312..c4574d56c5 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
>      return r;
>  }
>
> +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> +{
> +    struct vhost_vdpa *v = dev->opaque;
> +    unsigned int i;
> +    int ret;
> +
> +    for (i = 0; i < dev->nvqs; ++i) {
> +        ret = vhost_vdpa_set_vring_ready(v, i);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}
> +
>  static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
>                                         int fd)
>  {
> @@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = {
>          .vhost_set_features = vhost_vdpa_set_features,
>          .vhost_reset_device = vhost_vdpa_reset_device,
>          .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> +        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
>          .vhost_get_config  = vhost_vdpa_get_config,
>          .vhost_set_config = vhost_vdpa_set_config,
>          .vhost_requires_shm_log = NULL,
> --
> 2.43.0
>

vhost-vdpa net enables CVQ before dataplane ones to configure all the
device in the destination of a live migration. To go back again to
this callback would cause the device to enable the dataplane before
virtqueues are configured again.

How does VDUSE userspace knows how many queues should enable? Can't
the kernel perform the necessary actions after DRIVER_OK, like
configuring the kick etc?

Thanks!
Kevin Wolf Feb. 5, 2024, 1:13 p.m. UTC | #3
Am 02.02.2024 um 14:25 hat Kevin Wolf geschrieben:
> VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> status flag is set; with the current API of the kernel module, it is
> impossible to enable the opposite order in our block export code because
> userspace is not notified when a virtqueue is enabled.
> 
> This requirement also mathces the normal initialisation order as done by
> the generic vhost code in QEMU. However, commit 6c482547 accidentally
> changed the order for vdpa-dev and broke access to VDUSE devices with
> this.
> 
> This changes vdpa-dev to use the normal order again and use the standard
> vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> used with vdpa-dev again after this fix.
> 
> Cc: qemu-stable@nongnu.org
> Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
> Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> ---
>  hw/virtio/vdpa-dev.c   |  5 +----
>  hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
>  2 files changed, 18 insertions(+), 4 deletions(-)
> 
> diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> index eb9ecea83b..13e87f06f6 100644
> --- a/hw/virtio/vdpa-dev.c
> +++ b/hw/virtio/vdpa-dev.c
> @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp)
>  
>      s->dev.acked_features = vdev->guest_features;
>  
> -    ret = vhost_dev_start(&s->dev, vdev, false);
> +    ret = vhost_dev_start(&s->dev, vdev, true);
>      if (ret < 0) {
>          error_setg_errno(errp, -ret, "Error starting vhost");
>          goto err_guest_notifiers;
>      }
> -    for (i = 0; i < s->dev.nvqs; ++i) {
> -        vhost_vdpa_set_vring_ready(&s->vdpa, i);
> -    }
>      s->started = true;
>  
>      /*
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 3a43beb312..c4574d56c5 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
>      return r;
>  }
>  
> +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> +{
> +    struct vhost_vdpa *v = dev->opaque;
> +    unsigned int i;
> +    int ret;
> +
> +    for (i = 0; i < dev->nvqs; ++i) {
> +        ret = vhost_vdpa_set_vring_ready(v, i);
> +        if (ret < 0) {
> +            return ret;
> +        }
> +    }
> +
> +    return 0;
> +}

Oops, this forgets to actually use the @enable parameter, and always
enables the queue even if the caller wanted to disable it.

I'll fix this in a v2, but I'd first like to see if something has to
change to address Stefano's concern, too.

Kevin
Kevin Wolf Feb. 5, 2024, 1:49 p.m. UTC | #4
Am 05.02.2024 um 13:22 hat Eugenio Perez Martin geschrieben:
> On Fri, Feb 2, 2024 at 2:25 PM Kevin Wolf <kwolf@redhat.com> wrote:
> >
> > VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> > status flag is set; with the current API of the kernel module, it is
> > impossible to enable the opposite order in our block export code because
> > userspace is not notified when a virtqueue is enabled.
> >
> > This requirement also mathces the normal initialisation order as done by
> > the generic vhost code in QEMU. However, commit 6c482547 accidentally
> > changed the order for vdpa-dev and broke access to VDUSE devices with
> > this.
> >
> > This changes vdpa-dev to use the normal order again and use the standard
> > vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> > used with vdpa-dev again after this fix.
> >
> > Cc: qemu-stable@nongnu.org
> > Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
> > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > ---
> >  hw/virtio/vdpa-dev.c   |  5 +----
> >  hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
> >  2 files changed, 18 insertions(+), 4 deletions(-)
> >
> > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> > index eb9ecea83b..13e87f06f6 100644
> > --- a/hw/virtio/vdpa-dev.c
> > +++ b/hw/virtio/vdpa-dev.c
> > @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp)
> >
> >      s->dev.acked_features = vdev->guest_features;
> >
> > -    ret = vhost_dev_start(&s->dev, vdev, false);
> > +    ret = vhost_dev_start(&s->dev, vdev, true);
> >      if (ret < 0) {
> >          error_setg_errno(errp, -ret, "Error starting vhost");
> >          goto err_guest_notifiers;
> >      }
> > -    for (i = 0; i < s->dev.nvqs; ++i) {
> > -        vhost_vdpa_set_vring_ready(&s->vdpa, i);
> > -    }
> >      s->started = true;
> >
> >      /*
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index 3a43beb312..c4574d56c5 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
> >      return r;
> >  }
> >
> > +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> > +{
> > +    struct vhost_vdpa *v = dev->opaque;
> > +    unsigned int i;
> > +    int ret;
> > +
> > +    for (i = 0; i < dev->nvqs; ++i) {
> > +        ret = vhost_vdpa_set_vring_ready(v, i);
> > +        if (ret < 0) {
> > +            return ret;
> > +        }
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
> >                                         int fd)
> >  {
> > @@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = {
> >          .vhost_set_features = vhost_vdpa_set_features,
> >          .vhost_reset_device = vhost_vdpa_reset_device,
> >          .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> > +        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
> >          .vhost_get_config  = vhost_vdpa_get_config,
> >          .vhost_set_config = vhost_vdpa_set_config,
> >          .vhost_requires_shm_log = NULL,
> 
> vhost-vdpa net enables CVQ before dataplane ones to configure all the
> device in the destination of a live migration. To go back again to
> this callback would cause the device to enable the dataplane before
> virtqueues are configured again.

Not that it makes a difference, but I don't think it would actually be
going back. Even before your commit 6c482547, we were not making use of
the generic callback but just called the function in a slightly
different place (but less different than after commit 6c482547).

But anyway... Why don't the other vhost backend need the same for
vhost-net then? Do they just not support live migration?

I don't know the code well enough to say where the problem is, but if
vhost-vdpa networking code relies on the usual vhost operations not
being implemented and bypasses VhostOps to replace it, that sounds like
a design problem to me. Maybe VhostOps needs a new operation to enable
just a single virtqueue that can be used by the networking code instead?

> How does VDUSE userspace knows how many queues should enable? Can't
> the kernel perform the necessary actions after DRIVER_OK, like
> configuring the kick etc?

Not sure if I understand the question. The vdpa kernel interface always
enables individual queues, so the VDUSE userspace will enable whatever
queues it was asked to enable. The only restriction is that the queues
need to be enabled before setting DRIVER_OK.

The interface that enables all virtqueues at once seems to be just
.vhost_set_vring_enable in QEMU.

Kevin
Jason Wang Feb. 6, 2024, 2:47 a.m. UTC | #5
On Mon, Feb 5, 2024 at 6:51 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> On Fri, Feb 02, 2024 at 02:25:21PM +0100, Kevin Wolf wrote:
> >VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> >status flag is set; with the current API of the kernel module, it is
> >impossible to enable the opposite order in our block export code because
> >userspace is not notified when a virtqueue is enabled.

Did this mean virtio-blk will enable a virtqueue after DRIVER_OK? Sepc
is not clear about this and that's why we introduce
VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK.

>
> Yeah, IMHO the VDUSE protocol is missing a VDUSE_SET_VQ_READY message,

I think you meant when VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is
negotiated. If this is truth, it seems a little more complicated, for
example the get_backend_features needs to be forward to the userspace?
This seems suboptimal to implement this in the spec first and then we
can leverage the features. Or we can have another parameter for the
ioctl that creates the vduse device.

> I'll start another thread about that, but in the meantime I agree that
> we should fix QEMU since we need to work properly with old kernels as
> well.
>
> >
> >This requirement also mathces the normal initialisation order as done by
> >the generic vhost code in QEMU. However, commit 6c482547 accidentally
> >changed the order for vdpa-dev and broke access to VDUSE devices with
> >this.
> >
> >This changes vdpa-dev to use the normal order again and use the standard
> >vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> >used with vdpa-dev again after this fix.
>
> I like this approach and the patch LGTM, but I'm a bit worried about
> this function in hw/net/vhost_net.c:
>
>      int vhost_set_vring_enable(NetClientState *nc, int enable)
>      {
>          VHostNetState *net = get_vhost_net(nc);
>          const VhostOps *vhost_ops = net->dev.vhost_ops;
>
>          nc->vring_enable = enable;
>
>          if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
>              return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
>          }
>
>          return 0;
>      }
>
> @Eugenio, @Jason, should we change some things there if vhost-vdpa
> implements the vhost_set_vring_enable callback?

Eugenio may know more, I remember we need to enable cvq first for
shadow virtqueue to restore some states.

>
> Do you remember why we didn't implement it from the beginning?

It seems the vrings parameter is introduced after vhost-vdpa is implemented.

Thanks

>
> Thanks,
> Stefano
>
> >
> >Cc: qemu-stable@nongnu.org
> >Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
> >Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> >---
> > hw/virtio/vdpa-dev.c   |  5 +----
> > hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
> > 2 files changed, 18 insertions(+), 4 deletions(-)
> >
> >diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> >index eb9ecea83b..13e87f06f6 100644
> >--- a/hw/virtio/vdpa-dev.c
> >+++ b/hw/virtio/vdpa-dev.c
> >@@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp)
> >
> >     s->dev.acked_features = vdev->guest_features;
> >
> >-    ret = vhost_dev_start(&s->dev, vdev, false);
> >+    ret = vhost_dev_start(&s->dev, vdev, true);
> >     if (ret < 0) {
> >         error_setg_errno(errp, -ret, "Error starting vhost");
> >         goto err_guest_notifiers;
> >     }
> >-    for (i = 0; i < s->dev.nvqs; ++i) {
> >-        vhost_vdpa_set_vring_ready(&s->vdpa, i);
> >-    }
> >     s->started = true;
> >
> >     /*
> >diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> >index 3a43beb312..c4574d56c5 100644
> >--- a/hw/virtio/vhost-vdpa.c
> >+++ b/hw/virtio/vhost-vdpa.c
> >@@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
> >     return r;
> > }
> >
> >+static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> >+{
> >+    struct vhost_vdpa *v = dev->opaque;
> >+    unsigned int i;
> >+    int ret;
> >+
> >+    for (i = 0; i < dev->nvqs; ++i) {
> >+        ret = vhost_vdpa_set_vring_ready(v, i);
> >+        if (ret < 0) {
> >+            return ret;
> >+        }
> >+    }
> >+
> >+    return 0;
> >+}
> >+
> > static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
> >                                        int fd)
> > {
> >@@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = {
> >         .vhost_set_features = vhost_vdpa_set_features,
> >         .vhost_reset_device = vhost_vdpa_reset_device,
> >         .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> >+        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
> >         .vhost_get_config  = vhost_vdpa_get_config,
> >         .vhost_set_config = vhost_vdpa_set_config,
> >         .vhost_requires_shm_log = NULL,
> >--
> >2.43.0
> >
>
Stefano Garzarella Feb. 6, 2024, 8:31 a.m. UTC | #6
On Tue, Feb 06, 2024 at 10:47:40AM +0800, Jason Wang wrote:
>On Mon, Feb 5, 2024 at 6:51 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>>
>> On Fri, Feb 02, 2024 at 02:25:21PM +0100, Kevin Wolf wrote:
>> >VDUSE requires that virtqueues are first enabled before the DRIVER_OK
>> >status flag is set; with the current API of the kernel module, it is
>> >impossible to enable the opposite order in our block export code because
>> >userspace is not notified when a virtqueue is enabled.
>
>Did this mean virtio-blk will enable a virtqueue after DRIVER_OK?

It's not specific to virtio-blk, but to the generic vdpa device we have 
in QEMU (i.e. vhost-vdpa-device). Yep, after commit 
6c4825476a4351530bcac17abab72295b75ffe98, virtqueues are enabled after 
DRIVER_OK.

>Sepc is not clear about this and that's why we introduce
>VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK.

Ah, I didn't know about this new feature. So after commit 
6c4825476a4351530bcac17abab72295b75ffe98 the vhost-vdpa-device is not 
complying with the specification, right?

>
>>
>> Yeah, IMHO the VDUSE protocol is missing a VDUSE_SET_VQ_READY message,
>
>I think you meant when VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is
>negotiated.

At this point yes. But if VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is not 
negotiated, should we return an error in vhost-vdpa kernel module if 
VHOST_VDPA_SET_VRING_ENABLE is called when DRIVER_OK is already set?

>If this is truth, it seems a little more complicated, for
>example the get_backend_features needs to be forward to the userspace?

I'm not understanding, don't we already have VHOST_GET_BACKEND_FEATURES 
for this? Or do you mean userspace on the VDUSE side?

>This seems suboptimal to implement this in the spec first and then we
>can leverage the features. Or we can have another parameter for the
>ioctl that creates the vduse device.

I got a little lost, though in vhost-user, the device can always expect 
a vring_enable/disable, so I thought it was not complicated in VDUSE.

>
>> I'll start another thread about that, but in the meantime I agree that
>> we should fix QEMU since we need to work properly with old kernels as
>> well.
>>
>> >
>> >This requirement also mathces the normal initialisation order as done by
>> >the generic vhost code in QEMU. However, commit 6c482547 accidentally
>> >changed the order for vdpa-dev and broke access to VDUSE devices with
>> >this.
>> >
>> >This changes vdpa-dev to use the normal order again and use the standard
>> >vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
>> >used with vdpa-dev again after this fix.
>>
>> I like this approach and the patch LGTM, but I'm a bit worried about
>> this function in hw/net/vhost_net.c:
>>
>>      int vhost_set_vring_enable(NetClientState *nc, int enable)
>>      {
>>          VHostNetState *net = get_vhost_net(nc);
>>          const VhostOps *vhost_ops = net->dev.vhost_ops;
>>
>>          nc->vring_enable = enable;
>>
>>          if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
>>              return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
>>          }
>>
>>          return 0;
>>      }
>>
>> @Eugenio, @Jason, should we change some things there if vhost-vdpa
>> implements the vhost_set_vring_enable callback?
>
>Eugenio may know more, I remember we need to enable cvq first for
>shadow virtqueue to restore some states.
>
>>
>> Do you remember why we didn't implement it from the beginning?
>
>It seems the vrings parameter is introduced after vhost-vdpa is 
>implemented.

Sorry, I mean why we didn't implement the vhost_set_vring_enable 
callback for vhost-vdpa from the beginning.

Thanks,
Stefano
Stefano Garzarella Feb. 6, 2024, 2:55 p.m. UTC | #7
On Tue, Feb 6, 2024 at 9:31 AM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> On Tue, Feb 06, 2024 at 10:47:40AM +0800, Jason Wang wrote:
> >On Mon, Feb 5, 2024 at 6:51 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
> >>
> >> On Fri, Feb 02, 2024 at 02:25:21PM +0100, Kevin Wolf wrote:
> >> >VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> >> >status flag is set; with the current API of the kernel module, it is
> >> >impossible to enable the opposite order in our block export code because
> >> >userspace is not notified when a virtqueue is enabled.
> >
> >Did this mean virtio-blk will enable a virtqueue after DRIVER_OK?
>
> It's not specific to virtio-blk, but to the generic vdpa device we have
> in QEMU (i.e. vhost-vdpa-device). Yep, after commit
> 6c4825476a4351530bcac17abab72295b75ffe98, virtqueues are enabled after
> DRIVER_OK.
>
> >Sepc is not clear about this and that's why we introduce
> >VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK.
>
> Ah, I didn't know about this new feature. So after commit
> 6c4825476a4351530bcac17abab72295b75ffe98 the vhost-vdpa-device is not
> complying with the specification, right?
>
> >
> >>
> >> Yeah, IMHO the VDUSE protocol is missing a VDUSE_SET_VQ_READY message,
> >
> >I think you meant when VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is
> >negotiated.
>
> At this point yes. But if VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is not
> negotiated, should we return an error in vhost-vdpa kernel module if
> VHOST_VDPA_SET_VRING_ENABLE is called when DRIVER_OK is already set?

I just sent a patch:
https://lore.kernel.org/virtualization/20240206145154.118044-1-sgarzare@redhat.com/T/#u

Then I discovered that we never check the return value of
vhost_vdpa_set_vring_ready() in QEMU.
I'll send a patch for that!

Stefano

>
> >If this is truth, it seems a little more complicated, for
> >example the get_backend_features needs to be forward to the userspace?
>
> I'm not understanding, don't we already have VHOST_GET_BACKEND_FEATURES
> for this? Or do you mean userspace on the VDUSE side?
>
> >This seems suboptimal to implement this in the spec first and then we
> >can leverage the features. Or we can have another parameter for the
> >ioctl that creates the vduse device.
>
> I got a little lost, though in vhost-user, the device can always expect
> a vring_enable/disable, so I thought it was not complicated in VDUSE.
>
> >
> >> I'll start another thread about that, but in the meantime I agree that
> >> we should fix QEMU since we need to work properly with old kernels as
> >> well.
> >>
> >> >
> >> >This requirement also mathces the normal initialisation order as done by
> >> >the generic vhost code in QEMU. However, commit 6c482547 accidentally
> >> >changed the order for vdpa-dev and broke access to VDUSE devices with
> >> >this.
> >> >
> >> >This changes vdpa-dev to use the normal order again and use the standard
> >> >vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> >> >used with vdpa-dev again after this fix.
> >>
> >> I like this approach and the patch LGTM, but I'm a bit worried about
> >> this function in hw/net/vhost_net.c:
> >>
> >>      int vhost_set_vring_enable(NetClientState *nc, int enable)
> >>      {
> >>          VHostNetState *net = get_vhost_net(nc);
> >>          const VhostOps *vhost_ops = net->dev.vhost_ops;
> >>
> >>          nc->vring_enable = enable;
> >>
> >>          if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
> >>              return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
> >>          }
> >>
> >>          return 0;
> >>      }
> >>
> >> @Eugenio, @Jason, should we change some things there if vhost-vdpa
> >> implements the vhost_set_vring_enable callback?
> >
> >Eugenio may know more, I remember we need to enable cvq first for
> >shadow virtqueue to restore some states.
> >
> >>
> >> Do you remember why we didn't implement it from the beginning?
> >
> >It seems the vrings parameter is introduced after vhost-vdpa is
> >implemented.
>
> Sorry, I mean why we didn't implement the vhost_set_vring_enable
> callback for vhost-vdpa from the beginning.
>
> Thanks,
> Stefano
Eugenio Perez Martin Feb. 6, 2024, 4:44 p.m. UTC | #8
On Mon, Feb 5, 2024 at 2:49 PM Kevin Wolf <kwolf@redhat.com> wrote:
>
> Am 05.02.2024 um 13:22 hat Eugenio Perez Martin geschrieben:
> > On Fri, Feb 2, 2024 at 2:25 PM Kevin Wolf <kwolf@redhat.com> wrote:
> > >
> > > VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> > > status flag is set; with the current API of the kernel module, it is
> > > impossible to enable the opposite order in our block export code because
> > > userspace is not notified when a virtqueue is enabled.
> > >
> > > This requirement also mathces the normal initialisation order as done by
> > > the generic vhost code in QEMU. However, commit 6c482547 accidentally
> > > changed the order for vdpa-dev and broke access to VDUSE devices with
> > > this.
> > >
> > > This changes vdpa-dev to use the normal order again and use the standard
> > > vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> > > used with vdpa-dev again after this fix.
> > >
> > > Cc: qemu-stable@nongnu.org
> > > Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
> > > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > > ---
> > >  hw/virtio/vdpa-dev.c   |  5 +----
> > >  hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
> > >  2 files changed, 18 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> > > index eb9ecea83b..13e87f06f6 100644
> > > --- a/hw/virtio/vdpa-dev.c
> > > +++ b/hw/virtio/vdpa-dev.c
> > > @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp)
> > >
> > >      s->dev.acked_features = vdev->guest_features;
> > >
> > > -    ret = vhost_dev_start(&s->dev, vdev, false);
> > > +    ret = vhost_dev_start(&s->dev, vdev, true);
> > >      if (ret < 0) {
> > >          error_setg_errno(errp, -ret, "Error starting vhost");
> > >          goto err_guest_notifiers;
> > >      }
> > > -    for (i = 0; i < s->dev.nvqs; ++i) {
> > > -        vhost_vdpa_set_vring_ready(&s->vdpa, i);
> > > -    }
> > >      s->started = true;
> > >
> > >      /*
> > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > > index 3a43beb312..c4574d56c5 100644
> > > --- a/hw/virtio/vhost-vdpa.c
> > > +++ b/hw/virtio/vhost-vdpa.c
> > > @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
> > >      return r;
> > >  }
> > >
> > > +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> > > +{
> > > +    struct vhost_vdpa *v = dev->opaque;
> > > +    unsigned int i;
> > > +    int ret;
> > > +
> > > +    for (i = 0; i < dev->nvqs; ++i) {
> > > +        ret = vhost_vdpa_set_vring_ready(v, i);
> > > +        if (ret < 0) {
> > > +            return ret;
> > > +        }
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > >  static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
> > >                                         int fd)
> > >  {
> > > @@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = {
> > >          .vhost_set_features = vhost_vdpa_set_features,
> > >          .vhost_reset_device = vhost_vdpa_reset_device,
> > >          .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> > > +        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
> > >          .vhost_get_config  = vhost_vdpa_get_config,
> > >          .vhost_set_config = vhost_vdpa_set_config,
> > >          .vhost_requires_shm_log = NULL,
> >
> > vhost-vdpa net enables CVQ before dataplane ones to configure all the
> > device in the destination of a live migration. To go back again to
> > this callback would cause the device to enable the dataplane before
> > virtqueues are configured again.
>
> Not that it makes a difference, but I don't think it would actually be
> going back. Even before your commit 6c482547, we were not making use of
> the generic callback but just called the function in a slightly
> different place (but less different than after commit 6c482547).
>
> But anyway... Why don't the other vhost backend need the same for
> vhost-net then? Do they just not support live migration?
>

They don't support control virtqueue. More specifically, control
virtqueue is handled directly in QEMU.

> I don't know the code well enough to say where the problem is, but if
> vhost-vdpa networking code relies on the usual vhost operations not
> being implemented and bypasses VhostOps to replace it, that sounds like
> a design problem to me.

I don't follow this. What vhost operation is expected not to be implemented?

> Maybe VhostOps needs a new operation to enable
> just a single virtqueue that can be used by the networking code instead?
>
> > How does VDUSE userspace knows how many queues should enable? Can't
> > the kernel perform the necessary actions after DRIVER_OK, like
> > configuring the kick etc?
>
> Not sure if I understand the question. The vdpa kernel interface always
> enables individual queues, so the VDUSE userspace will enable whatever
> queues it was asked to enable. The only restriction is that the queues
> need to be enabled before setting DRIVER_OK.
>
> The interface that enables all virtqueues at once seems to be just
> .vhost_set_vring_enable in QEMU.
>

It enables all virtqueues of the same vhost device in QEMU, but QEMU
creates one vhost_dev per each vhost-net virtqueue pair and another
one for CVQ. This goes back to the time where mq vhost-kernel net
devices were implemented by mergin many tap devices. net/vhost-vdpa.c
only enables the last one, which corresponds to CVQ, and then enables
the rest once all messages have been received.

On the other hand, .vhost_set_vring_enable is also used for queue
reset (vhost_net_virtqueue_reset and vhost_net_virtqueue_restart). In
other words, it is called after the set DRIVER_OK. I guess it is fine
for VDUSE as long as it does not offer vring reset capabilities, but
future wise should we start going in that direction?

But kernels without VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK should be
supported, right. Maybe we can add vhost_vdpa_set_vring_enable and
call vhost_dev_start with vrings parameters conditional to the feature
flag? That way the caller can enable it later or just enable all the
rings altogether.

Thanks!
Jason Wang Feb. 7, 2024, 3:17 a.m. UTC | #9
On Tue, Feb 6, 2024 at 4:31 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> On Tue, Feb 06, 2024 at 10:47:40AM +0800, Jason Wang wrote:
> >On Mon, Feb 5, 2024 at 6:51 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
> >>
> >> On Fri, Feb 02, 2024 at 02:25:21PM +0100, Kevin Wolf wrote:
> >> >VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> >> >status flag is set; with the current API of the kernel module, it is
> >> >impossible to enable the opposite order in our block export code because
> >> >userspace is not notified when a virtqueue is enabled.
> >
> >Did this mean virtio-blk will enable a virtqueue after DRIVER_OK?
>
> It's not specific to virtio-blk, but to the generic vdpa device we have
> in QEMU (i.e. vhost-vdpa-device). Yep, after commit
> 6c4825476a4351530bcac17abab72295b75ffe98, virtqueues are enabled after
> DRIVER_OK.

Right.

>
> >Sepc is not clear about this and that's why we introduce
> >VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK.
>
> Ah, I didn't know about this new feature. So after commit
> 6c4825476a4351530bcac17abab72295b75ffe98 the vhost-vdpa-device is not
> complying with the specification, right?

Kind of, but as stated, it's just because spec is unclear about the
behaviour. There's a chance that spec will explicitly support it in
the future.

>
> >
> >>
> >> Yeah, IMHO the VDUSE protocol is missing a VDUSE_SET_VQ_READY message,
> >
> >I think you meant when VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is
> >negotiated.
>
> At this point yes. But if VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is not
> negotiated, should we return an error in vhost-vdpa kernel module if
> VHOST_VDPA_SET_VRING_ENABLE is called when DRIVER_OK is already set?

I'm not sure if this can break some setups or not. It might be better
to leave it as is?

Without VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK, we don't know if
parent support vq_ready after driver_ok.
With VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK, we know parent support
vq_ready after driver_ok.

>
> >If this is truth, it seems a little more complicated, for
> >example the get_backend_features needs to be forward to the userspace?
>
> I'm not understanding, don't we already have VHOST_GET_BACKEND_FEATURES
> for this? Or do you mean userspace on the VDUSE side?

Yes, since in this case the parent is in the userspace, there's no way
for VDUSE to know if user space supports vq_ready after driver_ok or
not.

As you may have noticed, we don't have a message for vq_ready which
implies that vq_ready after driver_ok can't be supported.

>
> >This seems suboptimal to implement this in the spec first and then we
> >can leverage the features. Or we can have another parameter for the
> >ioctl that creates the vduse device.
>
> I got a little lost, though in vhost-user, the device can always expect
> a vring_enable/disable, so I thought it was not complicated in VDUSE.

Yes, the problem is assuming we have a message for vq_ready, there
could be  a "legacy" userspace that doesn't support that.  So in that
case, VDUSE needs to know if the userspace parent can support that or
not.

>
> >
> >> I'll start another thread about that, but in the meantime I agree that
> >> we should fix QEMU since we need to work properly with old kernels as
> >> well.
> >>
> >> >
> >> >This requirement also mathces the normal initialisation order as done by
> >> >the generic vhost code in QEMU. However, commit 6c482547 accidentally
> >> >changed the order for vdpa-dev and broke access to VDUSE devices with
> >> >this.
> >> >
> >> >This changes vdpa-dev to use the normal order again and use the standard
> >> >vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> >> >used with vdpa-dev again after this fix.
> >>
> >> I like this approach and the patch LGTM, but I'm a bit worried about
> >> this function in hw/net/vhost_net.c:
> >>
> >>      int vhost_set_vring_enable(NetClientState *nc, int enable)
> >>      {
> >>          VHostNetState *net = get_vhost_net(nc);
> >>          const VhostOps *vhost_ops = net->dev.vhost_ops;
> >>
> >>          nc->vring_enable = enable;
> >>
> >>          if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
> >>              return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
> >>          }
> >>
> >>          return 0;
> >>      }
> >>
> >> @Eugenio, @Jason, should we change some things there if vhost-vdpa
> >> implements the vhost_set_vring_enable callback?
> >
> >Eugenio may know more, I remember we need to enable cvq first for
> >shadow virtqueue to restore some states.
> >
> >>
> >> Do you remember why we didn't implement it from the beginning?
> >
> >It seems the vrings parameter is introduced after vhost-vdpa is
> >implemented.
>
> Sorry, I mean why we didn't implement the vhost_set_vring_enable
> callback for vhost-vdpa from the beginning.

Adding Cindy who writes those codes for more thoughts.

Thanks

>
> Thanks,
> Stefano
>
Cindy Lu Feb. 7, 2024, 8:05 a.m. UTC | #10
Jason Wang <jasowang@redhat.com> 于2024年2月7日周三 11:17写道:
>
> On Tue, Feb 6, 2024 at 4:31 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
> >
> > On Tue, Feb 06, 2024 at 10:47:40AM +0800, Jason Wang wrote:
> > >On Mon, Feb 5, 2024 at 6:51 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
> > >>
> > >> On Fri, Feb 02, 2024 at 02:25:21PM +0100, Kevin Wolf wrote:
> > >> >VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> > >> >status flag is set; with the current API of the kernel module, it is
> > >> >impossible to enable the opposite order in our block export code because
> > >> >userspace is not notified when a virtqueue is enabled.
> > >
> > >Did this mean virtio-blk will enable a virtqueue after DRIVER_OK?
> >
> > It's not specific to virtio-blk, but to the generic vdpa device we have
> > in QEMU (i.e. vhost-vdpa-device). Yep, after commit
> > 6c4825476a4351530bcac17abab72295b75ffe98, virtqueues are enabled after
> > DRIVER_OK.
>
> Right.
>
> >
> > >Sepc is not clear about this and that's why we introduce
> > >VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK.
> >
> > Ah, I didn't know about this new feature. So after commit
> > 6c4825476a4351530bcac17abab72295b75ffe98 the vhost-vdpa-device is not
> > complying with the specification, right?
>
> Kind of, but as stated, it's just because spec is unclear about the
> behaviour. There's a chance that spec will explicitly support it in
> the future.
>
> >
> > >
> > >>
> > >> Yeah, IMHO the VDUSE protocol is missing a VDUSE_SET_VQ_READY message,
> > >
> > >I think you meant when VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is
> > >negotiated.
> >
> > At this point yes. But if VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is not
> > negotiated, should we return an error in vhost-vdpa kernel module if
> > VHOST_VDPA_SET_VRING_ENABLE is called when DRIVER_OK is already set?
>
> I'm not sure if this can break some setups or not. It might be better
> to leave it as is?
>
> Without VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK, we don't know if
> parent support vq_ready after driver_ok.
> With VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK, we know parent support
> vq_ready after driver_ok.
>
> >
> > >If this is truth, it seems a little more complicated, for
> > >example the get_backend_features needs to be forward to the userspace?
> >
> > I'm not understanding, don't we already have VHOST_GET_BACKEND_FEATURES
> > for this? Or do you mean userspace on the VDUSE side?
>
> Yes, since in this case the parent is in the userspace, there's no way
> for VDUSE to know if user space supports vq_ready after driver_ok or
> not.
>
> As you may have noticed, we don't have a message for vq_ready which
> implies that vq_ready after driver_ok can't be supported.
>
> >
> > >This seems suboptimal to implement this in the spec first and then we
> > >can leverage the features. Or we can have another parameter for the
> > >ioctl that creates the vduse device.
> >
> > I got a little lost, though in vhost-user, the device can always expect
> > a vring_enable/disable, so I thought it was not complicated in VDUSE.
>
> Yes, the problem is assuming we have a message for vq_ready, there
> could be  a "legacy" userspace that doesn't support that.  So in that
> case, VDUSE needs to know if the userspace parent can support that or
> not.
>
> >
> > >
> > >> I'll start another thread about that, but in the meantime I agree that
> > >> we should fix QEMU since we need to work properly with old kernels as
> > >> well.
> > >>
> > >> >
> > >> >This requirement also mathces the normal initialisation order as done by
> > >> >the generic vhost code in QEMU. However, commit 6c482547 accidentally
> > >> >changed the order for vdpa-dev and broke access to VDUSE devices with
> > >> >this.
> > >> >
> > >> >This changes vdpa-dev to use the normal order again and use the standard
> > >> >vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> > >> >used with vdpa-dev again after this fix.
> > >>
> > >> I like this approach and the patch LGTM, but I'm a bit worried about
> > >> this function in hw/net/vhost_net.c:
> > >>
> > >>      int vhost_set_vring_enable(NetClientState *nc, int enable)
> > >>      {
> > >>          VHostNetState *net = get_vhost_net(nc);
> > >>          const VhostOps *vhost_ops = net->dev.vhost_ops;
> > >>
> > >>          nc->vring_enable = enable;
> > >>
> > >>          if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
> > >>              return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
> > >>          }
> > >>
> > >>          return 0;
> > >>      }
> > >>
> > >> @Eugenio, @Jason, should we change some things there if vhost-vdpa
> > >> implements the vhost_set_vring_enable callback?
> > >
> > >Eugenio may know more, I remember we need to enable cvq first for
> > >shadow virtqueue to restore some states.
> > >
> > >>
> > >> Do you remember why we didn't implement it from the beginning?
> > >
> > >It seems the vrings parameter is introduced after vhost-vdpa is
> > >implemented.
> >
> > Sorry, I mean why we didn't implement the vhost_set_vring_enable
> > callback for vhost-vdpa from the beginning.
>
> Adding Cindy who writes those codes for more thoughts.
>
it's a really long time ago, and I can't remember it clearly. It seems
like there might be an issue with the sequence being called for
dev_start and vhost_set_vring_enable?
I have searched my mail but find nothing. I think we should do a full
test if we want to this

Thanks
Cindy
> Thanks
>
> >
> > Thanks,
> > Stefano
> >
>
Stefano Garzarella Feb. 7, 2024, 8:47 a.m. UTC | #11
On Wed, Feb 07, 2024 at 11:17:34AM +0800, Jason Wang wrote:
>On Tue, Feb 6, 2024 at 4:31 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>>
>> On Tue, Feb 06, 2024 at 10:47:40AM +0800, Jason Wang wrote:
>> >On Mon, Feb 5, 2024 at 6:51 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>> >>
>> >> On Fri, Feb 02, 2024 at 02:25:21PM +0100, Kevin Wolf wrote:
>> >> >VDUSE requires that virtqueues are first enabled before the DRIVER_OK
>> >> >status flag is set; with the current API of the kernel module, it is
>> >> >impossible to enable the opposite order in our block export code because
>> >> >userspace is not notified when a virtqueue is enabled.
>> >
>> >Did this mean virtio-blk will enable a virtqueue after DRIVER_OK?
>>
>> It's not specific to virtio-blk, but to the generic vdpa device we have
>> in QEMU (i.e. vhost-vdpa-device). Yep, after commit
>> 6c4825476a4351530bcac17abab72295b75ffe98, virtqueues are enabled after
>> DRIVER_OK.
>
>Right.
>
>>
>> >Sepc is not clear about this and that's why we introduce
>> >VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK.
>>
>> Ah, I didn't know about this new feature. So after commit
>> 6c4825476a4351530bcac17abab72295b75ffe98 the vhost-vdpa-device is not
>> complying with the specification, right?
>
>Kind of, but as stated, it's just because spec is unclear about the
>behaviour. There's a chance that spec will explicitly support it in
>the future.
>
>>
>> >
>> >>
>> >> Yeah, IMHO the VDUSE protocol is missing a VDUSE_SET_VQ_READY message,
>> >
>> >I think you meant when VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is
>> >negotiated.
>>
>> At this point yes. But if VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is not
>> negotiated, should we return an error in vhost-vdpa kernel module if
>> VHOST_VDPA_SET_VRING_ENABLE is called when DRIVER_OK is already set?
>
>I'm not sure if this can break some setups or not. It might be better
>to leave it as is?

As I also answered in the kernel patch, IMHO we have to return an error, 
because any setups are broken anyway (see the problem we're fixing with 
this patch), so at this point it's better to return an error, so they 
don't spend time figuring out why the VDUSE device doesn't work.

>
>Without VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK, we don't know if
>parent support vq_ready after driver_ok.

So we have to assume that it doesn't support it, to be more 
conservative, right?

>With VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK, we know parent support
>vq_ready after driver_ok.
>
>>
>> >If this is truth, it seems a little more complicated, for
>> >example the get_backend_features needs to be forward to the userspace?
>>
>> I'm not understanding, don't we already have VHOST_GET_BACKEND_FEATURES
>> for this? Or do you mean userspace on the VDUSE side?
>
>Yes, since in this case the parent is in the userspace, there's no way
>for VDUSE to know if user space supports vq_ready after driver_ok or
>not.
>
>As you may have noticed, we don't have a message for vq_ready which
>implies that vq_ready after driver_ok can't be supported.

Yep, I see.

>
>>
>> >This seems suboptimal to implement this in the spec first and then we
>> >can leverage the features. Or we can have another parameter for the
>> >ioctl that creates the vduse device.
>>
>> I got a little lost, though in vhost-user, the device can always expect
>> a vring_enable/disable, so I thought it was not complicated in VDUSE.
>
>Yes, the problem is assuming we have a message for vq_ready, there
>could be  a "legacy" userspace that doesn't support that.  So in that
>case, VDUSE needs to know if the userspace parent can support that or
>not.

I think VDUSE needs to negotiate features (if it doesn't already) as 
vhost-user does with the backend. Also for new future functionality.


>
>>
>> >
>> >> I'll start another thread about that, but in the meantime I agree that
>> >> we should fix QEMU since we need to work properly with old kernels as
>> >> well.
>> >>
>> >> >
>> >> >This requirement also mathces the normal initialisation order as done by
>> >> >the generic vhost code in QEMU. However, commit 6c482547 accidentally
>> >> >changed the order for vdpa-dev and broke access to VDUSE devices with
>> >> >this.
>> >> >
>> >> >This changes vdpa-dev to use the normal order again and use the standard
>> >> >vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
>> >> >used with vdpa-dev again after this fix.
>> >>
>> >> I like this approach and the patch LGTM, but I'm a bit worried about
>> >> this function in hw/net/vhost_net.c:
>> >>
>> >>      int vhost_set_vring_enable(NetClientState *nc, int enable)
>> >>      {
>> >>          VHostNetState *net = get_vhost_net(nc);
>> >>          const VhostOps *vhost_ops = net->dev.vhost_ops;
>> >>
>> >>          nc->vring_enable = enable;
>> >>
>> >>          if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
>> >>              return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
>> >>          }
>> >>
>> >>          return 0;
>> >>      }
>> >>
>> >> @Eugenio, @Jason, should we change some things there if vhost-vdpa
>> >> implements the vhost_set_vring_enable callback?
>> >
>> >Eugenio may know more, I remember we need to enable cvq first for
>> >shadow virtqueue to restore some states.
>> >
>> >>
>> >> Do you remember why we didn't implement it from the beginning?
>> >
>> >It seems the vrings parameter is introduced after vhost-vdpa is
>> >implemented.
>>
>> Sorry, I mean why we didn't implement the vhost_set_vring_enable
>> callback for vhost-vdpa from the beginning.
>
>Adding Cindy who writes those codes for more thoughts.

Thanks,
Stefano
Stefano Garzarella Feb. 7, 2024, 8:53 a.m. UTC | #12
On Wed, Feb 07, 2024 at 04:05:29PM +0800, Cindy Lu wrote:
>Jason Wang <jasowang@redhat.com> 于2024年2月7日周三 11:17写道:
>>
>> On Tue, Feb 6, 2024 at 4:31 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>> >
>> > On Tue, Feb 06, 2024 at 10:47:40AM +0800, Jason Wang wrote:
>> > >On Mon, Feb 5, 2024 at 6:51 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>> > >>
>> > >> On Fri, Feb 02, 2024 at 02:25:21PM +0100, Kevin Wolf wrote:
>> > >> >VDUSE requires that virtqueues are first enabled before the DRIVER_OK
>> > >> >status flag is set; with the current API of the kernel module, it is
>> > >> >impossible to enable the opposite order in our block export code because
>> > >> >userspace is not notified when a virtqueue is enabled.
>> > >
>> > >Did this mean virtio-blk will enable a virtqueue after DRIVER_OK?
>> >
>> > It's not specific to virtio-blk, but to the generic vdpa device we have
>> > in QEMU (i.e. vhost-vdpa-device). Yep, after commit
>> > 6c4825476a4351530bcac17abab72295b75ffe98, virtqueues are enabled after
>> > DRIVER_OK.
>>
>> Right.
>>
>> >
>> > >Sepc is not clear about this and that's why we introduce
>> > >VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK.
>> >
>> > Ah, I didn't know about this new feature. So after commit
>> > 6c4825476a4351530bcac17abab72295b75ffe98 the vhost-vdpa-device is not
>> > complying with the specification, right?
>>
>> Kind of, but as stated, it's just because spec is unclear about the
>> behaviour. There's a chance that spec will explicitly support it in
>> the future.
>>
>> >
>> > >
>> > >>
>> > >> Yeah, IMHO the VDUSE protocol is missing a VDUSE_SET_VQ_READY message,
>> > >
>> > >I think you meant when VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is
>> > >negotiated.
>> >
>> > At this point yes. But if VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is not
>> > negotiated, should we return an error in vhost-vdpa kernel module if
>> > VHOST_VDPA_SET_VRING_ENABLE is called when DRIVER_OK is already set?
>>
>> I'm not sure if this can break some setups or not. It might be better
>> to leave it as is?
>>
>> Without VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK, we don't know if
>> parent support vq_ready after driver_ok.
>> With VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK, we know parent support
>> vq_ready after driver_ok.
>>
>> >
>> > >If this is truth, it seems a little more complicated, for
>> > >example the get_backend_features needs to be forward to the userspace?
>> >
>> > I'm not understanding, don't we already have VHOST_GET_BACKEND_FEATURES
>> > for this? Or do you mean userspace on the VDUSE side?
>>
>> Yes, since in this case the parent is in the userspace, there's no way
>> for VDUSE to know if user space supports vq_ready after driver_ok or
>> not.
>>
>> As you may have noticed, we don't have a message for vq_ready which
>> implies that vq_ready after driver_ok can't be supported.
>>
>> >
>> > >This seems suboptimal to implement this in the spec first and then we
>> > >can leverage the features. Or we can have another parameter for the
>> > >ioctl that creates the vduse device.
>> >
>> > I got a little lost, though in vhost-user, the device can always expect
>> > a vring_enable/disable, so I thought it was not complicated in VDUSE.
>>
>> Yes, the problem is assuming we have a message for vq_ready, there
>> could be  a "legacy" userspace that doesn't support that.  So in that
>> case, VDUSE needs to know if the userspace parent can support that or
>> not.
>>
>> >
>> > >
>> > >> I'll start another thread about that, but in the meantime I agree that
>> > >> we should fix QEMU since we need to work properly with old kernels as
>> > >> well.
>> > >>
>> > >> >
>> > >> >This requirement also mathces the normal initialisation order as done by
>> > >> >the generic vhost code in QEMU. However, commit 6c482547 accidentally
>> > >> >changed the order for vdpa-dev and broke access to VDUSE devices with
>> > >> >this.
>> > >> >
>> > >> >This changes vdpa-dev to use the normal order again and use the standard
>> > >> >vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
>> > >> >used with vdpa-dev again after this fix.
>> > >>
>> > >> I like this approach and the patch LGTM, but I'm a bit worried about
>> > >> this function in hw/net/vhost_net.c:
>> > >>
>> > >>      int vhost_set_vring_enable(NetClientState *nc, int enable)
>> > >>      {
>> > >>          VHostNetState *net = get_vhost_net(nc);
>> > >>          const VhostOps *vhost_ops = net->dev.vhost_ops;
>> > >>
>> > >>          nc->vring_enable = enable;
>> > >>
>> > >>          if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
>> > >>              return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
>> > >>          }
>> > >>
>> > >>          return 0;
>> > >>      }
>> > >>
>> > >> @Eugenio, @Jason, should we change some things there if vhost-vdpa
>> > >> implements the vhost_set_vring_enable callback?
>> > >
>> > >Eugenio may know more, I remember we need to enable cvq first for
>> > >shadow virtqueue to restore some states.
>> > >
>> > >>
>> > >> Do you remember why we didn't implement it from the beginning?
>> > >
>> > >It seems the vrings parameter is introduced after vhost-vdpa is
>> > >implemented.
>> >
>> > Sorry, I mean why we didn't implement the vhost_set_vring_enable
>> > callback for vhost-vdpa from the beginning.
>>
>> Adding Cindy who writes those codes for more thoughts.
>>
>it's a really long time ago, and I can't remember it clearly. It seems
>like there might be an issue with the sequence being called for
>dev_start and vhost_set_vring_enable?
>I have searched my mail but find nothing. I think we should do a full
>test if we want to this

IMHO in hw/net/vhost_net.c we should check the type of backend (e.g. 
vhost-vdpa) to decide whether to call 
vhost_ops->vhost_set_vring_enable() or not, instead of just checking 
whether the callback is implemented or not, because in the future we 
might have other backends (e.g. a native vdpa-blk driver) that might 
want that callback implemented.

Something like this (untested):

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index e8e1661646..1fea678e29 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -543,7 +543,8 @@ int vhost_set_vring_enable(NetClientState *nc, int enable)

      nc->vring_enable = enable;

-    if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
+    if (nc->info->type != NET_CLIENT_DRIVER_VHOST_VDPA &&
+            vhost_ops && vhost_ops->vhost_set_vring_enable) {
          return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
      }

WDYT?

Thanks,
Stefano
Kevin Wolf Feb. 7, 2024, 10:18 a.m. UTC | #13
Am 06.02.2024 um 17:44 hat Eugenio Perez Martin geschrieben:
> On Mon, Feb 5, 2024 at 2:49 PM Kevin Wolf <kwolf@redhat.com> wrote:
> >
> > Am 05.02.2024 um 13:22 hat Eugenio Perez Martin geschrieben:
> > > On Fri, Feb 2, 2024 at 2:25 PM Kevin Wolf <kwolf@redhat.com> wrote:
> > > >
> > > > VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> > > > status flag is set; with the current API of the kernel module, it is
> > > > impossible to enable the opposite order in our block export code because
> > > > userspace is not notified when a virtqueue is enabled.
> > > >
> > > > This requirement also mathces the normal initialisation order as done by
> > > > the generic vhost code in QEMU. However, commit 6c482547 accidentally
> > > > changed the order for vdpa-dev and broke access to VDUSE devices with
> > > > this.
> > > >
> > > > This changes vdpa-dev to use the normal order again and use the standard
> > > > vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> > > > used with vdpa-dev again after this fix.
> > > >
> > > > Cc: qemu-stable@nongnu.org
> > > > Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
> > > > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > > > ---
> > > >  hw/virtio/vdpa-dev.c   |  5 +----
> > > >  hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
> > > >  2 files changed, 18 insertions(+), 4 deletions(-)
> > > >
> > > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> > > > index eb9ecea83b..13e87f06f6 100644
> > > > --- a/hw/virtio/vdpa-dev.c
> > > > +++ b/hw/virtio/vdpa-dev.c
> > > > @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp)
> > > >
> > > >      s->dev.acked_features = vdev->guest_features;
> > > >
> > > > -    ret = vhost_dev_start(&s->dev, vdev, false);
> > > > +    ret = vhost_dev_start(&s->dev, vdev, true);
> > > >      if (ret < 0) {
> > > >          error_setg_errno(errp, -ret, "Error starting vhost");
> > > >          goto err_guest_notifiers;
> > > >      }
> > > > -    for (i = 0; i < s->dev.nvqs; ++i) {
> > > > -        vhost_vdpa_set_vring_ready(&s->vdpa, i);
> > > > -    }
> > > >      s->started = true;
> > > >
> > > >      /*
> > > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > > > index 3a43beb312..c4574d56c5 100644
> > > > --- a/hw/virtio/vhost-vdpa.c
> > > > +++ b/hw/virtio/vhost-vdpa.c
> > > > @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
> > > >      return r;
> > > >  }
> > > >
> > > > +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> > > > +{
> > > > +    struct vhost_vdpa *v = dev->opaque;
> > > > +    unsigned int i;
> > > > +    int ret;
> > > > +
> > > > +    for (i = 0; i < dev->nvqs; ++i) {
> > > > +        ret = vhost_vdpa_set_vring_ready(v, i);
> > > > +        if (ret < 0) {
> > > > +            return ret;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    return 0;
> > > > +}
> > > > +
> > > >  static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
> > > >                                         int fd)
> > > >  {
> > > > @@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = {
> > > >          .vhost_set_features = vhost_vdpa_set_features,
> > > >          .vhost_reset_device = vhost_vdpa_reset_device,
> > > >          .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> > > > +        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
> > > >          .vhost_get_config  = vhost_vdpa_get_config,
> > > >          .vhost_set_config = vhost_vdpa_set_config,
> > > >          .vhost_requires_shm_log = NULL,
> > >
> > > vhost-vdpa net enables CVQ before dataplane ones to configure all the
> > > device in the destination of a live migration. To go back again to
> > > this callback would cause the device to enable the dataplane before
> > > virtqueues are configured again.
> >
> > Not that it makes a difference, but I don't think it would actually be
> > going back. Even before your commit 6c482547, we were not making use of
> > the generic callback but just called the function in a slightly
> > different place (but less different than after commit 6c482547).
> >
> > But anyway... Why don't the other vhost backend need the same for
> > vhost-net then? Do they just not support live migration?
> 
> They don't support control virtqueue. More specifically, control
> virtqueue is handled directly in QEMU.

So the network device already has to special case vdpa instead of using
the same code for all vhost backends? :-/

> > I don't know the code well enough to say where the problem is, but if
> > vhost-vdpa networking code relies on the usual vhost operations not
> > being implemented and bypasses VhostOps to replace it, that sounds like
> > a design problem to me.
> 
> I don't follow this. What vhost operation is expected not to be implemented?

You were concerned about implementing .vhost_set_vring_enable in
vdpa_ops like my patch does. So it seems that the networking code
requires that it is not implemented?

On the other hand, for vhost-vdpa, the callback seems to be called in
exactly the right place where virtqueues need to be enabled, like for
other vhost devices.

> > Maybe VhostOps needs a new operation to enable
> > just a single virtqueue that can be used by the networking code instead?
> >
> > > How does VDUSE userspace knows how many queues should enable? Can't
> > > the kernel perform the necessary actions after DRIVER_OK, like
> > > configuring the kick etc?
> >
> > Not sure if I understand the question. The vdpa kernel interface always
> > enables individual queues, so the VDUSE userspace will enable whatever
> > queues it was asked to enable. The only restriction is that the queues
> > need to be enabled before setting DRIVER_OK.
> >
> > The interface that enables all virtqueues at once seems to be just
> > .vhost_set_vring_enable in QEMU.
> 
> It enables all virtqueues of the same vhost device in QEMU, but QEMU
> creates one vhost_dev per each vhost-net virtqueue pair and another
> one for CVQ. This goes back to the time where mq vhost-kernel net
> devices were implemented by mergin many tap devices. net/vhost-vdpa.c
> only enables the last one, which corresponds to CVQ, and then enables
> the rest once all messages have been received.

So it's less about the granularity of .vhost_set_vring_enable, which
would just be right, but about the order in which it is called for the
different vhost_devs?

Can we influence the order in another way than just not implementing the
callback at all? I think net specific weirdness should be contained in
the net implementation and not affect generic vdpa code.

> On the other hand, .vhost_set_vring_enable is also used for queue
> reset (vhost_net_virtqueue_reset and vhost_net_virtqueue_restart). In
> other words, it is called after the set DRIVER_OK. I guess it is fine
> for VDUSE as long as it does not offer vring reset capabilities, but
> future wise should we start going in that direction?

I don't actually know VDUSE very well, but that would probably make
sense.

Though for the moment, I just tried to simply attach a VDUSE device and
failed, so I'm just trying to fix the regression from your commit.

> But kernels without VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK should be
> supported, right. Maybe we can add vhost_vdpa_set_vring_enable and
> call vhost_dev_start with vrings parameters conditional to the feature
> flag? That way the caller can enable it later or just enable all the
> rings altogether.

Which specific caller do you mean here?

For vdpa-dev, I don't see any problem with just passing vrings=true
unconditionally. That is clearly the least problematic order even if
another order may in theory be allowed by the spec.

For vhost_net, I don't know. As far as I am concerned, there is no
reason to change its call and it could continue to pass vrings=false.

So vhost_dev_start() seems entirely unproblematic.

The only part that changes for net is that vhost_set_vring_enable() now
does something where it didn't do anything before. If that is a problem
for vdpa based network code, maybe we can just special case vdpa there
to not call vhost_ops->vhost_set_vring_enable? (Despite its name, it's a
network specific function; it should probably be called something like
vhost_net_set_vring_enable.)

With something like the below, net shouldn't see any difference any
more, right? (Happy to add a comment before it if you write it for me.)

Kevin

diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index e8e1661646..fb6d13bd69 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -543,6 +543,10 @@ int vhost_set_vring_enable(NetClientState *nc, int enable)

     nc->vring_enable = enable;

+    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
+        return 0;
+    }
+
     if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
         return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
     }
Stefano Garzarella Feb. 7, 2024, 11:06 a.m. UTC | #14
On Wed, Feb 07, 2024 at 11:18:54AM +0100, Kevin Wolf wrote:
>Am 06.02.2024 um 17:44 hat Eugenio Perez Martin geschrieben:
>> On Mon, Feb 5, 2024 at 2:49 PM Kevin Wolf <kwolf@redhat.com> wrote:
>> >
>> > Am 05.02.2024 um 13:22 hat Eugenio Perez Martin geschrieben:
>> > > On Fri, Feb 2, 2024 at 2:25 PM Kevin Wolf <kwolf@redhat.com> wrote:
>> > > >
>> > > > VDUSE requires that virtqueues are first enabled before the DRIVER_OK
>> > > > status flag is set; with the current API of the kernel module, it is
>> > > > impossible to enable the opposite order in our block export code because
>> > > > userspace is not notified when a virtqueue is enabled.
>> > > >
>> > > > This requirement also mathces the normal initialisation order as done by
>> > > > the generic vhost code in QEMU. However, commit 6c482547 accidentally
>> > > > changed the order for vdpa-dev and broke access to VDUSE devices with
>> > > > this.
>> > > >
>> > > > This changes vdpa-dev to use the normal order again and use the standard
>> > > > vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
>> > > > used with vdpa-dev again after this fix.
>> > > >
>> > > > Cc: qemu-stable@nongnu.org
>> > > > Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
>> > > > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
>> > > > ---
>> > > >  hw/virtio/vdpa-dev.c   |  5 +----
>> > > >  hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
>> > > >  2 files changed, 18 insertions(+), 4 deletions(-)
>> > > >
>> > > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
>> > > > index eb9ecea83b..13e87f06f6 100644
>> > > > --- a/hw/virtio/vdpa-dev.c
>> > > > +++ b/hw/virtio/vdpa-dev.c
>> > > > @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp)
>> > > >
>> > > >      s->dev.acked_features = vdev->guest_features;
>> > > >
>> > > > -    ret = vhost_dev_start(&s->dev, vdev, false);
>> > > > +    ret = vhost_dev_start(&s->dev, vdev, true);
>> > > >      if (ret < 0) {
>> > > >          error_setg_errno(errp, -ret, "Error starting vhost");
>> > > >          goto err_guest_notifiers;
>> > > >      }
>> > > > -    for (i = 0; i < s->dev.nvqs; ++i) {
>> > > > -        vhost_vdpa_set_vring_ready(&s->vdpa, i);
>> > > > -    }
>> > > >      s->started = true;
>> > > >
>> > > >      /*
>> > > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
>> > > > index 3a43beb312..c4574d56c5 100644
>> > > > --- a/hw/virtio/vhost-vdpa.c
>> > > > +++ b/hw/virtio/vhost-vdpa.c
>> > > > @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
>> > > >      return r;
>> > > >  }
>> > > >
>> > > > +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
>> > > > +{
>> > > > +    struct vhost_vdpa *v = dev->opaque;
>> > > > +    unsigned int i;
>> > > > +    int ret;
>> > > > +
>> > > > +    for (i = 0; i < dev->nvqs; ++i) {
>> > > > +        ret = vhost_vdpa_set_vring_ready(v, i);
>> > > > +        if (ret < 0) {
>> > > > +            return ret;
>> > > > +        }
>> > > > +    }
>> > > > +
>> > > > +    return 0;
>> > > > +}
>> > > > +
>> > > >  static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
>> > > >                                         int fd)
>> > > >  {
>> > > > @@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = {
>> > > >          .vhost_set_features = vhost_vdpa_set_features,
>> > > >          .vhost_reset_device = vhost_vdpa_reset_device,
>> > > >          .vhost_get_vq_index = vhost_vdpa_get_vq_index,
>> > > > +        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
>> > > >          .vhost_get_config  = vhost_vdpa_get_config,
>> > > >          .vhost_set_config = vhost_vdpa_set_config,
>> > > >          .vhost_requires_shm_log = NULL,
>> > >
>> > > vhost-vdpa net enables CVQ before dataplane ones to configure all the
>> > > device in the destination of a live migration. To go back again to
>> > > this callback would cause the device to enable the dataplane before
>> > > virtqueues are configured again.
>> >
>> > Not that it makes a difference, but I don't think it would actually be
>> > going back. Even before your commit 6c482547, we were not making use of
>> > the generic callback but just called the function in a slightly
>> > different place (but less different than after commit 6c482547).
>> >
>> > But anyway... Why don't the other vhost backend need the same for
>> > vhost-net then? Do they just not support live migration?
>>
>> They don't support control virtqueue. More specifically, control
>> virtqueue is handled directly in QEMU.
>
>So the network device already has to special case vdpa instead of using
>the same code for all vhost backends? :-/
>
>> > I don't know the code well enough to say where the problem is, but if
>> > vhost-vdpa networking code relies on the usual vhost operations not
>> > being implemented and bypasses VhostOps to replace it, that sounds like
>> > a design problem to me.
>>
>> I don't follow this. What vhost operation is expected not to be implemented?
>
>You were concerned about implementing .vhost_set_vring_enable in
>vdpa_ops like my patch does. So it seems that the networking code
>requires that it is not implemented?
>
>On the other hand, for vhost-vdpa, the callback seems to be called in
>exactly the right place where virtqueues need to be enabled, like for
>other vhost devices.
>
>> > Maybe VhostOps needs a new operation to enable
>> > just a single virtqueue that can be used by the networking code instead?
>> >
>> > > How does VDUSE userspace knows how many queues should enable? Can't
>> > > the kernel perform the necessary actions after DRIVER_OK, like
>> > > configuring the kick etc?
>> >
>> > Not sure if I understand the question. The vdpa kernel interface always
>> > enables individual queues, so the VDUSE userspace will enable whatever
>> > queues it was asked to enable. The only restriction is that the queues
>> > need to be enabled before setting DRIVER_OK.
>> >
>> > The interface that enables all virtqueues at once seems to be just
>> > .vhost_set_vring_enable in QEMU.
>>
>> It enables all virtqueues of the same vhost device in QEMU, but QEMU
>> creates one vhost_dev per each vhost-net virtqueue pair and another
>> one for CVQ. This goes back to the time where mq vhost-kernel net
>> devices were implemented by mergin many tap devices. net/vhost-vdpa.c
>> only enables the last one, which corresponds to CVQ, and then enables
>> the rest once all messages have been received.
>
>So it's less about the granularity of .vhost_set_vring_enable, which
>would just be right, but about the order in which it is called for the
>different vhost_devs?
>
>Can we influence the order in another way than just not implementing the
>callback at all? I think net specific weirdness should be contained in
>the net implementation and not affect generic vdpa code.
>
>> On the other hand, .vhost_set_vring_enable is also used for queue
>> reset (vhost_net_virtqueue_reset and vhost_net_virtqueue_restart). In
>> other words, it is called after the set DRIVER_OK. I guess it is fine
>> for VDUSE as long as it does not offer vring reset capabilities, but
>> future wise should we start going in that direction?
>
>I don't actually know VDUSE very well, but that would probably make
>sense.
>
>Though for the moment, I just tried to simply attach a VDUSE device and
>failed, so I'm just trying to fix the regression from your commit.
>
>> But kernels without VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK should be
>> supported, right. Maybe we can add vhost_vdpa_set_vring_enable and
>> call vhost_dev_start with vrings parameters conditional to the feature
>> flag? That way the caller can enable it later or just enable all the
>> rings altogether.
>
>Which specific caller do you mean here?
>
>For vdpa-dev, I don't see any problem with just passing vrings=true
>unconditionally. That is clearly the least problematic order even if
>another order may in theory be allowed by the spec.
>
>For vhost_net, I don't know. As far as I am concerned, there is no
>reason to change its call and it could continue to pass vrings=false.
>
>So vhost_dev_start() seems entirely unproblematic.
>
>The only part that changes for net is that vhost_set_vring_enable() now
>does something where it didn't do anything before. If that is a problem
>for vdpa based network code, maybe we can just special case vdpa there
>to not call vhost_ops->vhost_set_vring_enable? (Despite its name, it's a
>network specific function; it should probably be called something like
>vhost_net_set_vring_enable.)
>
>With something like the below, net shouldn't see any difference any
>more, right? (Happy to add a comment before it if you write it for me.)

+1 for this.
I proposed something similar, but I prefer your early return which is 
clearer:
https://lore.kernel.org/qemu-devel/xlk2pspyo4gwguxopm6k534nzjei5y3m6zbh2l6dagmuwpamtk@dtkgca6yppce/

Thanks,
Stefano

>
>Kevin
>
>diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
>index e8e1661646..fb6d13bd69 100644
>--- a/hw/net/vhost_net.c
>+++ b/hw/net/vhost_net.c
>@@ -543,6 +543,10 @@ int vhost_set_vring_enable(NetClientState *nc, int enable)
>
>     nc->vring_enable = enable;
>
>+    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
>+        return 0;
>+    }
>+
>     if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
>         return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
>     }
>
Jason Wang Feb. 8, 2024, 7:02 a.m. UTC | #15
On Wed, Feb 7, 2024 at 4:47 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> On Wed, Feb 07, 2024 at 11:17:34AM +0800, Jason Wang wrote:
> >On Tue, Feb 6, 2024 at 4:31 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
> >>
> >> On Tue, Feb 06, 2024 at 10:47:40AM +0800, Jason Wang wrote:
> >> >On Mon, Feb 5, 2024 at 6:51 PM Stefano Garzarella <sgarzare@redhat.com> wrote:
> >> >>
> >> >> On Fri, Feb 02, 2024 at 02:25:21PM +0100, Kevin Wolf wrote:
> >> >> >VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> >> >> >status flag is set; with the current API of the kernel module, it is
> >> >> >impossible to enable the opposite order in our block export code because
> >> >> >userspace is not notified when a virtqueue is enabled.
> >> >
> >> >Did this mean virtio-blk will enable a virtqueue after DRIVER_OK?
> >>
> >> It's not specific to virtio-blk, but to the generic vdpa device we have
> >> in QEMU (i.e. vhost-vdpa-device). Yep, after commit
> >> 6c4825476a4351530bcac17abab72295b75ffe98, virtqueues are enabled after
> >> DRIVER_OK.
> >
> >Right.
> >
> >>
> >> >Sepc is not clear about this and that's why we introduce
> >> >VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK.
> >>
> >> Ah, I didn't know about this new feature. So after commit
> >> 6c4825476a4351530bcac17abab72295b75ffe98 the vhost-vdpa-device is not
> >> complying with the specification, right?
> >
> >Kind of, but as stated, it's just because spec is unclear about the
> >behaviour. There's a chance that spec will explicitly support it in
> >the future.
> >
> >>
> >> >
> >> >>
> >> >> Yeah, IMHO the VDUSE protocol is missing a VDUSE_SET_VQ_READY message,
> >> >
> >> >I think you meant when VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is
> >> >negotiated.
> >>
> >> At this point yes. But if VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK is not
> >> negotiated, should we return an error in vhost-vdpa kernel module if
> >> VHOST_VDPA_SET_VRING_ENABLE is called when DRIVER_OK is already set?
> >
> >I'm not sure if this can break some setups or not. It might be better
> >to leave it as is?
>
> As I also answered in the kernel patch, IMHO we have to return an error,
> because any setups are broken anyway (see the problem we're fixing with
 is > this patch),

For VDUSE probably yes, but not for other parents. It's easy to
mandate on day 0 but might be hard for now.

For mlx5e, it supports the semantic
VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK even before the feature bit is
introduced.

And we don't do other checks like for example setting vq address, size
after DRIVER_OK.

We can hear from others.

> so at this point it's better to return an error, so they
> don't spend time figuring out why the VDUSE device doesn't work.
>
> >
> >Without VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK, we don't know if
> >parent support vq_ready after driver_ok.
>
> So we have to assume that it doesn't support it, to be more
> conservative, right?
>
> >With VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK, we know parent support
> >vq_ready after driver_ok.
> >
> >>
> >> >If this is truth, it seems a little more complicated, for
> >> >example the get_backend_features needs to be forward to the userspace?
> >>
> >> I'm not understanding, don't we already have VHOST_GET_BACKEND_FEATURES
> >> for this? Or do you mean userspace on the VDUSE side?
> >
> >Yes, since in this case the parent is in the userspace, there's no way
> >for VDUSE to know if user space supports vq_ready after driver_ok or
> >not.
> >
> >As you may have noticed, we don't have a message for vq_ready which
> >implies that vq_ready after driver_ok can't be supported.
>
> Yep, I see.
>
> >
> >>
> >> >This seems suboptimal to implement this in the spec first and then we
> >> >can leverage the features. Or we can have another parameter for the
> >> >ioctl that creates the vduse device.
> >>
> >> I got a little lost, though in vhost-user, the device can always expect
> >> a vring_enable/disable, so I thought it was not complicated in VDUSE.
> >
> >Yes, the problem is assuming we have a message for vq_ready, there
> >could be  a "legacy" userspace that doesn't support that.  So in that
> >case, VDUSE needs to know if the userspace parent can support that or
> >not.
>
> I think VDUSE needs to negotiate features (if it doesn't already) as
> vhost-user does with the backend. Also for new future functionality.

It negotiates virtio features but not vhost backend features.

Thanks
Eugenio Perez Martin Feb. 8, 2024, 5:20 p.m. UTC | #16
On Wed, Feb 7, 2024 at 11:18 AM Kevin Wolf <kwolf@redhat.com> wrote:
>
> Am 06.02.2024 um 17:44 hat Eugenio Perez Martin geschrieben:
> > On Mon, Feb 5, 2024 at 2:49 PM Kevin Wolf <kwolf@redhat.com> wrote:
> > >
> > > Am 05.02.2024 um 13:22 hat Eugenio Perez Martin geschrieben:
> > > > On Fri, Feb 2, 2024 at 2:25 PM Kevin Wolf <kwolf@redhat.com> wrote:
> > > > >
> > > > > VDUSE requires that virtqueues are first enabled before the DRIVER_OK
> > > > > status flag is set; with the current API of the kernel module, it is
> > > > > impossible to enable the opposite order in our block export code because
> > > > > userspace is not notified when a virtqueue is enabled.
> > > > >
> > > > > This requirement also mathces the normal initialisation order as done by
> > > > > the generic vhost code in QEMU. However, commit 6c482547 accidentally
> > > > > changed the order for vdpa-dev and broke access to VDUSE devices with
> > > > > this.
> > > > >
> > > > > This changes vdpa-dev to use the normal order again and use the standard
> > > > > vhost callback .vhost_set_vring_enable for this. VDUSE devices can be
> > > > > used with vdpa-dev again after this fix.
> > > > >
> > > > > Cc: qemu-stable@nongnu.org
> > > > > Fixes: 6c4825476a4351530bcac17abab72295b75ffe98
> > > > > Signed-off-by: Kevin Wolf <kwolf@redhat.com>
> > > > > ---
> > > > >  hw/virtio/vdpa-dev.c   |  5 +----
> > > > >  hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
> > > > >  2 files changed, 18 insertions(+), 4 deletions(-)
> > > > >
> > > > > diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
> > > > > index eb9ecea83b..13e87f06f6 100644
> > > > > --- a/hw/virtio/vdpa-dev.c
> > > > > +++ b/hw/virtio/vdpa-dev.c
> > > > > @@ -253,14 +253,11 @@ static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp)
> > > > >
> > > > >      s->dev.acked_features = vdev->guest_features;
> > > > >
> > > > > -    ret = vhost_dev_start(&s->dev, vdev, false);
> > > > > +    ret = vhost_dev_start(&s->dev, vdev, true);
> > > > >      if (ret < 0) {
> > > > >          error_setg_errno(errp, -ret, "Error starting vhost");
> > > > >          goto err_guest_notifiers;
> > > > >      }
> > > > > -    for (i = 0; i < s->dev.nvqs; ++i) {
> > > > > -        vhost_vdpa_set_vring_ready(&s->vdpa, i);
> > > > > -    }
> > > > >      s->started = true;
> > > > >
> > > > >      /*
> > > > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > > > > index 3a43beb312..c4574d56c5 100644
> > > > > --- a/hw/virtio/vhost-vdpa.c
> > > > > +++ b/hw/virtio/vhost-vdpa.c
> > > > > @@ -904,6 +904,22 @@ int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
> > > > >      return r;
> > > > >  }
> > > > >
> > > > > +static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
> > > > > +{
> > > > > +    struct vhost_vdpa *v = dev->opaque;
> > > > > +    unsigned int i;
> > > > > +    int ret;
> > > > > +
> > > > > +    for (i = 0; i < dev->nvqs; ++i) {
> > > > > +        ret = vhost_vdpa_set_vring_ready(v, i);
> > > > > +        if (ret < 0) {
> > > > > +            return ret;
> > > > > +        }
> > > > > +    }
> > > > > +
> > > > > +    return 0;
> > > > > +}
> > > > > +
> > > > >  static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
> > > > >                                         int fd)
> > > > >  {
> > > > > @@ -1524,6 +1540,7 @@ const VhostOps vdpa_ops = {
> > > > >          .vhost_set_features = vhost_vdpa_set_features,
> > > > >          .vhost_reset_device = vhost_vdpa_reset_device,
> > > > >          .vhost_get_vq_index = vhost_vdpa_get_vq_index,
> > > > > +        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
> > > > >          .vhost_get_config  = vhost_vdpa_get_config,
> > > > >          .vhost_set_config = vhost_vdpa_set_config,
> > > > >          .vhost_requires_shm_log = NULL,
> > > >
> > > > vhost-vdpa net enables CVQ before dataplane ones to configure all the
> > > > device in the destination of a live migration. To go back again to
> > > > this callback would cause the device to enable the dataplane before
> > > > virtqueues are configured again.
> > >
> > > Not that it makes a difference, but I don't think it would actually be
> > > going back. Even before your commit 6c482547, we were not making use of
> > > the generic callback but just called the function in a slightly
> > > different place (but less different than after commit 6c482547).
> > >
> > > But anyway... Why don't the other vhost backend need the same for
> > > vhost-net then? Do they just not support live migration?
> >
> > They don't support control virtqueue. More specifically, control
> > virtqueue is handled directly in QEMU.
>
> So the network device already has to special case vdpa instead of using
> the same code for all vhost backends? :-/
>
> > > I don't know the code well enough to say where the problem is, but if
> > > vhost-vdpa networking code relies on the usual vhost operations not
> > > being implemented and bypasses VhostOps to replace it, that sounds like
> > > a design problem to me.
> >
> > I don't follow this. What vhost operation is expected not to be implemented?
>
> You were concerned about implementing .vhost_set_vring_enable in
> vdpa_ops like my patch does. So it seems that the networking code
> requires that it is not implemented?
>

Kind of. vdpa_net requires that the enable calls on the dataplane
vrings are done after DRIVER_OK. So as you say in your mail, we're
good with vhost_dev_start, and changes may be focused to vhost_net.c

Vhost vdpa enable / disable semantics is just different from
vhost-user, the one that originally implemented
.vhost_set_vring_enable. That is the reason why the vhost vDPA enable
was called at vhost_vdpa_dev_start originally.

Vhost-user also calls that function to enable and disable queues. And
the vhost kernel does not even implement the vhost_op, but relies on
vhost_net_set_backend (not sure about other kinds of devices).

This patch is moving toward making them equal, which is not correct.
But maybe it is ok to do that tiny step in that direction to fix
generic vhost-vdpa generic dev and fix that later.

Just for completion, I tried to implement it using the vhost_op but it
was rejected already [1]. It was before Stefano's patch for
selectively disable vrings enable.

> On the other hand, for vhost-vdpa, the callback seems to be called in
> exactly the right place where virtqueues need to be enabled, like for
> other vhost devices.
>
> > > Maybe VhostOps needs a new operation to enable
> > > just a single virtqueue that can be used by the networking code instead?
> > >
> > > > How does VDUSE userspace knows how many queues should enable? Can't
> > > > the kernel perform the necessary actions after DRIVER_OK, like
> > > > configuring the kick etc?
> > >
> > > Not sure if I understand the question. The vdpa kernel interface always
> > > enables individual queues, so the VDUSE userspace will enable whatever
> > > queues it was asked to enable. The only restriction is that the queues
> > > need to be enabled before setting DRIVER_OK.
> > >
> > > The interface that enables all virtqueues at once seems to be just
> > > .vhost_set_vring_enable in QEMU.
> >
> > It enables all virtqueues of the same vhost device in QEMU, but QEMU
> > creates one vhost_dev per each vhost-net virtqueue pair and another
> > one for CVQ. This goes back to the time where mq vhost-kernel net
> > devices were implemented by mergin many tap devices. net/vhost-vdpa.c
> > only enables the last one, which corresponds to CVQ, and then enables
> > the rest once all messages have been received.
>
> So it's less about the granularity of .vhost_set_vring_enable, which
> would just be right, but about the order in which it is called for the
> different vhost_devs?
>
> Can we influence the order in another way than just not implementing the
> callback at all? I think net specific weirdness should be contained in
> the net implementation and not affect generic vdpa code.
>
> > On the other hand, .vhost_set_vring_enable is also used for queue
> > reset (vhost_net_virtqueue_reset and vhost_net_virtqueue_restart). In
> > other words, it is called after the set DRIVER_OK. I guess it is fine
> > for VDUSE as long as it does not offer vring reset capabilities, but
> > future wise should we start going in that direction?
>
> I don't actually know VDUSE very well, but that would probably make
> sense.
>
> Though for the moment, I just tried to simply attach a VDUSE device and
> failed, so I'm just trying to fix the regression from your commit.
>

Understandable.

> > But kernels without VHOST_BACKEND_F_ENABLE_AFTER_DRIVER_OK should be
> > supported, right. Maybe we can add vhost_vdpa_set_vring_enable and
> > call vhost_dev_start with vrings parameters conditional to the feature
> > flag? That way the caller can enable it later or just enable all the
> > rings altogether.
>
> Which specific caller do you mean here?
>
> For vdpa-dev, I don't see any problem with just passing vrings=true
> unconditionally. That is clearly the least problematic order even if
> another order may in theory be allowed by the spec.
>
> For vhost_net, I don't know. As far as I am concerned, there is no
> reason to change its call and it could continue to pass vrings=false.
>
> So vhost_dev_start() seems entirely unproblematic.
>

Yes I think the same.

> The only part that changes for net is that vhost_set_vring_enable() now
> does something where it didn't do anything before. If that is a problem
> for vdpa based network code, maybe we can just special case vdpa there
> to not call vhost_ops->vhost_set_vring_enable? (Despite its name, it's a
> network specific function; it should probably be called something like
> vhost_net_set_vring_enable.)
>
> With something like the below, net shouldn't see any difference any
> more, right? (Happy to add a comment before it if you write it for me.)
>

Early return should work but I'm not sure if that is the most
sustainable way. We should remove peer->type conditionals as much as
possible :).

To split vhost_ops would be better, yes. But these should be split
between the virtio vring enable vs vhost vring enable. The first one
does not accept a boolean, as it is only one direction. But this
option requires more changes, so I'm happy with the early return for
now. Not sure about Jason or MST.

As a draft for the comment:

Net vhost-vdpa devices need to dataplane virtqueues after DRIVER_OK,
so it can recover device state before starting dataplane. Because of
that, not enabling virtqueues here and leaving it to net/vhost-vdpa.c.
---

And maybe we can add another comment at vhost_dev_start doc? Something
in the line of:
* vring = yes means vhost_dev_start will start all vrings. vring =
false delegates vring initialization to the caller.

Does that make sense to you?

Thanks!

[1] https://lore.kernel.org/qemu-devel/0aae4d77-2c03-7ba2-8496-024b5a683449@redhat.com/

> Kevin
>
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index e8e1661646..fb6d13bd69 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -543,6 +543,10 @@ int vhost_set_vring_enable(NetClientState *nc, int enable)
>
>      nc->vring_enable = enable;
>
> +    if (nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> +        return 0;
> +    }
> +
>      if (vhost_ops && vhost_ops->vhost_set_vring_enable) {
>          return vhost_ops->vhost_set_vring_enable(&net->dev, enable);
>      }
>
diff mbox series

Patch

diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c
index eb9ecea83b..13e87f06f6 100644
--- a/hw/virtio/vdpa-dev.c
+++ b/hw/virtio/vdpa-dev.c
@@ -253,14 +253,11 @@  static int vhost_vdpa_device_start(VirtIODevice *vdev, Error **errp)
 
     s->dev.acked_features = vdev->guest_features;
 
-    ret = vhost_dev_start(&s->dev, vdev, false);
+    ret = vhost_dev_start(&s->dev, vdev, true);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Error starting vhost");
         goto err_guest_notifiers;
     }
-    for (i = 0; i < s->dev.nvqs; ++i) {
-        vhost_vdpa_set_vring_ready(&s->vdpa, i);
-    }
     s->started = true;
 
     /*
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 3a43beb312..c4574d56c5 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -904,6 +904,22 @@  int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx)
     return r;
 }
 
+static int vhost_vdpa_set_vring_enable(struct vhost_dev *dev, int enable)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    unsigned int i;
+    int ret;
+
+    for (i = 0; i < dev->nvqs; ++i) {
+        ret = vhost_vdpa_set_vring_ready(v, i);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
+}
+
 static int vhost_vdpa_set_config_call(struct vhost_dev *dev,
                                        int fd)
 {
@@ -1524,6 +1540,7 @@  const VhostOps vdpa_ops = {
         .vhost_set_features = vhost_vdpa_set_features,
         .vhost_reset_device = vhost_vdpa_reset_device,
         .vhost_get_vq_index = vhost_vdpa_get_vq_index,
+        .vhost_set_vring_enable = vhost_vdpa_set_vring_enable,
         .vhost_get_config  = vhost_vdpa_get_config,
         .vhost_set_config = vhost_vdpa_set_config,
         .vhost_requires_shm_log = NULL,