
[RFC,v8,14/21] vhost: Make possible to check for device exclusive vq group

Message ID 20220519191306.821774-15-eperezma@redhat.com (mailing list archive)
State New, archived
Series Net Control VQ support with asid in vDPA SVQ

Commit Message

Eugenio Perez Martin May 19, 2022, 7:12 p.m. UTC
CVQ needs to be in its own group, not shared with any data vq. Enable
the checking of it here, before introducing address space id concepts.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
 include/hw/virtio/vhost.h |  2 +
 hw/net/vhost_net.c        |  4 +-
 hw/virtio/vhost-vdpa.c    | 79 ++++++++++++++++++++++++++++++++++++++-
 hw/virtio/trace-events    |  1 +
 4 files changed, 84 insertions(+), 2 deletions(-)

Comments

Jason Wang June 8, 2022, 4:25 a.m. UTC | #1
On 2022/5/20 03:12, Eugenio Pérez wrote:
> CVQ needs to be in its own group, not shared with any data vq. Enable
> the checking of it here, before introducing address space id concepts.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
>   include/hw/virtio/vhost.h |  2 +
>   hw/net/vhost_net.c        |  4 +-
>   hw/virtio/vhost-vdpa.c    | 79 ++++++++++++++++++++++++++++++++++++++-
>   hw/virtio/trace-events    |  1 +
>   4 files changed, 84 insertions(+), 2 deletions(-)
>
> diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> index b291fe4e24..cebec1d817 100644
> --- a/include/hw/virtio/vhost.h
> +++ b/include/hw/virtio/vhost.h
> @@ -84,6 +84,8 @@ struct vhost_dev {
>       int vq_index_end;
>       /* if non-zero, minimum required value for max_queues */
>       int num_queues;
> +    /* Must be a vq group different than any other vhost dev */
> +    bool independent_vq_group;


We probably need a better abstraction here.

E.g having a parent vhost_dev_group structure.
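
Just to illustrate the idea, something along these lines (a hypothetical
sketch, names invented here, not something this series proposes):

    /* Hypothetical parent structure grouping the vhost_devs of one device */
    struct vhost_dev_group {
        struct vhost_dev **devs;  /* vhost_devs that make up the device */
        unsigned int ndevs;
        uint32_t asid;            /* address space id backing this group */
    };

Each vhost_dev could then point at its group instead of carrying an
independent_vq_group flag of its own.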


>       uint64_t features;
>       uint64_t acked_features;
>       uint64_t backend_features;
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index ccac5b7a64..1c2386c01c 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -339,14 +339,16 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
>       }
>   
>       for (i = 0; i < nvhosts; i++) {
> +        bool cvq_idx = i >= data_queue_pairs;
>   
> -        if (i < data_queue_pairs) {
> +        if (!cvq_idx) {
>               peer = qemu_get_peer(ncs, i);
>           } else { /* Control Virtqueue */
>               peer = qemu_get_peer(ncs, n->max_queue_pairs);
>           }
>   
>           net = get_vhost_net(peer);
> +        net->dev.independent_vq_group = !!cvq_idx;
>           vhost_net_set_vq_index(net, i * 2, index_end);
>   
>           /* Suppress the masking guest notifiers on vhost user
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index eec6d544e9..52dd8baa8d 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -685,7 +685,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
>   {
>       uint64_t features;
>       uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
> -        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
> +        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
> +        0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
>       int r;
>   
>       if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
> @@ -1110,6 +1111,78 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
>       return true;
>   }
>   
> +static int vhost_vdpa_get_vring_group(struct vhost_dev *dev,
> +                                      struct vhost_vring_state *state)
> +{
> +    int ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_VRING_GROUP, state);
> +    trace_vhost_vdpa_get_vring_group(dev, state->index, state->num);
> +    return ret;
> +}
> +
> +static bool vhost_dev_is_independent_group(struct vhost_dev *dev)
> +{
> +    struct vhost_vdpa *v = dev->opaque;
> +    struct vhost_vring_state this_vq_group = {
> +        .index = dev->vq_index,
> +    };
> +    int ret;
> +
> +    if (!(dev->backend_cap & VHOST_BACKEND_F_IOTLB_ASID)) {
> +        return true;
> +    }


This should be false?


> +
> +    if (!v->shadow_vqs_enabled) {
> +        return true;
> +    }


And here?


> +
> +    ret = vhost_vdpa_get_vring_group(dev, &this_vq_group);
> +    if (unlikely(ret)) {
> +        goto call_err;
> +    }
> +
> +    for (int i = 1; i < dev->nvqs; ++i) {
> +        struct vhost_vring_state vq_group = {
> +            .index = dev->vq_index + i,
> +        };
> +
> +        ret = vhost_vdpa_get_vring_group(dev, &vq_group);
> +        if (unlikely(ret)) {
> +            goto call_err;
> +        }
> +        if (unlikely(vq_group.num != this_vq_group.num)) {
> +            error_report("VQ %d group is different than VQ %d one",
> +                         this_vq_group.index, vq_group.index);


Not sure this is needed. The group id is not tied to vq index if I 
understand correctly.

E.g. if we have 1 qp with cvq, we can have:

group 0: cvq
group 1: tx/rx

Thanks


> +            return false;
> +        }
> +    }
> +
> +    for (int i = 0; i < dev->vq_index_end; ++i) {
> +        struct vhost_vring_state vq_group = {
> +            .index = i,
> +        };
> +
> +        if (dev->vq_index <= i && i < dev->vq_index + dev->nvqs) {
> +            continue;
> +        }
> +
> +        ret = vhost_vdpa_get_vring_group(dev, &vq_group);
> +        if (unlikely(ret)) {
> +            goto call_err;
> +        }
> +        if (unlikely(vq_group.num == this_vq_group.num)) {
> +            error_report("VQ %d group is the same as VQ %d one",
> +                         this_vq_group.index, vq_group.index);
> +            return false;
> +        }
> +    }
> +
> +    return true;
> +
> +call_err:
> +    error_report("Can't read vq group, errno=%d (%s)", ret, g_strerror(-ret));
> +    return false;
> +}
> +
>   static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
>   {
>       struct vhost_vdpa *v = dev->opaque;
> @@ -1118,6 +1191,10 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
>   
>       if (started) {
>           vhost_vdpa_host_notifiers_init(dev);
> +        if (dev->independent_vq_group &&
> +            !vhost_dev_is_independent_group(dev)) {
> +            return -1;
> +        }
>           ok = vhost_vdpa_svqs_start(dev);
>           if (unlikely(!ok)) {
>               return -1;
> diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> index ab8e095b73..ffb8eb26e7 100644
> --- a/hw/virtio/trace-events
> +++ b/hw/virtio/trace-events
> @@ -46,6 +46,7 @@ vhost_vdpa_set_vring_ready(void *dev) "dev: %p"
>   vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s"
>   vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32
>   vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32
> +vhost_vdpa_get_vring_group(void *dev, unsigned int index, unsigned int num) "dev: %p index: %u num: %u"
>   vhost_vdpa_dev_start(void *dev, bool started) "dev: %p started: %d"
>   vhost_vdpa_set_log_base(void *dev, uint64_t base, unsigned long long size, int refcnt, int fd, void *log) "dev: %p base: 0x%"PRIx64" size: %llu refcnt: %d fd: %d log: %p"
>   vhost_vdpa_set_vring_addr(void *dev, unsigned int index, unsigned int flags, uint64_t desc_user_addr, uint64_t used_user_addr, uint64_t avail_user_addr, uint64_t log_guest_addr) "dev: %p index: %u flags: 0x%x desc_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" log_guest_addr: 0x%"PRIx64
Eugenio Perez Martin June 8, 2022, 7:21 p.m. UTC | #2
On Wed, Jun 8, 2022 at 6:25 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> On 2022/5/20 03:12, Eugenio Pérez wrote:
> > CVQ needs to be in its own group, not shared with any data vq. Enable
> > the checking of it here, before introducing address space id concepts.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> >   include/hw/virtio/vhost.h |  2 +
> >   hw/net/vhost_net.c        |  4 +-
> >   hw/virtio/vhost-vdpa.c    | 79 ++++++++++++++++++++++++++++++++++++++-
> >   hw/virtio/trace-events    |  1 +
> >   4 files changed, 84 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> > index b291fe4e24..cebec1d817 100644
> > --- a/include/hw/virtio/vhost.h
> > +++ b/include/hw/virtio/vhost.h
> > @@ -84,6 +84,8 @@ struct vhost_dev {
> >       int vq_index_end;
> >       /* if non-zero, minimum required value for max_queues */
> >       int num_queues;
> > +    /* Must be a vq group different than any other vhost dev */
> > +    bool independent_vq_group;
>
>
> We probably need a better abstraction here.
>
> E.g having a parent vhost_dev_group structure.
>

I think there is room for improvement too, but to make this work we
don't need the device model to know about all the other devices at this
moment. I'm open to implementing it if we decide that solution is more
maintainable, or for whatever other reason, though.

>
> >       uint64_t features;
> >       uint64_t acked_features;
> >       uint64_t backend_features;
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index ccac5b7a64..1c2386c01c 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -339,14 +339,16 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
> >       }
> >
> >       for (i = 0; i < nvhosts; i++) {
> > +        bool cvq_idx = i >= data_queue_pairs;
> >
> > -        if (i < data_queue_pairs) {
> > +        if (!cvq_idx) {
> >               peer = qemu_get_peer(ncs, i);
> >           } else { /* Control Virtqueue */
> >               peer = qemu_get_peer(ncs, n->max_queue_pairs);
> >           }
> >
> >           net = get_vhost_net(peer);
> > +        net->dev.independent_vq_group = !!cvq_idx;
> >           vhost_net_set_vq_index(net, i * 2, index_end);
> >
> >           /* Suppress the masking guest notifiers on vhost user
> > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > index eec6d544e9..52dd8baa8d 100644
> > --- a/hw/virtio/vhost-vdpa.c
> > +++ b/hw/virtio/vhost-vdpa.c
> > @@ -685,7 +685,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
> >   {
> >       uint64_t features;
> >       uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
> > -        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
> > +        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
> > +        0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
> >       int r;
> >
> >       if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
> > @@ -1110,6 +1111,78 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
> >       return true;
> >   }
> >
> > +static int vhost_vdpa_get_vring_group(struct vhost_dev *dev,
> > +                                      struct vhost_vring_state *state)
> > +{
> > +    int ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_VRING_GROUP, state);
> > +    trace_vhost_vdpa_get_vring_group(dev, state->index, state->num);
> > +    return ret;
> > +}
> > +
> > +static bool vhost_dev_is_independent_group(struct vhost_dev *dev)
> > +{
> > +    struct vhost_vdpa *v = dev->opaque;
> > +    struct vhost_vring_state this_vq_group = {
> > +        .index = dev->vq_index,
> > +    };
> > +    int ret;
> > +
> > +    if (!(dev->backend_cap & VHOST_BACKEND_F_IOTLB_ASID)) {
> > +        return true;
> > +    }
>
>
> This should be false?
>
>
> > +
> > +    if (!v->shadow_vqs_enabled) {
> > +        return true;
> > +    }
>
>
> And here?
>

They return true so they don't get in the way when the device model
already knows there is no need to check this vhost_dev for an
independent group.

With recent mq changes, I think I can delete these checks and move
them to net/vhost-vdpa.

>
> > +
> > +    ret = vhost_vdpa_get_vring_group(dev, &this_vq_group);
> > +    if (unlikely(ret)) {
> > +        goto call_err;
> > +    }
> > +
> > +    for (int i = 1; i < dev->nvqs; ++i) {
> > +        struct vhost_vring_state vq_group = {
> > +            .index = dev->vq_index + i,
> > +        };
> > +
> > +        ret = vhost_vdpa_get_vring_group(dev, &vq_group);
> > +        if (unlikely(ret)) {
> > +            goto call_err;
> > +        }
> > +        if (unlikely(vq_group.num != this_vq_group.num)) {
> > +            error_report("VQ %d group is different than VQ %d one",
> > +                         this_vq_group.index, vq_group.index);
>
>
> Not sure this is needed. The group id is not tied to vq index if I
> understand correctly.
>
> E.g we have 1 qp with cvq, we can have
>
> group 0 cvq
>
> group 1 tx/rx
>

This function is severely undocumented, thanks for pointing that out :).

It checks that the virtqueues that belong to this vhost_dev do not
share a vq group with any other virtqueue in the device. We need to
check it at device startup, since the cvq index changes depending on
whether _F_MQ is negotiated.

Since we're going to check all other virtqueues, we don't need to know
each other vhost_dev individually: we know the set of vqs to check is
every vq except the ones of our vhost_dev.
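
Something like this doc comment could capture it (just a sketch based
on the explanation above, not part of the posted patch):

    /*
     * Check that the vq group used by this vhost_dev is exclusive to it,
     * i.e. that no virtqueue outside [vq_index, vq_index + nvqs) of the
     * same vDPA device maps to the same group.  Called at start time
     * because the cvq index depends on whether _F_MQ was negotiated.
     */
    static bool vhost_dev_is_independent_group(struct vhost_dev *dev);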

Does it make it more clear?

Thanks!

> Thanks
>
>
> > +            return false;
> > +        }
> > +    }
> > +
> > +    for (int i = 0; i < dev->vq_index_end; ++i) {
> > +        struct vhost_vring_state vq_group = {
> > +            .index = i,
> > +        };
> > +
> > +        if (dev->vq_index <= i && i < dev->vq_index + dev->nvqs) {
> > +            continue;
> > +        }
> > +
> > +        ret = vhost_vdpa_get_vring_group(dev, &vq_group);
> > +        if (unlikely(ret)) {
> > +            goto call_err;
> > +        }
> > +        if (unlikely(vq_group.num == this_vq_group.num)) {
> > +            error_report("VQ %d group is the same as VQ %d one",
> > +                         this_vq_group.index, vq_group.index);
> > +            return false;
> > +        }
> > +    }
> > +
> > +    return true;
> > +
> > +call_err:
> > +    error_report("Can't read vq group, errno=%d (%s)", ret, g_strerror(-ret));
> > +    return false;
> > +}
> > +
> >   static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
> >   {
> >       struct vhost_vdpa *v = dev->opaque;
> > @@ -1118,6 +1191,10 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
> >
> >       if (started) {
> >           vhost_vdpa_host_notifiers_init(dev);
> > +        if (dev->independent_vq_group &&
> > +            !vhost_dev_is_independent_group(dev)) {
> > +            return -1;
> > +        }
> >           ok = vhost_vdpa_svqs_start(dev);
> >           if (unlikely(!ok)) {
> >               return -1;
> > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> > index ab8e095b73..ffb8eb26e7 100644
> > --- a/hw/virtio/trace-events
> > +++ b/hw/virtio/trace-events
> > @@ -46,6 +46,7 @@ vhost_vdpa_set_vring_ready(void *dev) "dev: %p"
> >   vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s"
> >   vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32
> >   vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32
> > +vhost_vdpa_get_vring_group(void *dev, unsigned int index, unsigned int num) "dev: %p index: %u num: %u"
> >   vhost_vdpa_dev_start(void *dev, bool started) "dev: %p started: %d"
> >   vhost_vdpa_set_log_base(void *dev, uint64_t base, unsigned long long size, int refcnt, int fd, void *log) "dev: %p base: 0x%"PRIx64" size: %llu refcnt: %d fd: %d log: %p"
> >   vhost_vdpa_set_vring_addr(void *dev, unsigned int index, unsigned int flags, uint64_t desc_user_addr, uint64_t used_user_addr, uint64_t avail_user_addr, uint64_t log_guest_addr) "dev: %p index: %u flags: 0x%x desc_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" log_guest_addr: 0x%"PRIx64
>
Jason Wang June 9, 2022, 7:13 a.m. UTC | #3
On Thu, Jun 9, 2022 at 3:22 AM Eugenio Perez Martin <eperezma@redhat.com> wrote:
>
> On Wed, Jun 8, 2022 at 6:25 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> >
> > On 2022/5/20 03:12, Eugenio Pérez wrote:
> > > CVQ needs to be in its own group, not shared with any data vq. Enable
> > > the checking of it here, before introducing address space id concepts.
> > >
> > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > ---
> > >   include/hw/virtio/vhost.h |  2 +
> > >   hw/net/vhost_net.c        |  4 +-
> > >   hw/virtio/vhost-vdpa.c    | 79 ++++++++++++++++++++++++++++++++++++++-
> > >   hw/virtio/trace-events    |  1 +
> > >   4 files changed, 84 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> > > index b291fe4e24..cebec1d817 100644
> > > --- a/include/hw/virtio/vhost.h
> > > +++ b/include/hw/virtio/vhost.h
> > > @@ -84,6 +84,8 @@ struct vhost_dev {
> > >       int vq_index_end;
> > >       /* if non-zero, minimum required value for max_queues */
> > >       int num_queues;
> > > +    /* Must be a vq group different than any other vhost dev */
> > > +    bool independent_vq_group;
> >
> >
> > We probably need a better abstraction here.
> >
> > E.g having a parent vhost_dev_group structure.
> >
>
> I think there is room for improvement too, but to make this work we
> don't need the device model to know all the other devices at this
> moment. I'm open to implementing it if we decide that solution is more
> maintainable or whatever other reason though.

I see, so let's keep it as is and do the enhancement in the future.

>
> >
> > >       uint64_t features;
> > >       uint64_t acked_features;
> > >       uint64_t backend_features;
> > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > index ccac5b7a64..1c2386c01c 100644
> > > --- a/hw/net/vhost_net.c
> > > +++ b/hw/net/vhost_net.c
> > > @@ -339,14 +339,16 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
> > >       }
> > >
> > >       for (i = 0; i < nvhosts; i++) {
> > > +        bool cvq_idx = i >= data_queue_pairs;
> > >
> > > -        if (i < data_queue_pairs) {
> > > +        if (!cvq_idx) {
> > >               peer = qemu_get_peer(ncs, i);
> > >           } else { /* Control Virtqueue */
> > >               peer = qemu_get_peer(ncs, n->max_queue_pairs);
> > >           }
> > >
> > >           net = get_vhost_net(peer);
> > > +        net->dev.independent_vq_group = !!cvq_idx;
> > >           vhost_net_set_vq_index(net, i * 2, index_end);
> > >
> > >           /* Suppress the masking guest notifiers on vhost user
> > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > > index eec6d544e9..52dd8baa8d 100644
> > > --- a/hw/virtio/vhost-vdpa.c
> > > +++ b/hw/virtio/vhost-vdpa.c
> > > @@ -685,7 +685,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
> > >   {
> > >       uint64_t features;
> > >       uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
> > > -        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
> > > +        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
> > > +        0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
> > >       int r;
> > >
> > >       if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
> > > @@ -1110,6 +1111,78 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
> > >       return true;
> > >   }
> > >
> > > +static int vhost_vdpa_get_vring_group(struct vhost_dev *dev,
> > > +                                      struct vhost_vring_state *state)
> > > +{
> > > +    int ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_VRING_GROUP, state);
> > > +    trace_vhost_vdpa_get_vring_group(dev, state->index, state->num);
> > > +    return ret;
> > > +}
> > > +
> > > +static bool vhost_dev_is_independent_group(struct vhost_dev *dev)
> > > +{
> > > +    struct vhost_vdpa *v = dev->opaque;
> > > +    struct vhost_vring_state this_vq_group = {
> > > +        .index = dev->vq_index,
> > > +    };
> > > +    int ret;
> > > +
> > > +    if (!(dev->backend_cap & VHOST_BACKEND_F_IOTLB_ASID)) {
> > > +        return true;
> > > +    }
> >
> >
> > This should be false?
> >
> >
> > > +
> > > +    if (!v->shadow_vqs_enabled) {
> > > +        return true;
> > > +    }
> >
> >
> > And here?
> >
>
> They're true so it doesn't get in the middle if the device already
> knows there is no need to check vhost_dev for an independent group.

I'm not sure I understand this.

Without ASID but with MQ, we know all vhost_devs are not independent.

>
> With recent mq changes, I think I can delete these checks and move
> them to net/vhost-vdpa.
>
> >
> > > +
> > > +    ret = vhost_vdpa_get_vring_group(dev, &this_vq_group);
> > > +    if (unlikely(ret)) {
> > > +        goto call_err;
> > > +    }
> > > +
> > > +    for (int i = 1; i < dev->nvqs; ++i) {
> > > +        struct vhost_vring_state vq_group = {
> > > +            .index = dev->vq_index + i,
> > > +        };
> > > +
> > > +        ret = vhost_vdpa_get_vring_group(dev, &vq_group);
> > > +        if (unlikely(ret)) {
> > > +            goto call_err;
> > > +        }
> > > +        if (unlikely(vq_group.num != this_vq_group.num)) {
> > > +            error_report("VQ %d group is different than VQ %d one",
> > > +                         this_vq_group.index, vq_group.index);
> >
> >
> > Not sure this is needed. The group id is not tied to vq index if I
> > understand correctly.
> >
> > E.g we have 1 qp with cvq, we can have
> >
> > group 0 cvq
> >
> > group 1 tx/rx
> >
>
> This function is severly undocumented, thanks for pointing out :).
>
> It checks if the virtqueues that belong to this vhost_dev does not
> share vq group with any other virtqueue in the device. We need to
> check it at device startup, since cvq index changes depending on _F_MQ
> negotiated.
>
> Since we're going to check all other virtqueues, we don't need to know
> other vhost_dev individually: We know the set of vqs to check is all
> vqs but our vhost_dev one.
>
> Does it make it more clear?

Kind of, but

We make independent_vq_group true for the cvq unconditionally, and
check the other vhost_devs during start. This seems less than optimal.
Is there any reason we can't do all of this logic during
net_init_vhost_vdpa()?

E.g. we could get and set up the group asid stuff once there instead
of on each stop/start.
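
Something like this, roughly (assuming the VHOST_VDPA_SET_GROUP_ASID
ioctl of the in-flight kernel ASID series, which reuses struct
vhost_vring_state with .index = group and .num = asid; cvq_group and
device_fd are just placeholders here):

    /* One-time setup: pin the CVQ group to its own address space id */
    static int vhost_vdpa_set_cvq_group_asid(int device_fd, unsigned cvq_group)
    {
        struct vhost_vring_state asid = {
            .index = cvq_group, /* group id from VHOST_VDPA_GET_VRING_GROUP */
            .num = 1,           /* dedicated address space id for that group */
        };
        int r = ioctl(device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid);

        if (r != 0) {
            error_report("Cannot set group %u asid: %s",
                         asid.index, g_strerror(errno));
        }
        return r;
    }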

Thanks

>
> Thanks!
>
> > Thanks
> >
> >
> > > +            return false;
> > > +        }
> > > +    }
> > > +
> > > +    for (int i = 0; i < dev->vq_index_end; ++i) {
> > > +        struct vhost_vring_state vq_group = {
> > > +            .index = i,
> > > +        };
> > > +
> > > +        if (dev->vq_index <= i && i < dev->vq_index + dev->nvqs) {
> > > +            continue;
> > > +        }
> > > +
> > > +        ret = vhost_vdpa_get_vring_group(dev, &vq_group);
> > > +        if (unlikely(ret)) {
> > > +            goto call_err;
> > > +        }
> > > +        if (unlikely(vq_group.num == this_vq_group.num)) {
> > > +            error_report("VQ %d group is the same as VQ %d one",
> > > +                         this_vq_group.index, vq_group.index);
> > > +            return false;
> > > +        }
> > > +    }
> > > +
> > > +    return true;
> > > +
> > > +call_err:
> > > +    error_report("Can't read vq group, errno=%d (%s)", ret, g_strerror(-ret));
> > > +    return false;
> > > +}
> > > +
> > >   static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
> > >   {
> > >       struct vhost_vdpa *v = dev->opaque;
> > > @@ -1118,6 +1191,10 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
> > >
> > >       if (started) {
> > >           vhost_vdpa_host_notifiers_init(dev);
> > > +        if (dev->independent_vq_group &&
> > > +            !vhost_dev_is_independent_group(dev)) {
> > > +            return -1;
> > > +        }
> > >           ok = vhost_vdpa_svqs_start(dev);
> > >           if (unlikely(!ok)) {
> > >               return -1;
> > > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> > > index ab8e095b73..ffb8eb26e7 100644
> > > --- a/hw/virtio/trace-events
> > > +++ b/hw/virtio/trace-events
> > > @@ -46,6 +46,7 @@ vhost_vdpa_set_vring_ready(void *dev) "dev: %p"
> > >   vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s"
> > >   vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32
> > >   vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32
> > > +vhost_vdpa_get_vring_group(void *dev, unsigned int index, unsigned int num) "dev: %p index: %u num: %u"
> > >   vhost_vdpa_dev_start(void *dev, bool started) "dev: %p started: %d"
> > >   vhost_vdpa_set_log_base(void *dev, uint64_t base, unsigned long long size, int refcnt, int fd, void *log) "dev: %p base: 0x%"PRIx64" size: %llu refcnt: %d fd: %d log: %p"
> > >   vhost_vdpa_set_vring_addr(void *dev, unsigned int index, unsigned int flags, uint64_t desc_user_addr, uint64_t used_user_addr, uint64_t avail_user_addr, uint64_t log_guest_addr) "dev: %p index: %u flags: 0x%x desc_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" log_guest_addr: 0x%"PRIx64
> >
>
Eugenio Perez Martin June 9, 2022, 7:51 a.m. UTC | #4
On Thu, Jun 9, 2022 at 9:13 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Thu, Jun 9, 2022 at 3:22 AM Eugenio Perez Martin <eperezma@redhat.com> wrote:
> >
> > On Wed, Jun 8, 2022 at 6:25 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > >
> > > On 2022/5/20 03:12, Eugenio Pérez wrote:
> > > > CVQ needs to be in its own group, not shared with any data vq. Enable
> > > > the checking of it here, before introducing address space id concepts.
> > > >
> > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > > ---
> > > >   include/hw/virtio/vhost.h |  2 +
> > > >   hw/net/vhost_net.c        |  4 +-
> > > >   hw/virtio/vhost-vdpa.c    | 79 ++++++++++++++++++++++++++++++++++++++-
> > > >   hw/virtio/trace-events    |  1 +
> > > >   4 files changed, 84 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
> > > > index b291fe4e24..cebec1d817 100644
> > > > --- a/include/hw/virtio/vhost.h
> > > > +++ b/include/hw/virtio/vhost.h
> > > > @@ -84,6 +84,8 @@ struct vhost_dev {
> > > >       int vq_index_end;
> > > >       /* if non-zero, minimum required value for max_queues */
> > > >       int num_queues;
> > > > +    /* Must be a vq group different than any other vhost dev */
> > > > +    bool independent_vq_group;
> > >
> > >
> > > We probably need a better abstraction here.
> > >
> > > E.g having a parent vhost_dev_group structure.
> > >
> >
> > I think there is room for improvement too, but to make this work we
> > don't need the device model to know all the other devices at this
> > moment. I'm open to implementing it if we decide that solution is more
> > maintainable or whatever other reason though.
>
> I see, so let's keep it as is and do the enhancement in the future.
>
> >
> > >
> > > >       uint64_t features;
> > > >       uint64_t acked_features;
> > > >       uint64_t backend_features;
> > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > > index ccac5b7a64..1c2386c01c 100644
> > > > --- a/hw/net/vhost_net.c
> > > > +++ b/hw/net/vhost_net.c
> > > > @@ -339,14 +339,16 @@ int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
> > > >       }
> > > >
> > > >       for (i = 0; i < nvhosts; i++) {
> > > > +        bool cvq_idx = i >= data_queue_pairs;
> > > >
> > > > -        if (i < data_queue_pairs) {
> > > > +        if (!cvq_idx) {
> > > >               peer = qemu_get_peer(ncs, i);
> > > >           } else { /* Control Virtqueue */
> > > >               peer = qemu_get_peer(ncs, n->max_queue_pairs);
> > > >           }
> > > >
> > > >           net = get_vhost_net(peer);
> > > > +        net->dev.independent_vq_group = !!cvq_idx;
> > > >           vhost_net_set_vq_index(net, i * 2, index_end);
> > > >
> > > >           /* Suppress the masking guest notifiers on vhost user
> > > > diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> > > > index eec6d544e9..52dd8baa8d 100644
> > > > --- a/hw/virtio/vhost-vdpa.c
> > > > +++ b/hw/virtio/vhost-vdpa.c
> > > > @@ -685,7 +685,8 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
> > > >   {
> > > >       uint64_t features;
> > > >       uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
> > > > -        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
> > > > +        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
> > > > +        0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
> > > >       int r;
> > > >
> > > >       if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
> > > > @@ -1110,6 +1111,78 @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
> > > >       return true;
> > > >   }
> > > >
> > > > +static int vhost_vdpa_get_vring_group(struct vhost_dev *dev,
> > > > +                                      struct vhost_vring_state *state)
> > > > +{
> > > > +    int ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_VRING_GROUP, state);
> > > > +    trace_vhost_vdpa_get_vring_group(dev, state->index, state->num);
> > > > +    return ret;
> > > > +}
> > > > +
> > > > +static bool vhost_dev_is_independent_group(struct vhost_dev *dev)
> > > > +{
> > > > +    struct vhost_vdpa *v = dev->opaque;
> > > > +    struct vhost_vring_state this_vq_group = {
> > > > +        .index = dev->vq_index,
> > > > +    };
> > > > +    int ret;
> > > > +
> > > > +    if (!(dev->backend_cap & VHOST_BACKEND_F_IOTLB_ASID)) {
> > > > +        return true;
> > > > +    }
> > >
> > >
> > > This should be false?
> > >
> > >
> > > > +
> > > > +    if (!v->shadow_vqs_enabled) {
> > > > +        return true;
> > > > +    }
> > >
> > >
> > > And here?
> > >
> >
> > They're true so it doesn't get in the middle if the device already
> > knows there is no need to check vhost_dev for an independent group.
>
> I'm not sure I understand this.
>
> Without ASID but with MQ, we know all vhost_devs are not independent.
>

I think we can move this discussion to another level: What is the
right action if the device exposes MQ but cannot set a different asid
for cvq for whatever reason?

a. To forbid migration (migration_blocker, roughly sketched below). This
maintains backward compatibility, but we could migrate to a VMM that
does not support it, preventing the VM from ever being migrated again.
This is the method I'm implementing for the next version but we can
decide otherwise for sure.
b. To fail device initialization: This makes new versions of qemu fail
with old devices, but prevents the migration locking problem of a.

Note that I think we should treat all these cases the same: the vdpa
device does not offer _F_ASID, the device does not have more than one
asid, the cvq does not belong to an independent group, the ioctl to set
the vq group asid does not succeed... I think that treating them
differently forces us to fall into the drawbacks of both a. and b. at
the same time. More on this below.
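
For reference, option a. would boil down to something like this (a
rough sketch using the generic migration blocker API; the error message
and the exact placement are made up here):

    #include "qapi/error.h"
    #include "migration/blocker.h"

    static Error *vdpa_cvq_blocker;

    /* Called when we detect that CVQ cannot get its own address space id */
    static void vdpa_cvq_block_migration(Error **errp)
    {
        error_setg(&vdpa_cvq_blocker,
                   "vdpa: cannot isolate CVQ in its own address space");
        if (migrate_add_blocker(vdpa_cvq_blocker, errp) < 0) {
            error_free(vdpa_cvq_blocker);
            vdpa_cvq_blocker = NULL;
        }
    }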

> >
> > With recent mq changes, I think I can delete these checks and move
> > them to net/vhost-vdpa.
> >
> > >
> > > > +
> > > > +    ret = vhost_vdpa_get_vring_group(dev, &this_vq_group);
> > > > +    if (unlikely(ret)) {
> > > > +        goto call_err;
> > > > +    }
> > > > +
> > > > +    for (int i = 1; i < dev->nvqs; ++i) {
> > > > +        struct vhost_vring_state vq_group = {
> > > > +            .index = dev->vq_index + i,
> > > > +        };
> > > > +
> > > > +        ret = vhost_vdpa_get_vring_group(dev, &vq_group);
> > > > +        if (unlikely(ret)) {
> > > > +            goto call_err;
> > > > +        }
> > > > +        if (unlikely(vq_group.num != this_vq_group.num)) {
> > > > +            error_report("VQ %d group is different than VQ %d one",
> > > > +                         this_vq_group.index, vq_group.index);
> > >
> > >
> > > Not sure this is needed. The group id is not tied to vq index if I
> > > understand correctly.
> > >
> > > E.g we have 1 qp with cvq, we can have
> > >
> > > group 0 cvq
> > >
> > > group 1 tx/rx
> > >
> >
> > This function is severly undocumented, thanks for pointing out :).
> >
> > It checks if the virtqueues that belong to this vhost_dev does not
> > share vq group with any other virtqueue in the device. We need to
> > check it at device startup, since cvq index changes depending on _F_MQ
> > negotiated.
> >
> > Since we're going to check all other virtqueues, we don't need to know
> > other vhost_dev individually: We know the set of vqs to check is all
> > vqs but our vhost_dev one.
> >
> > Does it make it more clear?
>
> Kind of, but
>
> We make independent_vq_group to be true for cvq unconditionally, and
> check other vhost_dev during start. This seems less optimal. Any
> reason we do all of the logic during net_inti_vhost_vdpa()?
>
> E.g we can get and set up the group asid stuff once there instead of
> on each stop/start.
>

There are a few reasons:
* The vq group ASIDs are reset every time the device is reset.
* If the guest does not acknowledge features, we don't know the cvq
index (see the sketch below), so we cannot check the vq group of the
cvq. I think the device should even return -EINVAL in this case.
* Assuming we can circumvent the previous two points, it does not make
sense to forbid migration of a guest because the VMM cannot inspect the
CVQ as long as the guest does not negotiate CVQ.
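
To illustrate the second point, this is basically how the cvq index is
defined by the virtio-net spec (a minimal sketch, not code from the
series):

    /* Data vqs take indexes 0..2*qpairs-1; the cvq comes right after them */
    static unsigned virtio_net_cvq_index(bool mq_negotiated,
                                         unsigned max_queue_pairs)
    {
        return mq_negotiated ? 2 * max_queue_pairs : 2;
    }

So until the guest acknowledges features (and thus _F_MQ) we simply
don't know which index to query.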

Thanks!

> Thanks
>
> >
> > Thanks!
> >
> > > Thanks
> > >
> > >
> > > > +            return false;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    for (int i = 0; i < dev->vq_index_end; ++i) {
> > > > +        struct vhost_vring_state vq_group = {
> > > > +            .index = i,
> > > > +        };
> > > > +
> > > > +        if (dev->vq_index <= i && i < dev->vq_index + dev->nvqs) {
> > > > +            continue;
> > > > +        }
> > > > +
> > > > +        ret = vhost_vdpa_get_vring_group(dev, &vq_group);
> > > > +        if (unlikely(ret)) {
> > > > +            goto call_err;
> > > > +        }
> > > > +        if (unlikely(vq_group.num == this_vq_group.num)) {
> > > > +            error_report("VQ %d group is the same as VQ %d one",
> > > > +                         this_vq_group.index, vq_group.index);
> > > > +            return false;
> > > > +        }
> > > > +    }
> > > > +
> > > > +    return true;
> > > > +
> > > > +call_err:
> > > > +    error_report("Can't read vq group, errno=%d (%s)", ret, g_strerror(-ret));
> > > > +    return false;
> > > > +}
> > > > +
> > > >   static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
> > > >   {
> > > >       struct vhost_vdpa *v = dev->opaque;
> > > > @@ -1118,6 +1191,10 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
> > > >
> > > >       if (started) {
> > > >           vhost_vdpa_host_notifiers_init(dev);
> > > > +        if (dev->independent_vq_group &&
> > > > +            !vhost_dev_is_independent_group(dev)) {
> > > > +            return -1;
> > > > +        }
> > > >           ok = vhost_vdpa_svqs_start(dev);
> > > >           if (unlikely(!ok)) {
> > > >               return -1;
> > > > diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
> > > > index ab8e095b73..ffb8eb26e7 100644
> > > > --- a/hw/virtio/trace-events
> > > > +++ b/hw/virtio/trace-events
> > > > @@ -46,6 +46,7 @@ vhost_vdpa_set_vring_ready(void *dev) "dev: %p"
> > > >   vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s"
> > > >   vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32
> > > >   vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32
> > > > +vhost_vdpa_get_vring_group(void *dev, unsigned int index, unsigned int num) "dev: %p index: %u num: %u"
> > > >   vhost_vdpa_dev_start(void *dev, bool started) "dev: %p started: %d"
> > > >   vhost_vdpa_set_log_base(void *dev, uint64_t base, unsigned long long size, int refcnt, int fd, void *log) "dev: %p base: 0x%"PRIx64" size: %llu refcnt: %d fd: %d log: %p"
> > > >   vhost_vdpa_set_vring_addr(void *dev, unsigned int index, unsigned int flags, uint64_t desc_user_addr, uint64_t used_user_addr, uint64_t avail_user_addr, uint64_t log_guest_addr) "dev: %p index: %u flags: 0x%x desc_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" log_guest_addr: 0x%"PRIx64
> > >
> >
>

Patch

diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h
index b291fe4e24..cebec1d817 100644
--- a/include/hw/virtio/vhost.h
+++ b/include/hw/virtio/vhost.h
@@ -84,6 +84,8 @@  struct vhost_dev {
     int vq_index_end;
     /* if non-zero, minimum required value for max_queues */
     int num_queues;
+    /* Must be a vq group different than any other vhost dev */
+    bool independent_vq_group;
     uint64_t features;
     uint64_t acked_features;
     uint64_t backend_features;
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index ccac5b7a64..1c2386c01c 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -339,14 +339,16 @@  int vhost_net_start(VirtIODevice *dev, NetClientState *ncs,
     }
 
     for (i = 0; i < nvhosts; i++) {
+        bool cvq_idx = i >= data_queue_pairs;
 
-        if (i < data_queue_pairs) {
+        if (!cvq_idx) {
             peer = qemu_get_peer(ncs, i);
         } else { /* Control Virtqueue */
             peer = qemu_get_peer(ncs, n->max_queue_pairs);
         }
 
         net = get_vhost_net(peer);
+        net->dev.independent_vq_group = !!cvq_idx;
         vhost_net_set_vq_index(net, i * 2, index_end);
 
         /* Suppress the masking guest notifiers on vhost user
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index eec6d544e9..52dd8baa8d 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -685,7 +685,8 @@  static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev)
 {
     uint64_t features;
     uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 |
-        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH;
+        0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH |
+        0x1ULL << VHOST_BACKEND_F_IOTLB_ASID;
     int r;
 
     if (vhost_vdpa_call(dev, VHOST_GET_BACKEND_FEATURES, &features)) {
@@ -1110,6 +1111,78 @@  static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
     return true;
 }
 
+static int vhost_vdpa_get_vring_group(struct vhost_dev *dev,
+                                      struct vhost_vring_state *state)
+{
+    int ret = vhost_vdpa_call(dev, VHOST_VDPA_GET_VRING_GROUP, state);
+    trace_vhost_vdpa_get_vring_group(dev, state->index, state->num);
+    return ret;
+}
+
+static bool vhost_dev_is_independent_group(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    struct vhost_vring_state this_vq_group = {
+        .index = dev->vq_index,
+    };
+    int ret;
+
+    if (!(dev->backend_cap & VHOST_BACKEND_F_IOTLB_ASID)) {
+        return true;
+    }
+
+    if (!v->shadow_vqs_enabled) {
+        return true;
+    }
+
+    ret = vhost_vdpa_get_vring_group(dev, &this_vq_group);
+    if (unlikely(ret)) {
+        goto call_err;
+    }
+
+    for (int i = 1; i < dev->nvqs; ++i) {
+        struct vhost_vring_state vq_group = {
+            .index = dev->vq_index + i,
+        };
+
+        ret = vhost_vdpa_get_vring_group(dev, &vq_group);
+        if (unlikely(ret)) {
+            goto call_err;
+        }
+        if (unlikely(vq_group.num != this_vq_group.num)) {
+            error_report("VQ %d group is different than VQ %d one",
+                         this_vq_group.index, vq_group.index);
+            return false;
+        }
+    }
+
+    for (int i = 0; i < dev->vq_index_end; ++i) {
+        struct vhost_vring_state vq_group = {
+            .index = i,
+        };
+
+        if (dev->vq_index <= i && i < dev->vq_index + dev->nvqs) {
+            continue;
+        }
+
+        ret = vhost_vdpa_get_vring_group(dev, &vq_group);
+        if (unlikely(ret)) {
+            goto call_err;
+        }
+        if (unlikely(vq_group.num == this_vq_group.num)) {
+            error_report("VQ %d group is the same as VQ %d one",
+                         this_vq_group.index, vq_group.index);
+            return false;
+        }
+    }
+
+    return true;
+
+call_err:
+    error_report("Can't read vq group, errno=%d (%s)", ret, g_strerror(-ret));
+    return false;
+}
+
 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
 {
     struct vhost_vdpa *v = dev->opaque;
@@ -1118,6 +1191,10 @@  static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
 
     if (started) {
         vhost_vdpa_host_notifiers_init(dev);
+        if (dev->independent_vq_group &&
+            !vhost_dev_is_independent_group(dev)) {
+            return -1;
+        }
         ok = vhost_vdpa_svqs_start(dev);
         if (unlikely(!ok)) {
             return -1;
diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events
index ab8e095b73..ffb8eb26e7 100644
--- a/hw/virtio/trace-events
+++ b/hw/virtio/trace-events
@@ -46,6 +46,7 @@  vhost_vdpa_set_vring_ready(void *dev) "dev: %p"
 vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s"
 vhost_vdpa_set_config(void *dev, uint32_t offset, uint32_t size, uint32_t flags) "dev: %p offset: %"PRIu32" size: %"PRIu32" flags: 0x%"PRIx32
 vhost_vdpa_get_config(void *dev, void *config, uint32_t config_len) "dev: %p config: %p config_len: %"PRIu32
+vhost_vdpa_get_vring_group(void *dev, unsigned int index, unsigned int num) "dev: %p index: %u num: %u"
 vhost_vdpa_dev_start(void *dev, bool started) "dev: %p started: %d"
 vhost_vdpa_set_log_base(void *dev, uint64_t base, unsigned long long size, int refcnt, int fd, void *log) "dev: %p base: 0x%"PRIx64" size: %llu refcnt: %d fd: %d log: %p"
 vhost_vdpa_set_vring_addr(void *dev, unsigned int index, unsigned int flags, uint64_t desc_user_addr, uint64_t used_user_addr, uint64_t avail_user_addr, uint64_t log_guest_addr) "dev: %p index: %u flags: 0x%x desc_user_addr: 0x%"PRIx64" used_user_addr: 0x%"PRIx64" avail_user_addr: 0x%"PRIx64" log_guest_addr: 0x%"PRIx64