diff mbox series

[3/3] vdpa: Expose VIRTIO_NET_F_STATUS unconditionally

Message ID 20221026095303.37907-4-eperezma@redhat.com (mailing list archive)
State New, archived
Headers show
Series Emulate status feature in vhost-vdpa net | expand

Commit Message

Eugenio Perez Martin Oct. 26, 2022, 9:53 a.m. UTC
Now that qemu can handle and emulate it if the vdpa backend does not
support it we can offer it always.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
 include/net/vhost-vdpa.h |  1 +
 hw/net/vhost_net.c       | 16 ++++++++++++++--
 net/vhost-vdpa.c         |  3 +++
 3 files changed, 18 insertions(+), 2 deletions(-)

Comments

Jason Wang Oct. 27, 2022, 4:31 a.m. UTC | #1
在 2022/10/26 17:53, Eugenio Pérez 写道:
> Now that qemu can handle and emulate it if the vdpa backend does not
> support it we can offer it always.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>


I may miss something but isn't more easier to simply remove the 
_F_STATUS from vdpa_feature_bits[]?

Thanks


> ---
>   include/net/vhost-vdpa.h |  1 +
>   hw/net/vhost_net.c       | 16 ++++++++++++++--
>   net/vhost-vdpa.c         |  3 +++
>   3 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> index b81f9a6f2a..cfbcce6427 100644
> --- a/include/net/vhost-vdpa.h
> +++ b/include/net/vhost-vdpa.h
> @@ -17,5 +17,6 @@
>   struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
>   
>   extern const int vdpa_feature_bits[];
> +extern const uint64_t vhost_vdpa_net_added_feature_bits;
>   
>   #endif /* VHOST_VDPA_H */
> diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> index d28f8b974b..7c15cc6e8f 100644
> --- a/hw/net/vhost_net.c
> +++ b/hw/net/vhost_net.c
> @@ -109,10 +109,22 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
>       return feature_bits;
>   }
>   
> +static uint64_t vhost_net_add_feature_bits(struct vhost_net *net)
> +{
> +    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> +        return vhost_vdpa_net_added_feature_bits;
> +    }
> +
> +    return 0;
> +}
> +
>   uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
>   {
> -    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
> -            features);
> +    uint64_t ret = vhost_get_features(&net->dev,
> +                                      vhost_net_get_feature_bits(net),
> +                                      features);
> +
> +    return ret | vhost_net_add_feature_bits(net);
>   }
>   int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
>                            uint32_t config_len)
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 6d64000202..24d2857593 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -99,6 +99,9 @@ static const uint64_t vdpa_svq_device_features =
>       BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
>       BIT_ULL(VIRTIO_NET_F_STANDBY);
>   
> +const uint64_t vhost_vdpa_net_added_feature_bits =
> +    BIT_ULL(VIRTIO_NET_F_STATUS);
> +
>   VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
>   {
>       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
Eugenio Perez Martin Oct. 27, 2022, 6:46 a.m. UTC | #2
On Thu, Oct 27, 2022 at 6:32 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2022/10/26 17:53, Eugenio Pérez 写道:
> > Now that qemu can handle and emulate it if the vdpa backend does not
> > support it we can offer it always.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
>
>
> I may miss something but isn't more easier to simply remove the
> _F_STATUS from vdpa_feature_bits[]?
>

How is that? if we remove it, the guest cannot ack it so it cannot
access the net status, isn't it?

The goal with this patch series is to let the guest access the status
always, even if the device doesn't support _F_STATUS.

> Thanks
>
>
> > ---
> >   include/net/vhost-vdpa.h |  1 +
> >   hw/net/vhost_net.c       | 16 ++++++++++++++--
> >   net/vhost-vdpa.c         |  3 +++
> >   3 files changed, 18 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > index b81f9a6f2a..cfbcce6427 100644
> > --- a/include/net/vhost-vdpa.h
> > +++ b/include/net/vhost-vdpa.h
> > @@ -17,5 +17,6 @@
> >   struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> >
> >   extern const int vdpa_feature_bits[];
> > +extern const uint64_t vhost_vdpa_net_added_feature_bits;
> >
> >   #endif /* VHOST_VDPA_H */
> > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > index d28f8b974b..7c15cc6e8f 100644
> > --- a/hw/net/vhost_net.c
> > +++ b/hw/net/vhost_net.c
> > @@ -109,10 +109,22 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> >       return feature_bits;
> >   }
> >
> > +static uint64_t vhost_net_add_feature_bits(struct vhost_net *net)
> > +{
> > +    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > +        return vhost_vdpa_net_added_feature_bits;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >   uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
> >   {
> > -    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
> > -            features);
> > +    uint64_t ret = vhost_get_features(&net->dev,
> > +                                      vhost_net_get_feature_bits(net),
> > +                                      features);
> > +
> > +    return ret | vhost_net_add_feature_bits(net);
> >   }
> >   int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
> >                            uint32_t config_len)
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index 6d64000202..24d2857593 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -99,6 +99,9 @@ static const uint64_t vdpa_svq_device_features =
> >       BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> >       BIT_ULL(VIRTIO_NET_F_STANDBY);
> >
> > +const uint64_t vhost_vdpa_net_added_feature_bits =
> > +    BIT_ULL(VIRTIO_NET_F_STATUS);
> > +
> >   VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> >   {
> >       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
>
Jason Wang Oct. 27, 2022, 6:54 a.m. UTC | #3
On Thu, Oct 27, 2022 at 2:47 PM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Thu, Oct 27, 2022 at 6:32 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> >
> > 在 2022/10/26 17:53, Eugenio Pérez 写道:
> > > Now that qemu can handle and emulate it if the vdpa backend does not
> > > support it we can offer it always.
> > >
> > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> >
> >
> > I may miss something but isn't more easier to simply remove the
> > _F_STATUS from vdpa_feature_bits[]?
> >
>
> How is that? if we remove it, the guest cannot ack it so it cannot
> access the net status, isn't it?

My understanding is that the bits stored in the vdpa_feature_bits[]
are the features that must be explicitly supported by the vhost
device. So if we remove _F_STATUS, Qemu vhost code won't validate if
vhost-vdpa device has this support:

uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
                            uint64_t features)
{
    const int *bit = feature_bits;
    while (*bit != VHOST_INVALID_FEATURE_BIT) {
        uint64_t bit_mask = (1ULL << *bit);
        if (!(hdev->features & bit_mask)) {
            features &= ~bit_mask;
        }
        bit++;
    }
    return features;
}

Thanks



>
> The goal with this patch series is to let the guest access the status
> always, even if the device doesn't support _F_STATUS.
>
> > Thanks
> >
> >
> > > ---
> > >   include/net/vhost-vdpa.h |  1 +
> > >   hw/net/vhost_net.c       | 16 ++++++++++++++--
> > >   net/vhost-vdpa.c         |  3 +++
> > >   3 files changed, 18 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > > index b81f9a6f2a..cfbcce6427 100644
> > > --- a/include/net/vhost-vdpa.h
> > > +++ b/include/net/vhost-vdpa.h
> > > @@ -17,5 +17,6 @@
> > >   struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> > >
> > >   extern const int vdpa_feature_bits[];
> > > +extern const uint64_t vhost_vdpa_net_added_feature_bits;
> > >
> > >   #endif /* VHOST_VDPA_H */
> > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > index d28f8b974b..7c15cc6e8f 100644
> > > --- a/hw/net/vhost_net.c
> > > +++ b/hw/net/vhost_net.c
> > > @@ -109,10 +109,22 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> > >       return feature_bits;
> > >   }
> > >
> > > +static uint64_t vhost_net_add_feature_bits(struct vhost_net *net)
> > > +{
> > > +    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > > +        return vhost_vdpa_net_added_feature_bits;
> > > +    }
> > > +
> > > +    return 0;
> > > +}
> > > +
> > >   uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
> > >   {
> > > -    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
> > > -            features);
> > > +    uint64_t ret = vhost_get_features(&net->dev,
> > > +                                      vhost_net_get_feature_bits(net),
> > > +                                      features);
> > > +
> > > +    return ret | vhost_net_add_feature_bits(net);
> > >   }
> > >   int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
> > >                            uint32_t config_len)
> > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > index 6d64000202..24d2857593 100644
> > > --- a/net/vhost-vdpa.c
> > > +++ b/net/vhost-vdpa.c
> > > @@ -99,6 +99,9 @@ static const uint64_t vdpa_svq_device_features =
> > >       BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> > >       BIT_ULL(VIRTIO_NET_F_STANDBY);
> > >
> > > +const uint64_t vhost_vdpa_net_added_feature_bits =
> > > +    BIT_ULL(VIRTIO_NET_F_STATUS);
> > > +
> > >   VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> > >   {
> > >       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> >
>
Eugenio Perez Martin Oct. 27, 2022, 10:17 a.m. UTC | #4
On Thu, Oct 27, 2022 at 8:54 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Thu, Oct 27, 2022 at 2:47 PM Eugenio Perez Martin
> <eperezma@redhat.com> wrote:
> >
> > On Thu, Oct 27, 2022 at 6:32 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > >
> > > 在 2022/10/26 17:53, Eugenio Pérez 写道:
> > > > Now that qemu can handle and emulate it if the vdpa backend does not
> > > > support it we can offer it always.
> > > >
> > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > >
> > >
> > > I may miss something but isn't more easier to simply remove the
> > > _F_STATUS from vdpa_feature_bits[]?
> > >
> >
> > How is that? if we remove it, the guest cannot ack it so it cannot
> > access the net status, isn't it?
>
> My understanding is that the bits stored in the vdpa_feature_bits[]
> are the features that must be explicitly supported by the vhost
> device.

(Non English native here, so maybe I don't get what you mean :) ) The
device may not support them. net simulator lacks some of them
actually, and it works.

From what I see these are the only features that will be forwarded to
the guest as device_features. If it is not in the list, the feature
will be masked out, as if the device does not support it.

So now _F_STATUS it was forwarded only if the device supports it. If
we remove it from bit_mask, it will never be offered to the guest. But
we want to offer it always, since we will need it for
_F_GUEST_ANNOUNCE.

Things get more complex because we actually need to ack it back if the
device offers it, so the vdpa device can report link_down. We will
only emulate LINK_UP always in the case the device does not support
_F_STATUS.

> So if we remove _F_STATUS, Qemu vhost code won't validate if
> vhost-vdpa device has this support:
>
> uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
>                             uint64_t features)
> {
>     const int *bit = feature_bits;
>     while (*bit != VHOST_INVALID_FEATURE_BIT) {
>         uint64_t bit_mask = (1ULL << *bit);
>         if (!(hdev->features & bit_mask)) {
>             features &= ~bit_mask;
>         }
>         bit++;
>     }
>     return features;
> }
>

Now maybe I'm the one missing something, but why is this not done as a
masking directly?

Instead of making feature_bits an array of ints, to declare it as a
uint64_t with the valid feature bits and simply return features &
feature_bits.

Thanks!

> Thanks
>
>
>
> >
> > The goal with this patch series is to let the guest access the status
> > always, even if the device doesn't support _F_STATUS.
> >
> > > Thanks
> > >
> > >
> > > > ---
> > > >   include/net/vhost-vdpa.h |  1 +
> > > >   hw/net/vhost_net.c       | 16 ++++++++++++++--
> > > >   net/vhost-vdpa.c         |  3 +++
> > > >   3 files changed, 18 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > > > index b81f9a6f2a..cfbcce6427 100644
> > > > --- a/include/net/vhost-vdpa.h
> > > > +++ b/include/net/vhost-vdpa.h
> > > > @@ -17,5 +17,6 @@
> > > >   struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> > > >
> > > >   extern const int vdpa_feature_bits[];
> > > > +extern const uint64_t vhost_vdpa_net_added_feature_bits;
> > > >
> > > >   #endif /* VHOST_VDPA_H */
> > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > > index d28f8b974b..7c15cc6e8f 100644
> > > > --- a/hw/net/vhost_net.c
> > > > +++ b/hw/net/vhost_net.c
> > > > @@ -109,10 +109,22 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> > > >       return feature_bits;
> > > >   }
> > > >
> > > > +static uint64_t vhost_net_add_feature_bits(struct vhost_net *net)
> > > > +{
> > > > +    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > > > +        return vhost_vdpa_net_added_feature_bits;
> > > > +    }
> > > > +
> > > > +    return 0;
> > > > +}
> > > > +
> > > >   uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
> > > >   {
> > > > -    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
> > > > -            features);
> > > > +    uint64_t ret = vhost_get_features(&net->dev,
> > > > +                                      vhost_net_get_feature_bits(net),
> > > > +                                      features);
> > > > +
> > > > +    return ret | vhost_net_add_feature_bits(net);
> > > >   }
> > > >   int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
> > > >                            uint32_t config_len)
> > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > index 6d64000202..24d2857593 100644
> > > > --- a/net/vhost-vdpa.c
> > > > +++ b/net/vhost-vdpa.c
> > > > @@ -99,6 +99,9 @@ static const uint64_t vdpa_svq_device_features =
> > > >       BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> > > >       BIT_ULL(VIRTIO_NET_F_STANDBY);
> > > >
> > > > +const uint64_t vhost_vdpa_net_added_feature_bits =
> > > > +    BIT_ULL(VIRTIO_NET_F_STATUS);
> > > > +
> > > >   VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> > > >   {
> > > >       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > >
> >
>
Jason Wang Oct. 28, 2022, 1:58 a.m. UTC | #5
On Thu, Oct 27, 2022 at 6:18 PM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Thu, Oct 27, 2022 at 8:54 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Thu, Oct 27, 2022 at 2:47 PM Eugenio Perez Martin
> > <eperezma@redhat.com> wrote:
> > >
> > > On Thu, Oct 27, 2022 at 6:32 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > >
> > > > 在 2022/10/26 17:53, Eugenio Pérez 写道:
> > > > > Now that qemu can handle and emulate it if the vdpa backend does not
> > > > > support it we can offer it always.
> > > > >
> > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > >
> > > >
> > > > I may miss something but isn't more easier to simply remove the
> > > > _F_STATUS from vdpa_feature_bits[]?
> > > >
> > >
> > > How is that? if we remove it, the guest cannot ack it so it cannot
> > > access the net status, isn't it?
> >
> > My understanding is that the bits stored in the vdpa_feature_bits[]
> > are the features that must be explicitly supported by the vhost
> > device.
>
> (Non English native here, so maybe I don't get what you mean :) ) The
> device may not support them. net simulator lacks some of them
> actually, and it works.

Speaking too fast, I think I meant that, if the bit doesn't belong to
vdpa_feature_bits[], it is assumed to be supported by the Qemu without
the support of the vhost. So Qemu won't even try to validate if vhost
has this support. E.g for vhost-net, we only have:

static const int kernel_feature_bits[] = {
    VIRTIO_F_NOTIFY_ON_EMPTY,
    VIRTIO_RING_F_INDIRECT_DESC,
    VIRTIO_RING_F_EVENT_IDX,
    VIRTIO_NET_F_MRG_RXBUF,
    VIRTIO_F_VERSION_1,
    VIRTIO_NET_F_MTU,
    VIRTIO_F_IOMMU_PLATFORM,
    VIRTIO_F_RING_PACKED,
    VIRTIO_NET_F_HASH_REPORT,
    VHOST_INVALID_FEATURE_BIT
};

You can see there's no STATUS bit there since it is emulated by Qemu.

>
> From what I see these are the only features that will be forwarded to
> the guest as device_features. If it is not in the list, the feature
> will be masked out,

Only when there's no support for this feature from the vhost.

> as if the device does not support it.
>
> So now _F_STATUS it was forwarded only if the device supports it. If
> we remove it from bit_mask, it will never be offered to the guest. But
> we want to offer it always, since we will need it for
> _F_GUEST_ANNOUNCE.
>
> Things get more complex because we actually need to ack it back if the
> device offers it, so the vdpa device can report link_down. We will
> only emulate LINK_UP always in the case the device does not support
> _F_STATUS.
>
> > So if we remove _F_STATUS, Qemu vhost code won't validate if
> > vhost-vdpa device has this support:
> >
> > uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
> >                             uint64_t features)
> > {
> >     const int *bit = feature_bits;
> >     while (*bit != VHOST_INVALID_FEATURE_BIT) {
> >         uint64_t bit_mask = (1ULL << *bit);
> >         if (!(hdev->features & bit_mask)) {
> >             features &= ~bit_mask;
> >         }
> >         bit++;
> >     }
> >     return features;
> > }
> >
>
> Now maybe I'm the one missing something, but why is this not done as a
> masking directly?

Not sure, the code has been there since day 0.

But you can see from the code:

1) if STATUS is in feature_bits, we need validate the hdev->features
and mask it if the vhost doesn't have the support
2) if STATUS is not, we don't do the check and driver may still see STATUS

Thanks

>
> Instead of making feature_bits an array of ints, to declare it as a
> uint64_t with the valid feature bits and simply return features &
> feature_bits.
>
> Thanks!
>
> > Thanks
> >
> >
> >
> > >
> > > The goal with this patch series is to let the guest access the status
> > > always, even if the device doesn't support _F_STATUS.
> > >
> > > > Thanks
> > > >
> > > >
> > > > > ---
> > > > >   include/net/vhost-vdpa.h |  1 +
> > > > >   hw/net/vhost_net.c       | 16 ++++++++++++++--
> > > > >   net/vhost-vdpa.c         |  3 +++
> > > > >   3 files changed, 18 insertions(+), 2 deletions(-)
> > > > >
> > > > > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > > > > index b81f9a6f2a..cfbcce6427 100644
> > > > > --- a/include/net/vhost-vdpa.h
> > > > > +++ b/include/net/vhost-vdpa.h
> > > > > @@ -17,5 +17,6 @@
> > > > >   struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> > > > >
> > > > >   extern const int vdpa_feature_bits[];
> > > > > +extern const uint64_t vhost_vdpa_net_added_feature_bits;
> > > > >
> > > > >   #endif /* VHOST_VDPA_H */
> > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > > > index d28f8b974b..7c15cc6e8f 100644
> > > > > --- a/hw/net/vhost_net.c
> > > > > +++ b/hw/net/vhost_net.c
> > > > > @@ -109,10 +109,22 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> > > > >       return feature_bits;
> > > > >   }
> > > > >
> > > > > +static uint64_t vhost_net_add_feature_bits(struct vhost_net *net)
> > > > > +{
> > > > > +    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > > > > +        return vhost_vdpa_net_added_feature_bits;
> > > > > +    }
> > > > > +
> > > > > +    return 0;
> > > > > +}
> > > > > +
> > > > >   uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
> > > > >   {
> > > > > -    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
> > > > > -            features);
> > > > > +    uint64_t ret = vhost_get_features(&net->dev,
> > > > > +                                      vhost_net_get_feature_bits(net),
> > > > > +                                      features);
> > > > > +
> > > > > +    return ret | vhost_net_add_feature_bits(net);
> > > > >   }
> > > > >   int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
> > > > >                            uint32_t config_len)
> > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > > index 6d64000202..24d2857593 100644
> > > > > --- a/net/vhost-vdpa.c
> > > > > +++ b/net/vhost-vdpa.c
> > > > > @@ -99,6 +99,9 @@ static const uint64_t vdpa_svq_device_features =
> > > > >       BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> > > > >       BIT_ULL(VIRTIO_NET_F_STANDBY);
> > > > >
> > > > > +const uint64_t vhost_vdpa_net_added_feature_bits =
> > > > > +    BIT_ULL(VIRTIO_NET_F_STATUS);
> > > > > +
> > > > >   VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> > > > >   {
> > > > >       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > > >
> > >
> >
>
Eugenio Perez Martin Oct. 28, 2022, 9:29 a.m. UTC | #6
On Fri, Oct 28, 2022 at 3:59 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Thu, Oct 27, 2022 at 6:18 PM Eugenio Perez Martin
> <eperezma@redhat.com> wrote:
> >
> > On Thu, Oct 27, 2022 at 8:54 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Thu, Oct 27, 2022 at 2:47 PM Eugenio Perez Martin
> > > <eperezma@redhat.com> wrote:
> > > >
> > > > On Thu, Oct 27, 2022 at 6:32 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > >
> > > > >
> > > > > 在 2022/10/26 17:53, Eugenio Pérez 写道:
> > > > > > Now that qemu can handle and emulate it if the vdpa backend does not
> > > > > > support it we can offer it always.
> > > > > >
> > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > > >
> > > > >
> > > > > I may miss something but isn't more easier to simply remove the
> > > > > _F_STATUS from vdpa_feature_bits[]?
> > > > >
> > > >
> > > > How is that? if we remove it, the guest cannot ack it so it cannot
> > > > access the net status, isn't it?
> > >
> > > My understanding is that the bits stored in the vdpa_feature_bits[]
> > > are the features that must be explicitly supported by the vhost
> > > device.
> >
> > (Non English native here, so maybe I don't get what you mean :) ) The
> > device may not support them. net simulator lacks some of them
> > actually, and it works.
>
> Speaking too fast, I think I meant that, if the bit doesn't belong to
> vdpa_feature_bits[], it is assumed to be supported by the Qemu without
> the support of the vhost. So Qemu won't even try to validate if vhost
> has this support. E.g for vhost-net, we only have:
>
> static const int kernel_feature_bits[] = {
>     VIRTIO_F_NOTIFY_ON_EMPTY,
>     VIRTIO_RING_F_INDIRECT_DESC,
>     VIRTIO_RING_F_EVENT_IDX,
>     VIRTIO_NET_F_MRG_RXBUF,
>     VIRTIO_F_VERSION_1,
>     VIRTIO_NET_F_MTU,
>     VIRTIO_F_IOMMU_PLATFORM,
>     VIRTIO_F_RING_PACKED,
>     VIRTIO_NET_F_HASH_REPORT,
>     VHOST_INVALID_FEATURE_BIT
> };
>
> You can see there's no STATUS bit there since it is emulated by Qemu.
>

Ok now I get what you mean, and yes we may modify the patches in that direction.

But if we go then we need to modify how qemu ack the features, because
the features that are not in vdpa_feature_bits are not acked to the
device. More on this later.

> >
> > From what I see these are the only features that will be forwarded to
> > the guest as device_features. If it is not in the list, the feature
> > will be masked out,
>
> Only when there's no support for this feature from the vhost.
>
> > as if the device does not support it.
> >
> > So now _F_STATUS it was forwarded only if the device supports it. If
> > we remove it from bit_mask, it will never be offered to the guest. But
> > we want to offer it always, since we will need it for
> > _F_GUEST_ANNOUNCE.
> >
> > Things get more complex because we actually need to ack it back if the
> > device offers it, so the vdpa device can report link_down. We will
> > only emulate LINK_UP always in the case the device does not support
> > _F_STATUS.
> >
> > > So if we remove _F_STATUS, Qemu vhost code won't validate if
> > > vhost-vdpa device has this support:
> > >
> > > uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
> > >                             uint64_t features)
> > > {
> > >     const int *bit = feature_bits;
> > >     while (*bit != VHOST_INVALID_FEATURE_BIT) {
> > >         uint64_t bit_mask = (1ULL << *bit);
> > >         if (!(hdev->features & bit_mask)) {
> > >             features &= ~bit_mask;
> > >         }
> > >         bit++;
> > >     }
> > >     return features;
> > > }
> > >
> >
> > Now maybe I'm the one missing something, but why is this not done as a
> > masking directly?
>
> Not sure, the code has been there since day 0.
>
> But you can see from the code:
>
> 1) if STATUS is in feature_bits, we need validate the hdev->features
> and mask it if the vhost doesn't have the support
> 2) if STATUS is not, we don't do the check and driver may still see STATUS
>

That's useful for _F_GUEST_ANNOUNCE, but we need to ack _F_STATUS for
the device if it supports it. QEMU cannot detect by itself when the
link is not up. I think that setting unconditionally
VIRTIO_NET_S_LINK_UP is actually a regression, since the guest cannot
detect the link down that way.

To enable _F_STATUS unconditionally is only done in the case the
device does not support it, because its emulation is very easy. That
way we support _F_GUEST_ANNOUNCE in all cases without device's
cooperation.

Having said that, should we go the opposite route and ack _F_STATE as
long as the device supports it? As an advantage, all backends should
support that at this moment, isn't it?

Thanks!




> Thanks
>
> >
> > Instead of making feature_bits an array of ints, to declare it as a
> > uint64_t with the valid feature bits and simply return features &
> > feature_bits.
> >
> > Thanks!
> >
> > > Thanks
> > >
> > >
> > >
> > > >
> > > > The goal with this patch series is to let the guest access the status
> > > > always, even if the device doesn't support _F_STATUS.
> > > >
> > > > > Thanks
> > > > >
> > > > >
> > > > > > ---
> > > > > >   include/net/vhost-vdpa.h |  1 +
> > > > > >   hw/net/vhost_net.c       | 16 ++++++++++++++--
> > > > > >   net/vhost-vdpa.c         |  3 +++
> > > > > >   3 files changed, 18 insertions(+), 2 deletions(-)
> > > > > >
> > > > > > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > > > > > index b81f9a6f2a..cfbcce6427 100644
> > > > > > --- a/include/net/vhost-vdpa.h
> > > > > > +++ b/include/net/vhost-vdpa.h
> > > > > > @@ -17,5 +17,6 @@
> > > > > >   struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> > > > > >
> > > > > >   extern const int vdpa_feature_bits[];
> > > > > > +extern const uint64_t vhost_vdpa_net_added_feature_bits;
> > > > > >
> > > > > >   #endif /* VHOST_VDPA_H */
> > > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > > > > index d28f8b974b..7c15cc6e8f 100644
> > > > > > --- a/hw/net/vhost_net.c
> > > > > > +++ b/hw/net/vhost_net.c
> > > > > > @@ -109,10 +109,22 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> > > > > >       return feature_bits;
> > > > > >   }
> > > > > >
> > > > > > +static uint64_t vhost_net_add_feature_bits(struct vhost_net *net)
> > > > > > +{
> > > > > > +    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > > > > > +        return vhost_vdpa_net_added_feature_bits;
> > > > > > +    }
> > > > > > +
> > > > > > +    return 0;
> > > > > > +}
> > > > > > +
> > > > > >   uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
> > > > > >   {
> > > > > > -    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
> > > > > > -            features);
> > > > > > +    uint64_t ret = vhost_get_features(&net->dev,
> > > > > > +                                      vhost_net_get_feature_bits(net),
> > > > > > +                                      features);
> > > > > > +
> > > > > > +    return ret | vhost_net_add_feature_bits(net);
> > > > > >   }
> > > > > >   int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
> > > > > >                            uint32_t config_len)
> > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > > > index 6d64000202..24d2857593 100644
> > > > > > --- a/net/vhost-vdpa.c
> > > > > > +++ b/net/vhost-vdpa.c
> > > > > > @@ -99,6 +99,9 @@ static const uint64_t vdpa_svq_device_features =
> > > > > >       BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> > > > > >       BIT_ULL(VIRTIO_NET_F_STANDBY);
> > > > > >
> > > > > > +const uint64_t vhost_vdpa_net_added_feature_bits =
> > > > > > +    BIT_ULL(VIRTIO_NET_F_STATUS);
> > > > > > +
> > > > > >   VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> > > > > >   {
> > > > > >       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > > > >
> > > >
> > >
> >
>
Jason Wang Nov. 1, 2022, 8:09 a.m. UTC | #7
On Fri, Oct 28, 2022 at 5:30 PM Eugenio Perez Martin
<eperezma@redhat.com> wrote:
>
> On Fri, Oct 28, 2022 at 3:59 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Thu, Oct 27, 2022 at 6:18 PM Eugenio Perez Martin
> > <eperezma@redhat.com> wrote:
> > >
> > > On Thu, Oct 27, 2022 at 8:54 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Thu, Oct 27, 2022 at 2:47 PM Eugenio Perez Martin
> > > > <eperezma@redhat.com> wrote:
> > > > >
> > > > > On Thu, Oct 27, 2022 at 6:32 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > >
> > > > > >
> > > > > > 在 2022/10/26 17:53, Eugenio Pérez 写道:
> > > > > > > Now that qemu can handle and emulate it if the vdpa backend does not
> > > > > > > support it we can offer it always.
> > > > > > >
> > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > > > >
> > > > > >
> > > > > > I may miss something but isn't more easier to simply remove the
> > > > > > _F_STATUS from vdpa_feature_bits[]?
> > > > > >
> > > > >
> > > > > How is that? if we remove it, the guest cannot ack it so it cannot
> > > > > access the net status, isn't it?
> > > >
> > > > My understanding is that the bits stored in the vdpa_feature_bits[]
> > > > are the features that must be explicitly supported by the vhost
> > > > device.
> > >
> > > (Non English native here, so maybe I don't get what you mean :) ) The
> > > device may not support them. net simulator lacks some of them
> > > actually, and it works.
> >
> > Speaking too fast, I think I meant that, if the bit doesn't belong to
> > vdpa_feature_bits[], it is assumed to be supported by the Qemu without
> > the support of the vhost. So Qemu won't even try to validate if vhost
> > has this support. E.g for vhost-net, we only have:
> >
> > static const int kernel_feature_bits[] = {
> >     VIRTIO_F_NOTIFY_ON_EMPTY,
> >     VIRTIO_RING_F_INDIRECT_DESC,
> >     VIRTIO_RING_F_EVENT_IDX,
> >     VIRTIO_NET_F_MRG_RXBUF,
> >     VIRTIO_F_VERSION_1,
> >     VIRTIO_NET_F_MTU,
> >     VIRTIO_F_IOMMU_PLATFORM,
> >     VIRTIO_F_RING_PACKED,
> >     VIRTIO_NET_F_HASH_REPORT,
> >     VHOST_INVALID_FEATURE_BIT
> > };
> >
> > You can see there's no STATUS bit there since it is emulated by Qemu.
> >
>
> Ok now I get what you mean, and yes we may modify the patches in that direction.
>
> But if we go then we need to modify how qemu ack the features, because
> the features that are not in vdpa_feature_bits are not acked to the
> device. More on this later.
>
> > >
> > > From what I see these are the only features that will be forwarded to
> > > the guest as device_features. If it is not in the list, the feature
> > > will be masked out,
> >
> > Only when there's no support for this feature from the vhost.
> >
> > > as if the device does not support it.
> > >
> > > So now _F_STATUS it was forwarded only if the device supports it. If
> > > we remove it from bit_mask, it will never be offered to the guest. But
> > > we want to offer it always, since we will need it for
> > > _F_GUEST_ANNOUNCE.
> > >
> > > Things get more complex because we actually need to ack it back if the
> > > device offers it, so the vdpa device can report link_down. We will
> > > only emulate LINK_UP always in the case the device does not support
> > > _F_STATUS.
> > >
> > > > So if we remove _F_STATUS, Qemu vhost code won't validate if
> > > > vhost-vdpa device has this support:
> > > >
> > > > uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
> > > >                             uint64_t features)
> > > > {
> > > >     const int *bit = feature_bits;
> > > >     while (*bit != VHOST_INVALID_FEATURE_BIT) {
> > > >         uint64_t bit_mask = (1ULL << *bit);
> > > >         if (!(hdev->features & bit_mask)) {
> > > >             features &= ~bit_mask;
> > > >         }
> > > >         bit++;
> > > >     }
> > > >     return features;
> > > > }
> > > >
> > >
> > > Now maybe I'm the one missing something, but why is this not done as a
> > > masking directly?
> >
> > Not sure, the code has been there since day 0.
> >
> > But you can see from the code:
> >
> > 1) if STATUS is in feature_bits, we need validate the hdev->features
> > and mask it if the vhost doesn't have the support
> > 2) if STATUS is not, we don't do the check and driver may still see STATUS
> >
>
> That's useful for _F_GUEST_ANNOUNCE, but we need to ack _F_STATUS for
> the device if it supports it.

Rethink about this, I don't see why ANNOUNCE depends on STATUS (spec
doesn't say so).

> QEMU cannot detect by itself when the
> link is not up. I think that setting unconditionally
> VIRTIO_NET_S_LINK_UP is actually a regression, since the guest cannot
> detect the link down that way.

I think the idea is to still read status from config if the device
supports this.

>
> To enable _F_STATUS unconditionally is only done in the case the
> device does not support it, because its emulation is very easy. That
> way we support _F_GUEST_ANNOUNCE in all cases without device's
> cooperation.
>
> Having said that, should we go the opposite route and ack _F_STATE as
> long as the device supports it? As an advantage, all backends should
> support that at this moment, isn't it?

So I think the method used in this patch is fine, but I wonder if it's
better to move it to the vhost layer instead of doing it in vhost_net
since we do the features validation there. We probably need another
table as input for get/set features there?

Thanks

>
> Thanks!
>
>
>
>
> > Thanks
> >
> > >
> > > Instead of making feature_bits an array of ints, to declare it as a
> > > uint64_t with the valid feature bits and simply return features &
> > > feature_bits.
> > >
> > > Thanks!
> > >
> > > > Thanks
> > > >
> > > >
> > > >
> > > > >
> > > > > The goal with this patch series is to let the guest access the status
> > > > > always, even if the device doesn't support _F_STATUS.
> > > > >
> > > > > > Thanks
> > > > > >
> > > > > >
> > > > > > > ---
> > > > > > >   include/net/vhost-vdpa.h |  1 +
> > > > > > >   hw/net/vhost_net.c       | 16 ++++++++++++++--
> > > > > > >   net/vhost-vdpa.c         |  3 +++
> > > > > > >   3 files changed, 18 insertions(+), 2 deletions(-)
> > > > > > >
> > > > > > > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > > > > > > index b81f9a6f2a..cfbcce6427 100644
> > > > > > > --- a/include/net/vhost-vdpa.h
> > > > > > > +++ b/include/net/vhost-vdpa.h
> > > > > > > @@ -17,5 +17,6 @@
> > > > > > >   struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> > > > > > >
> > > > > > >   extern const int vdpa_feature_bits[];
> > > > > > > +extern const uint64_t vhost_vdpa_net_added_feature_bits;
> > > > > > >
> > > > > > >   #endif /* VHOST_VDPA_H */
> > > > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > > > > > index d28f8b974b..7c15cc6e8f 100644
> > > > > > > --- a/hw/net/vhost_net.c
> > > > > > > +++ b/hw/net/vhost_net.c
> > > > > > > @@ -109,10 +109,22 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> > > > > > >       return feature_bits;
> > > > > > >   }
> > > > > > >
> > > > > > > +static uint64_t vhost_net_add_feature_bits(struct vhost_net *net)
> > > > > > > +{
> > > > > > > +    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > > > > > > +        return vhost_vdpa_net_added_feature_bits;
> > > > > > > +    }
> > > > > > > +
> > > > > > > +    return 0;
> > > > > > > +}
> > > > > > > +
> > > > > > >   uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
> > > > > > >   {
> > > > > > > -    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
> > > > > > > -            features);
> > > > > > > +    uint64_t ret = vhost_get_features(&net->dev,
> > > > > > > +                                      vhost_net_get_feature_bits(net),
> > > > > > > +                                      features);
> > > > > > > +
> > > > > > > +    return ret | vhost_net_add_feature_bits(net);
> > > > > > >   }
> > > > > > >   int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
> > > > > > >                            uint32_t config_len)
> > > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > > > > index 6d64000202..24d2857593 100644
> > > > > > > --- a/net/vhost-vdpa.c
> > > > > > > +++ b/net/vhost-vdpa.c
> > > > > > > @@ -99,6 +99,9 @@ static const uint64_t vdpa_svq_device_features =
> > > > > > >       BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> > > > > > >       BIT_ULL(VIRTIO_NET_F_STANDBY);
> > > > > > >
> > > > > > > +const uint64_t vhost_vdpa_net_added_feature_bits =
> > > > > > > +    BIT_ULL(VIRTIO_NET_F_STATUS);
> > > > > > > +
> > > > > > >   VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> > > > > > >   {
> > > > > > >       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > > > > >
> > > > >
> > > >
> > >
> >
>
Eugenio Perez Martin Nov. 2, 2022, 11:18 a.m. UTC | #8
On Tue, Nov 1, 2022 at 9:10 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Fri, Oct 28, 2022 at 5:30 PM Eugenio Perez Martin
> <eperezma@redhat.com> wrote:
> >
> > On Fri, Oct 28, 2022 at 3:59 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Thu, Oct 27, 2022 at 6:18 PM Eugenio Perez Martin
> > > <eperezma@redhat.com> wrote:
> > > >
> > > > On Thu, Oct 27, 2022 at 8:54 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > >
> > > > > On Thu, Oct 27, 2022 at 2:47 PM Eugenio Perez Martin
> > > > > <eperezma@redhat.com> wrote:
> > > > > >
> > > > > > On Thu, Oct 27, 2022 at 6:32 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > >
> > > > > > >
> > > > > > > 在 2022/10/26 17:53, Eugenio Pérez 写道:
> > > > > > > > Now that qemu can handle and emulate it if the vdpa backend does not
> > > > > > > > support it we can offer it always.
> > > > > > > >
> > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > > > > >
> > > > > > >
> > > > > > > I may miss something but isn't more easier to simply remove the
> > > > > > > _F_STATUS from vdpa_feature_bits[]?
> > > > > > >
> > > > > >
> > > > > > How is that? if we remove it, the guest cannot ack it so it cannot
> > > > > > access the net status, isn't it?
> > > > >
> > > > > My understanding is that the bits stored in the vdpa_feature_bits[]
> > > > > are the features that must be explicitly supported by the vhost
> > > > > device.
> > > >
> > > > (Non English native here, so maybe I don't get what you mean :) ) The
> > > > device may not support them. net simulator lacks some of them
> > > > actually, and it works.
> > >
> > > Speaking too fast, I think I meant that, if the bit doesn't belong to
> > > vdpa_feature_bits[], it is assumed to be supported by the Qemu without
> > > the support of the vhost. So Qemu won't even try to validate if vhost
> > > has this support. E.g for vhost-net, we only have:
> > >
> > > static const int kernel_feature_bits[] = {
> > >     VIRTIO_F_NOTIFY_ON_EMPTY,
> > >     VIRTIO_RING_F_INDIRECT_DESC,
> > >     VIRTIO_RING_F_EVENT_IDX,
> > >     VIRTIO_NET_F_MRG_RXBUF,
> > >     VIRTIO_F_VERSION_1,
> > >     VIRTIO_NET_F_MTU,
> > >     VIRTIO_F_IOMMU_PLATFORM,
> > >     VIRTIO_F_RING_PACKED,
> > >     VIRTIO_NET_F_HASH_REPORT,
> > >     VHOST_INVALID_FEATURE_BIT
> > > };
> > >
> > > You can see there's no STATUS bit there since it is emulated by Qemu.
> > >
> >
> > Ok now I get what you mean, and yes we may modify the patches in that direction.
> >
> > But if we go then we need to modify how qemu ack the features, because
> > the features that are not in vdpa_feature_bits are not acked to the
> > device. More on this later.
> >
> > > >
> > > > From what I see these are the only features that will be forwarded to
> > > > the guest as device_features. If it is not in the list, the feature
> > > > will be masked out,
> > >
> > > Only when there's no support for this feature from the vhost.
> > >
> > > > as if the device does not support it.
> > > >
> > > > So now _F_STATUS it was forwarded only if the device supports it. If
> > > > we remove it from bit_mask, it will never be offered to the guest. But
> > > > we want to offer it always, since we will need it for
> > > > _F_GUEST_ANNOUNCE.
> > > >
> > > > Things get more complex because we actually need to ack it back if the
> > > > device offers it, so the vdpa device can report link_down. We will
> > > > only emulate LINK_UP always in the case the device does not support
> > > > _F_STATUS.
> > > >
> > > > > So if we remove _F_STATUS, Qemu vhost code won't validate if
> > > > > vhost-vdpa device has this support:
> > > > >
> > > > > uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
> > > > >                             uint64_t features)
> > > > > {
> > > > >     const int *bit = feature_bits;
> > > > >     while (*bit != VHOST_INVALID_FEATURE_BIT) {
> > > > >         uint64_t bit_mask = (1ULL << *bit);
> > > > >         if (!(hdev->features & bit_mask)) {
> > > > >             features &= ~bit_mask;
> > > > >         }
> > > > >         bit++;
> > > > >     }
> > > > >     return features;
> > > > > }
> > > > >
> > > >
> > > > Now maybe I'm the one missing something, but why is this not done as a
> > > > masking directly?
> > >
> > > Not sure, the code has been there since day 0.
> > >
> > > But you can see from the code:
> > >
> > > 1) if STATUS is in feature_bits, we need validate the hdev->features
> > > and mask it if the vhost doesn't have the support
> > > 2) if STATUS is not, we don't do the check and driver may still see STATUS
> > >
> >
> > That's useful for _F_GUEST_ANNOUNCE, but we need to ack _F_STATUS for
> > the device if it supports it.
>
> Rethink about this, I don't see why ANNOUNCE depends on STATUS (spec
> doesn't say so).
>

It is needed for the guest to read the status bit:
"""
status only exists if VIRTIO_NET_F_STATUS is set. Two read-only bits
(for the driver) are currently defined for the status field:
VIRTIO_NET_S_LINK_UP and VIRTIO_NET_S_ANNOUNCE.
"""

A change on the standard could be possible, like "status only exists
if VIRTIO_NET_F_STATUS or VIRTIO_NET_F_GUEST_ANNOUNCE is set".
However, Linux drivers already expect _F_STATUS to read _S_ANNOUNCE
and to emulate _F_STATUS in case the device doesn't support it should
not be a big deal in my opinion.

> > QEMU cannot detect by itself when the
> > link is not up. I think that setting unconditionally
> > VIRTIO_NET_S_LINK_UP is actually a regression, since the guest cannot
> > detect the link down that way.
>
> I think the idea is to still read status from config if the device
> supports this.
>

Yes, that's my point. If I delete it from vdpa_feature_bits, it will
not be acked to the device, so we cannot read status from the device.

> >
> > To enable _F_STATUS unconditionally is only done in the case the
> > device does not support it, because its emulation is very easy. That
> > way we support _F_GUEST_ANNOUNCE in all cases without device's
> > cooperation.
> >
> > Having said that, should we go the opposite route and ack _F_STATE as
> > long as the device supports it? As an advantage, all backends should
> > support that at this moment, isn't it?
>
> So I think the method used in this patch is fine, but I wonder if it's
> better to move it to the vhost layer instead of doing it in vhost_net
> since we do the features validation there. We probably need another
> table as input for get/set features there?
>

We can discuss how to do it for sure. But as you pointed out,
vhost_net and virtio_net already modify the features received from the
devices, so it makes sense to me to modify the features set by the
guest.

The problem is that we need to transmit to vhost when ack _F_STATUS
and when not. The first proposal was to add a new member of vhost_vdpa
but this is not optimal.

If we add a new table it should be a static const one, and vhost_vdpa
should have a pointer to it, isn't it? Something like features that
are emulated by qemu so they must be offered always to the guest?

Thanks!

> Thanks
>
> >
> > Thanks!
> >
> >
> >
> >
> > > Thanks
> > >
> > > >
> > > > Instead of making feature_bits an array of ints, to declare it as a
> > > > uint64_t with the valid feature bits and simply return features &
> > > > feature_bits.
> > > >
> > > > Thanks!
> > > >
> > > > > Thanks
> > > > >
> > > > >
> > > > >
> > > > > >
> > > > > > The goal with this patch series is to let the guest access the status
> > > > > > always, even if the device doesn't support _F_STATUS.
> > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > >
> > > > > > > > ---
> > > > > > > >   include/net/vhost-vdpa.h |  1 +
> > > > > > > >   hw/net/vhost_net.c       | 16 ++++++++++++++--
> > > > > > > >   net/vhost-vdpa.c         |  3 +++
> > > > > > > >   3 files changed, 18 insertions(+), 2 deletions(-)
> > > > > > > >
> > > > > > > > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > > > > > > > index b81f9a6f2a..cfbcce6427 100644
> > > > > > > > --- a/include/net/vhost-vdpa.h
> > > > > > > > +++ b/include/net/vhost-vdpa.h
> > > > > > > > @@ -17,5 +17,6 @@
> > > > > > > >   struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> > > > > > > >
> > > > > > > >   extern const int vdpa_feature_bits[];
> > > > > > > > +extern const uint64_t vhost_vdpa_net_added_feature_bits;
> > > > > > > >
> > > > > > > >   #endif /* VHOST_VDPA_H */
> > > > > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > > > > > > index d28f8b974b..7c15cc6e8f 100644
> > > > > > > > --- a/hw/net/vhost_net.c
> > > > > > > > +++ b/hw/net/vhost_net.c
> > > > > > > > @@ -109,10 +109,22 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> > > > > > > >       return feature_bits;
> > > > > > > >   }
> > > > > > > >
> > > > > > > > +static uint64_t vhost_net_add_feature_bits(struct vhost_net *net)
> > > > > > > > +{
> > > > > > > > +    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > > > > > > > +        return vhost_vdpa_net_added_feature_bits;
> > > > > > > > +    }
> > > > > > > > +
> > > > > > > > +    return 0;
> > > > > > > > +}
> > > > > > > > +
> > > > > > > >   uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
> > > > > > > >   {
> > > > > > > > -    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
> > > > > > > > -            features);
> > > > > > > > +    uint64_t ret = vhost_get_features(&net->dev,
> > > > > > > > +                                      vhost_net_get_feature_bits(net),
> > > > > > > > +                                      features);
> > > > > > > > +
> > > > > > > > +    return ret | vhost_net_add_feature_bits(net);
> > > > > > > >   }
> > > > > > > >   int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
> > > > > > > >                            uint32_t config_len)
> > > > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > > > > > index 6d64000202..24d2857593 100644
> > > > > > > > --- a/net/vhost-vdpa.c
> > > > > > > > +++ b/net/vhost-vdpa.c
> > > > > > > > @@ -99,6 +99,9 @@ static const uint64_t vdpa_svq_device_features =
> > > > > > > >       BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> > > > > > > >       BIT_ULL(VIRTIO_NET_F_STANDBY);
> > > > > > > >
> > > > > > > > +const uint64_t vhost_vdpa_net_added_feature_bits =
> > > > > > > > +    BIT_ULL(VIRTIO_NET_F_STATUS);
> > > > > > > > +
> > > > > > > >   VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> > > > > > > >   {
> > > > > > > >       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > > > > > >
> > > > > >
> > > > >
> > > >
> > >
> >
>
Jason Wang Nov. 3, 2022, 3:21 a.m. UTC | #9
On Wed, Nov 2, 2022 at 7:19 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
>
> On Tue, Nov 1, 2022 at 9:10 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Fri, Oct 28, 2022 at 5:30 PM Eugenio Perez Martin
> > <eperezma@redhat.com> wrote:
> > >
> > > On Fri, Oct 28, 2022 at 3:59 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Thu, Oct 27, 2022 at 6:18 PM Eugenio Perez Martin
> > > > <eperezma@redhat.com> wrote:
> > > > >
> > > > > On Thu, Oct 27, 2022 at 8:54 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > >
> > > > > > On Thu, Oct 27, 2022 at 2:47 PM Eugenio Perez Martin
> > > > > > <eperezma@redhat.com> wrote:
> > > > > > >
> > > > > > > On Thu, Oct 27, 2022 at 6:32 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > >
> > > > > > > >
> > > > > > > > 在 2022/10/26 17:53, Eugenio Pérez 写道:
> > > > > > > > > Now that qemu can handle and emulate it if the vdpa backend does not
> > > > > > > > > support it we can offer it always.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > > > > > >
> > > > > > > >
> > > > > > > > I may miss something but isn't more easier to simply remove the
> > > > > > > > _F_STATUS from vdpa_feature_bits[]?
> > > > > > > >
> > > > > > >
> > > > > > > How is that? if we remove it, the guest cannot ack it so it cannot
> > > > > > > access the net status, isn't it?
> > > > > >
> > > > > > My understanding is that the bits stored in the vdpa_feature_bits[]
> > > > > > are the features that must be explicitly supported by the vhost
> > > > > > device.
> > > > >
> > > > > (Non English native here, so maybe I don't get what you mean :) ) The
> > > > > device may not support them. net simulator lacks some of them
> > > > > actually, and it works.
> > > >
> > > > Speaking too fast, I think I meant that, if the bit doesn't belong to
> > > > vdpa_feature_bits[], it is assumed to be supported by the Qemu without
> > > > the support of the vhost. So Qemu won't even try to validate if vhost
> > > > has this support. E.g for vhost-net, we only have:
> > > >
> > > > static const int kernel_feature_bits[] = {
> > > >     VIRTIO_F_NOTIFY_ON_EMPTY,
> > > >     VIRTIO_RING_F_INDIRECT_DESC,
> > > >     VIRTIO_RING_F_EVENT_IDX,
> > > >     VIRTIO_NET_F_MRG_RXBUF,
> > > >     VIRTIO_F_VERSION_1,
> > > >     VIRTIO_NET_F_MTU,
> > > >     VIRTIO_F_IOMMU_PLATFORM,
> > > >     VIRTIO_F_RING_PACKED,
> > > >     VIRTIO_NET_F_HASH_REPORT,
> > > >     VHOST_INVALID_FEATURE_BIT
> > > > };
> > > >
> > > > You can see there's no STATUS bit there since it is emulated by Qemu.
> > > >
> > >
> > > Ok now I get what you mean, and yes we may modify the patches in that direction.
> > >
> > > But if we go then we need to modify how qemu ack the features, because
> > > the features that are not in vdpa_feature_bits are not acked to the
> > > device. More on this later.
> > >
> > > > >
> > > > > From what I see these are the only features that will be forwarded to
> > > > > the guest as device_features. If it is not in the list, the feature
> > > > > will be masked out,
> > > >
> > > > Only when there's no support for this feature from the vhost.
> > > >
> > > > > as if the device does not support it.
> > > > >
> > > > > So now _F_STATUS it was forwarded only if the device supports it. If
> > > > > we remove it from bit_mask, it will never be offered to the guest. But
> > > > > we want to offer it always, since we will need it for
> > > > > _F_GUEST_ANNOUNCE.
> > > > >
> > > > > Things get more complex because we actually need to ack it back if the
> > > > > device offers it, so the vdpa device can report link_down. We will
> > > > > only emulate LINK_UP always in the case the device does not support
> > > > > _F_STATUS.
> > > > >
> > > > > > So if we remove _F_STATUS, Qemu vhost code won't validate if
> > > > > > vhost-vdpa device has this support:
> > > > > >
> > > > > > uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
> > > > > >                             uint64_t features)
> > > > > > {
> > > > > >     const int *bit = feature_bits;
> > > > > >     while (*bit != VHOST_INVALID_FEATURE_BIT) {
> > > > > >         uint64_t bit_mask = (1ULL << *bit);
> > > > > >         if (!(hdev->features & bit_mask)) {
> > > > > >             features &= ~bit_mask;
> > > > > >         }
> > > > > >         bit++;
> > > > > >     }
> > > > > >     return features;
> > > > > > }
> > > > > >
> > > > >
> > > > > Now maybe I'm the one missing something, but why is this not done as a
> > > > > masking directly?
> > > >
> > > > Not sure, the code has been there since day 0.
> > > >
> > > > But you can see from the code:
> > > >
> > > > 1) if STATUS is in feature_bits, we need validate the hdev->features
> > > > and mask it if the vhost doesn't have the support
> > > > 2) if STATUS is not, we don't do the check and driver may still see STATUS
> > > >
> > >
> > > That's useful for _F_GUEST_ANNOUNCE, but we need to ack _F_STATUS for
> > > the device if it supports it.
> >
> > Rethink about this, I don't see why ANNOUNCE depends on STATUS (spec
> > doesn't say so).
> >
>
> It is needed for the guest to read the status bit:
> """
> status only exists if VIRTIO_NET_F_STATUS is set. Two read-only bits
> (for the driver) are currently defined for the status field:
> VIRTIO_NET_S_LINK_UP and VIRTIO_NET_S_ANNOUNCE.
> """
>
> A change on the standard could be possible, like "status only exists
> if VIRTIO_NET_F_STATUS or VIRTIO_NET_F_GUEST_ANNOUNCE is set".
> However, Linux drivers already expect _F_STATUS to read _S_ANNOUNCE
> and to emulate _F_STATUS in case the device doesn't support it should
> not be a big deal in my opinion.

RIght, so I think we need a spec patch to clarify the dependency,
currently, spec said ANNOUNCE depends on CTRL_VQ.

>
> > > QEMU cannot detect by itself when the
> > > link is not up. I think that setting unconditionally
> > > VIRTIO_NET_S_LINK_UP is actually a regression, since the guest cannot
> > > detect the link down that way.
> >
> > I think the idea is to still read status from config if the device
> > supports this.
> >
>
> Yes, that's my point. If I delete it from vdpa_feature_bits, it will
> not be acked to the device, so we cannot read status from the device.
>
> > >
> > > To enable _F_STATUS unconditionally is only done in the case the
> > > device does not support it, because its emulation is very easy. That
> > > way we support _F_GUEST_ANNOUNCE in all cases without device's
> > > cooperation.
> > >
> > > Having said that, should we go the opposite route and ack _F_STATE as
> > > long as the device supports it? As an advantage, all backends should
> > > support that at this moment, isn't it?
> >
> > So I think the method used in this patch is fine, but I wonder if it's
> > better to move it to the vhost layer instead of doing it in vhost_net
> > since we do the features validation there. We probably need another
> > table as input for get/set features there?
> >
>
> We can discuss how to do it for sure. But as you pointed out,
> vhost_net and virtio_net already modify the features received from the
> devices, so it makes sense to me to modify the features set by the
> guest.
>
> The problem is that we need to transmit to vhost when ack _F_STATUS
> and when not. The first proposal was to add a new member of vhost_vdpa
> but this is not optimal.
>
> If we add a new table it should be a static const one, and vhost_vdpa
> should have a pointer to it, isn't it?

Yes.

> Something like features that
> are emulated by qemu so they must be offered always to the guest?

Kind of, actually it should be the features:

1) could be always seen by guest
2) when vhost device have this feature, use that
3) when vhost device doesn't have this feature, emulate one

But a question still, is there a vDPA parent that can't do _F_STATUS
now (if not, we probably don't need to bother now).

Thanks

>
> Thanks!
>
> > Thanks
> >
> > >
> > > Thanks!
> > >
> > >
> > >
> > >
> > > > Thanks
> > > >
> > > > >
> > > > > Instead of making feature_bits an array of ints, to declare it as a
> > > > > uint64_t with the valid feature bits and simply return features &
> > > > > feature_bits.
> > > > >
> > > > > Thanks!
> > > > >
> > > > > > Thanks
> > > > > >
> > > > > >
> > > > > >
> > > > > > >
> > > > > > > The goal with this patch series is to let the guest access the status
> > > > > > > always, even if the device doesn't support _F_STATUS.
> > > > > > >
> > > > > > > > Thanks
> > > > > > > >
> > > > > > > >
> > > > > > > > > ---
> > > > > > > > >   include/net/vhost-vdpa.h |  1 +
> > > > > > > > >   hw/net/vhost_net.c       | 16 ++++++++++++++--
> > > > > > > > >   net/vhost-vdpa.c         |  3 +++
> > > > > > > > >   3 files changed, 18 insertions(+), 2 deletions(-)
> > > > > > > > >
> > > > > > > > > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > > > > > > > > index b81f9a6f2a..cfbcce6427 100644
> > > > > > > > > --- a/include/net/vhost-vdpa.h
> > > > > > > > > +++ b/include/net/vhost-vdpa.h
> > > > > > > > > @@ -17,5 +17,6 @@
> > > > > > > > >   struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> > > > > > > > >
> > > > > > > > >   extern const int vdpa_feature_bits[];
> > > > > > > > > +extern const uint64_t vhost_vdpa_net_added_feature_bits;
> > > > > > > > >
> > > > > > > > >   #endif /* VHOST_VDPA_H */
> > > > > > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > > > > > > > index d28f8b974b..7c15cc6e8f 100644
> > > > > > > > > --- a/hw/net/vhost_net.c
> > > > > > > > > +++ b/hw/net/vhost_net.c
> > > > > > > > > @@ -109,10 +109,22 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> > > > > > > > >       return feature_bits;
> > > > > > > > >   }
> > > > > > > > >
> > > > > > > > > +static uint64_t vhost_net_add_feature_bits(struct vhost_net *net)
> > > > > > > > > +{
> > > > > > > > > +    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > > > > > > > > +        return vhost_vdpa_net_added_feature_bits;
> > > > > > > > > +    }
> > > > > > > > > +
> > > > > > > > > +    return 0;
> > > > > > > > > +}
> > > > > > > > > +
> > > > > > > > >   uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
> > > > > > > > >   {
> > > > > > > > > -    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
> > > > > > > > > -            features);
> > > > > > > > > +    uint64_t ret = vhost_get_features(&net->dev,
> > > > > > > > > +                                      vhost_net_get_feature_bits(net),
> > > > > > > > > +                                      features);
> > > > > > > > > +
> > > > > > > > > +    return ret | vhost_net_add_feature_bits(net);
> > > > > > > > >   }
> > > > > > > > >   int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
> > > > > > > > >                            uint32_t config_len)
> > > > > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > > > > > > index 6d64000202..24d2857593 100644
> > > > > > > > > --- a/net/vhost-vdpa.c
> > > > > > > > > +++ b/net/vhost-vdpa.c
> > > > > > > > > @@ -99,6 +99,9 @@ static const uint64_t vdpa_svq_device_features =
> > > > > > > > >       BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> > > > > > > > >       BIT_ULL(VIRTIO_NET_F_STANDBY);
> > > > > > > > >
> > > > > > > > > +const uint64_t vhost_vdpa_net_added_feature_bits =
> > > > > > > > > +    BIT_ULL(VIRTIO_NET_F_STATUS);
> > > > > > > > > +
> > > > > > > > >   VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> > > > > > > > >   {
> > > > > > > > >       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > > > > > > >
> > > > > > >
> > > > > >
> > > > >
> > > >
> > >
> >
>
Eugenio Perez Martin Nov. 3, 2022, 8:11 a.m. UTC | #10
On Thu, Nov 3, 2022 at 4:21 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Wed, Nov 2, 2022 at 7:19 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
> >
> > On Tue, Nov 1, 2022 at 9:10 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Fri, Oct 28, 2022 at 5:30 PM Eugenio Perez Martin
> > > <eperezma@redhat.com> wrote:
> > > >
> > > > On Fri, Oct 28, 2022 at 3:59 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > >
> > > > > On Thu, Oct 27, 2022 at 6:18 PM Eugenio Perez Martin
> > > > > <eperezma@redhat.com> wrote:
> > > > > >
> > > > > > On Thu, Oct 27, 2022 at 8:54 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > >
> > > > > > > On Thu, Oct 27, 2022 at 2:47 PM Eugenio Perez Martin
> > > > > > > <eperezma@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Thu, Oct 27, 2022 at 6:32 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > 在 2022/10/26 17:53, Eugenio Pérez 写道:
> > > > > > > > > > Now that qemu can handle and emulate it if the vdpa backend does not
> > > > > > > > > > support it we can offer it always.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > I may miss something but isn't more easier to simply remove the
> > > > > > > > > _F_STATUS from vdpa_feature_bits[]?
> > > > > > > > >
> > > > > > > >
> > > > > > > > How is that? if we remove it, the guest cannot ack it so it cannot
> > > > > > > > access the net status, isn't it?
> > > > > > >
> > > > > > > My understanding is that the bits stored in the vdpa_feature_bits[]
> > > > > > > are the features that must be explicitly supported by the vhost
> > > > > > > device.
> > > > > >
> > > > > > (Non English native here, so maybe I don't get what you mean :) ) The
> > > > > > device may not support them. net simulator lacks some of them
> > > > > > actually, and it works.
> > > > >
> > > > > Speaking too fast, I think I meant that, if the bit doesn't belong to
> > > > > vdpa_feature_bits[], it is assumed to be supported by the Qemu without
> > > > > the support of the vhost. So Qemu won't even try to validate if vhost
> > > > > has this support. E.g for vhost-net, we only have:
> > > > >
> > > > > static const int kernel_feature_bits[] = {
> > > > >     VIRTIO_F_NOTIFY_ON_EMPTY,
> > > > >     VIRTIO_RING_F_INDIRECT_DESC,
> > > > >     VIRTIO_RING_F_EVENT_IDX,
> > > > >     VIRTIO_NET_F_MRG_RXBUF,
> > > > >     VIRTIO_F_VERSION_1,
> > > > >     VIRTIO_NET_F_MTU,
> > > > >     VIRTIO_F_IOMMU_PLATFORM,
> > > > >     VIRTIO_F_RING_PACKED,
> > > > >     VIRTIO_NET_F_HASH_REPORT,
> > > > >     VHOST_INVALID_FEATURE_BIT
> > > > > };
> > > > >
> > > > > You can see there's no STATUS bit there since it is emulated by Qemu.
> > > > >
> > > >
> > > > Ok now I get what you mean, and yes we may modify the patches in that direction.
> > > >
> > > > But if we go then we need to modify how qemu ack the features, because
> > > > the features that are not in vdpa_feature_bits are not acked to the
> > > > device. More on this later.
> > > >
> > > > > >
> > > > > > From what I see these are the only features that will be forwarded to
> > > > > > the guest as device_features. If it is not in the list, the feature
> > > > > > will be masked out,
> > > > >
> > > > > Only when there's no support for this feature from the vhost.
> > > > >
> > > > > > as if the device does not support it.
> > > > > >
> > > > > > So now _F_STATUS it was forwarded only if the device supports it. If
> > > > > > we remove it from bit_mask, it will never be offered to the guest. But
> > > > > > we want to offer it always, since we will need it for
> > > > > > _F_GUEST_ANNOUNCE.
> > > > > >
> > > > > > Things get more complex because we actually need to ack it back if the
> > > > > > device offers it, so the vdpa device can report link_down. We will
> > > > > > only emulate LINK_UP always in the case the device does not support
> > > > > > _F_STATUS.
> > > > > >
> > > > > > > So if we remove _F_STATUS, Qemu vhost code won't validate if
> > > > > > > vhost-vdpa device has this support:
> > > > > > >
> > > > > > > uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
> > > > > > >                             uint64_t features)
> > > > > > > {
> > > > > > >     const int *bit = feature_bits;
> > > > > > >     while (*bit != VHOST_INVALID_FEATURE_BIT) {
> > > > > > >         uint64_t bit_mask = (1ULL << *bit);
> > > > > > >         if (!(hdev->features & bit_mask)) {
> > > > > > >             features &= ~bit_mask;
> > > > > > >         }
> > > > > > >         bit++;
> > > > > > >     }
> > > > > > >     return features;
> > > > > > > }
> > > > > > >
> > > > > >
> > > > > > Now maybe I'm the one missing something, but why is this not done as a
> > > > > > masking directly?
> > > > >
> > > > > Not sure, the code has been there since day 0.
> > > > >
> > > > > But you can see from the code:
> > > > >
> > > > > 1) if STATUS is in feature_bits, we need validate the hdev->features
> > > > > and mask it if the vhost doesn't have the support
> > > > > 2) if STATUS is not, we don't do the check and driver may still see STATUS
> > > > >
> > > >
> > > > That's useful for _F_GUEST_ANNOUNCE, but we need to ack _F_STATUS for
> > > > the device if it supports it.
> > >
> > > Rethink about this, I don't see why ANNOUNCE depends on STATUS (spec
> > > doesn't say so).
> > >
> >
> > It is needed for the guest to read the status bit:
> > """
> > status only exists if VIRTIO_NET_F_STATUS is set. Two read-only bits
> > (for the driver) are currently defined for the status field:
> > VIRTIO_NET_S_LINK_UP and VIRTIO_NET_S_ANNOUNCE.
> > """
> >
> > A change on the standard could be possible, like "status only exists
> > if VIRTIO_NET_F_STATUS or VIRTIO_NET_F_GUEST_ANNOUNCE is set".
> > However, Linux drivers already expect _F_STATUS to read _S_ANNOUNCE
> > and to emulate _F_STATUS in case the device doesn't support it should
> > not be a big deal in my opinion.
>
> RIght, so I think we need a spec patch to clarify the dependency,
> currently, spec said ANNOUNCE depends on CTRL_VQ.
>

Would it be enough to expand it under the "Feature bit requirements" section?

> >
> > > > QEMU cannot detect by itself when the
> > > > link is not up. I think that setting unconditionally
> > > > VIRTIO_NET_S_LINK_UP is actually a regression, since the guest cannot
> > > > detect the link down that way.
> > >
> > > I think the idea is to still read status from config if the device
> > > supports this.
> > >
> >
> > Yes, that's my point. If I delete it from vdpa_feature_bits, it will
> > not be acked to the device, so we cannot read status from the device.
> >
> > > >
> > > > To enable _F_STATUS unconditionally is only done in the case the
> > > > device does not support it, because its emulation is very easy. That
> > > > way we support _F_GUEST_ANNOUNCE in all cases without device's
> > > > cooperation.
> > > >
> > > > Having said that, should we go the opposite route and ack _F_STATE as
> > > > long as the device supports it? As an advantage, all backends should
> > > > support that at this moment, isn't it?
> > >
> > > So I think the method used in this patch is fine, but I wonder if it's
> > > better to move it to the vhost layer instead of doing it in vhost_net
> > > since we do the features validation there. We probably need another
> > > table as input for get/set features there?
> > >
> >
> > We can discuss how to do it for sure. But as you pointed out,
> > vhost_net and virtio_net already modify the features received from the
> > devices, so it makes sense to me to modify the features set by the
> > guest.
> >
> > The problem is that we need to transmit to vhost when ack _F_STATUS
> > and when not. The first proposal was to add a new member of vhost_vdpa
> > but this is not optimal.
> >
> > If we add a new table it should be a static const one, and vhost_vdpa
> > should have a pointer to it, isn't it?
>
> Yes.
>
> > Something like features that
> > are emulated by qemu so they must be offered always to the guest?
>
> Kind of, actually it should be the features:
>
> 1) could be always seen by guest
> 2) when vhost device have this feature, use that
> 3) when vhost device doesn't have this feature, emulate one
>

I'm fine with that approach, but restricting the changes to either
vhost_net or virtio_net makes more sense to me. The net config space
interception goes to virtio_net anyway, not to vhost-vdpa.

I'll try to prepare the patches with a new array.

> But a question still, is there a vDPA parent that can't do _F_STATUS
> now (if not, we probably don't need to bother now).
>

Only mlx have _F_STATUS at this moment.

If I understand you correctly, you are proposing not to emulate
_F_STATUS if the device does not support it? To emulate _F_STATUS is
not a big deal emulating _F_GUEST_ANNOUNCE on top anyway.

Thanks!

> Thanks
>
> >
> > Thanks!
> >
> > > Thanks
> > >
> > > >
> > > > Thanks!
> > > >
> > > >
> > > >
> > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > Instead of making feature_bits an array of ints, to declare it as a
> > > > > > uint64_t with the valid feature bits and simply return features &
> > > > > > feature_bits.
> > > > > >
> > > > > > Thanks!
> > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > >
> > > > > > > > The goal with this patch series is to let the guest access the status
> > > > > > > > always, even if the device doesn't support _F_STATUS.
> > > > > > > >
> > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > > ---
> > > > > > > > > >   include/net/vhost-vdpa.h |  1 +
> > > > > > > > > >   hw/net/vhost_net.c       | 16 ++++++++++++++--
> > > > > > > > > >   net/vhost-vdpa.c         |  3 +++
> > > > > > > > > >   3 files changed, 18 insertions(+), 2 deletions(-)
> > > > > > > > > >
> > > > > > > > > > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > > > > > > > > > index b81f9a6f2a..cfbcce6427 100644
> > > > > > > > > > --- a/include/net/vhost-vdpa.h
> > > > > > > > > > +++ b/include/net/vhost-vdpa.h
> > > > > > > > > > @@ -17,5 +17,6 @@
> > > > > > > > > >   struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> > > > > > > > > >
> > > > > > > > > >   extern const int vdpa_feature_bits[];
> > > > > > > > > > +extern const uint64_t vhost_vdpa_net_added_feature_bits;
> > > > > > > > > >
> > > > > > > > > >   #endif /* VHOST_VDPA_H */
> > > > > > > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > > > > > > > > index d28f8b974b..7c15cc6e8f 100644
> > > > > > > > > > --- a/hw/net/vhost_net.c
> > > > > > > > > > +++ b/hw/net/vhost_net.c
> > > > > > > > > > @@ -109,10 +109,22 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> > > > > > > > > >       return feature_bits;
> > > > > > > > > >   }
> > > > > > > > > >
> > > > > > > > > > +static uint64_t vhost_net_add_feature_bits(struct vhost_net *net)
> > > > > > > > > > +{
> > > > > > > > > > +    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > > > > > > > > > +        return vhost_vdpa_net_added_feature_bits;
> > > > > > > > > > +    }
> > > > > > > > > > +
> > > > > > > > > > +    return 0;
> > > > > > > > > > +}
> > > > > > > > > > +
> > > > > > > > > >   uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
> > > > > > > > > >   {
> > > > > > > > > > -    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
> > > > > > > > > > -            features);
> > > > > > > > > > +    uint64_t ret = vhost_get_features(&net->dev,
> > > > > > > > > > +                                      vhost_net_get_feature_bits(net),
> > > > > > > > > > +                                      features);
> > > > > > > > > > +
> > > > > > > > > > +    return ret | vhost_net_add_feature_bits(net);
> > > > > > > > > >   }
> > > > > > > > > >   int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
> > > > > > > > > >                            uint32_t config_len)
> > > > > > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > > > > > > > index 6d64000202..24d2857593 100644
> > > > > > > > > > --- a/net/vhost-vdpa.c
> > > > > > > > > > +++ b/net/vhost-vdpa.c
> > > > > > > > > > @@ -99,6 +99,9 @@ static const uint64_t vdpa_svq_device_features =
> > > > > > > > > >       BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> > > > > > > > > >       BIT_ULL(VIRTIO_NET_F_STANDBY);
> > > > > > > > > >
> > > > > > > > > > +const uint64_t vhost_vdpa_net_added_feature_bits =
> > > > > > > > > > +    BIT_ULL(VIRTIO_NET_F_STATUS);
> > > > > > > > > > +
> > > > > > > > > >   VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> > > > > > > > > >   {
> > > > > > > > > >       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > > > > > > > >
> > > > > > > >
> > > > > > >
> > > > > >
> > > > >
> > > >
> > >
> >
>
Jason Wang Nov. 7, 2022, 7:51 a.m. UTC | #11
On Thu, Nov 3, 2022 at 4:12 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
>
> On Thu, Nov 3, 2022 at 4:21 AM Jason Wang <jasowang@redhat.com> wrote:
> >
> > On Wed, Nov 2, 2022 at 7:19 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
> > >
> > > On Tue, Nov 1, 2022 at 9:10 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > On Fri, Oct 28, 2022 at 5:30 PM Eugenio Perez Martin
> > > > <eperezma@redhat.com> wrote:
> > > > >
> > > > > On Fri, Oct 28, 2022 at 3:59 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > >
> > > > > > On Thu, Oct 27, 2022 at 6:18 PM Eugenio Perez Martin
> > > > > > <eperezma@redhat.com> wrote:
> > > > > > >
> > > > > > > On Thu, Oct 27, 2022 at 8:54 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Thu, Oct 27, 2022 at 2:47 PM Eugenio Perez Martin
> > > > > > > > <eperezma@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > On Thu, Oct 27, 2022 at 6:32 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > 在 2022/10/26 17:53, Eugenio Pérez 写道:
> > > > > > > > > > > Now that qemu can handle and emulate it if the vdpa backend does not
> > > > > > > > > > > support it we can offer it always.
> > > > > > > > > > >
> > > > > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > I may miss something but isn't more easier to simply remove the
> > > > > > > > > > _F_STATUS from vdpa_feature_bits[]?
> > > > > > > > > >
> > > > > > > > >
> > > > > > > > > How is that? if we remove it, the guest cannot ack it so it cannot
> > > > > > > > > access the net status, isn't it?
> > > > > > > >
> > > > > > > > My understanding is that the bits stored in the vdpa_feature_bits[]
> > > > > > > > are the features that must be explicitly supported by the vhost
> > > > > > > > device.
> > > > > > >
> > > > > > > (Non English native here, so maybe I don't get what you mean :) ) The
> > > > > > > device may not support them. net simulator lacks some of them
> > > > > > > actually, and it works.
> > > > > >
> > > > > > Speaking too fast, I think I meant that, if the bit doesn't belong to
> > > > > > vdpa_feature_bits[], it is assumed to be supported by the Qemu without
> > > > > > the support of the vhost. So Qemu won't even try to validate if vhost
> > > > > > has this support. E.g for vhost-net, we only have:
> > > > > >
> > > > > > static const int kernel_feature_bits[] = {
> > > > > >     VIRTIO_F_NOTIFY_ON_EMPTY,
> > > > > >     VIRTIO_RING_F_INDIRECT_DESC,
> > > > > >     VIRTIO_RING_F_EVENT_IDX,
> > > > > >     VIRTIO_NET_F_MRG_RXBUF,
> > > > > >     VIRTIO_F_VERSION_1,
> > > > > >     VIRTIO_NET_F_MTU,
> > > > > >     VIRTIO_F_IOMMU_PLATFORM,
> > > > > >     VIRTIO_F_RING_PACKED,
> > > > > >     VIRTIO_NET_F_HASH_REPORT,
> > > > > >     VHOST_INVALID_FEATURE_BIT
> > > > > > };
> > > > > >
> > > > > > You can see there's no STATUS bit there since it is emulated by Qemu.
> > > > > >
> > > > >
> > > > > Ok now I get what you mean, and yes we may modify the patches in that direction.
> > > > >
> > > > > But if we go then we need to modify how qemu ack the features, because
> > > > > the features that are not in vdpa_feature_bits are not acked to the
> > > > > device. More on this later.
> > > > >
> > > > > > >
> > > > > > > From what I see these are the only features that will be forwarded to
> > > > > > > the guest as device_features. If it is not in the list, the feature
> > > > > > > will be masked out,
> > > > > >
> > > > > > Only when there's no support for this feature from the vhost.
> > > > > >
> > > > > > > as if the device does not support it.
> > > > > > >
> > > > > > > So now _F_STATUS it was forwarded only if the device supports it. If
> > > > > > > we remove it from bit_mask, it will never be offered to the guest. But
> > > > > > > we want to offer it always, since we will need it for
> > > > > > > _F_GUEST_ANNOUNCE.
> > > > > > >
> > > > > > > Things get more complex because we actually need to ack it back if the
> > > > > > > device offers it, so the vdpa device can report link_down. We will
> > > > > > > only emulate LINK_UP always in the case the device does not support
> > > > > > > _F_STATUS.
> > > > > > >
> > > > > > > > So if we remove _F_STATUS, Qemu vhost code won't validate if
> > > > > > > > vhost-vdpa device has this support:
> > > > > > > >
> > > > > > > > uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
> > > > > > > >                             uint64_t features)
> > > > > > > > {
> > > > > > > >     const int *bit = feature_bits;
> > > > > > > >     while (*bit != VHOST_INVALID_FEATURE_BIT) {
> > > > > > > >         uint64_t bit_mask = (1ULL << *bit);
> > > > > > > >         if (!(hdev->features & bit_mask)) {
> > > > > > > >             features &= ~bit_mask;
> > > > > > > >         }
> > > > > > > >         bit++;
> > > > > > > >     }
> > > > > > > >     return features;
> > > > > > > > }
> > > > > > > >
> > > > > > >
> > > > > > > Now maybe I'm the one missing something, but why is this not done as a
> > > > > > > masking directly?
> > > > > >
> > > > > > Not sure, the code has been there since day 0.
> > > > > >
> > > > > > But you can see from the code:
> > > > > >
> > > > > > 1) if STATUS is in feature_bits, we need validate the hdev->features
> > > > > > and mask it if the vhost doesn't have the support
> > > > > > 2) if STATUS is not, we don't do the check and driver may still see STATUS
> > > > > >
> > > > >
> > > > > That's useful for _F_GUEST_ANNOUNCE, but we need to ack _F_STATUS for
> > > > > the device if it supports it.
> > > >
> > > > Rethink about this, I don't see why ANNOUNCE depends on STATUS (spec
> > > > doesn't say so).
> > > >
> > >
> > > It is needed for the guest to read the status bit:
> > > """
> > > status only exists if VIRTIO_NET_F_STATUS is set. Two read-only bits
> > > (for the driver) are currently defined for the status field:
> > > VIRTIO_NET_S_LINK_UP and VIRTIO_NET_S_ANNOUNCE.
> > > """
> > >
> > > A change on the standard could be possible, like "status only exists
> > > if VIRTIO_NET_F_STATUS or VIRTIO_NET_F_GUEST_ANNOUNCE is set".
> > > However, Linux drivers already expect _F_STATUS to read _S_ANNOUNCE
> > > and to emulate _F_STATUS in case the device doesn't support it should
> > > not be a big deal in my opinion.
> >
> > RIght, so I think we need a spec patch to clarify the dependency,
> > currently, spec said ANNOUNCE depends on CTRL_VQ.
> >
>
> Would it be enough to expand it under the "Feature bit requirements" section?
>

Yes.

> > >
> > > > > QEMU cannot detect by itself when the
> > > > > link is not up. I think that setting unconditionally
> > > > > VIRTIO_NET_S_LINK_UP is actually a regression, since the guest cannot
> > > > > detect the link down that way.
> > > >
> > > > I think the idea is to still read status from config if the device
> > > > supports this.
> > > >
> > >
> > > Yes, that's my point. If I delete it from vdpa_feature_bits, it will
> > > not be acked to the device, so we cannot read status from the device.
> > >
> > > > >
> > > > > To enable _F_STATUS unconditionally is only done in the case the
> > > > > device does not support it, because its emulation is very easy. That
> > > > > way we support _F_GUEST_ANNOUNCE in all cases without device's
> > > > > cooperation.
> > > > >
> > > > > Having said that, should we go the opposite route and ack _F_STATE as
> > > > > long as the device supports it? As an advantage, all backends should
> > > > > support that at this moment, isn't it?
> > > >
> > > > So I think the method used in this patch is fine, but I wonder if it's
> > > > better to move it to the vhost layer instead of doing it in vhost_net
> > > > since we do the features validation there. We probably need another
> > > > table as input for get/set features there?
> > > >
> > >
> > > We can discuss how to do it for sure. But as you pointed out,
> > > vhost_net and virtio_net already modify the features received from the
> > > devices, so it makes sense to me to modify the features set by the
> > > guest.
> > >
> > > The problem is that we need to transmit to vhost when ack _F_STATUS
> > > and when not. The first proposal was to add a new member of vhost_vdpa
> > > but this is not optimal.
> > >
> > > If we add a new table it should be a static const one, and vhost_vdpa
> > > should have a pointer to it, isn't it?
> >
> > Yes.
> >
> > > Something like features that
> > > are emulated by qemu so they must be offered always to the guest?
> >
> > Kind of, actually it should be the features:
> >
> > 1) could be always seen by guest
> > 2) when vhost device have this feature, use that
> > 3) when vhost device doesn't have this feature, emulate one
> >
>
> I'm fine with that approach, but restricting the changes to either
> vhost_net or virtio_net makes more sense to me. The net config space
> interception goes to virtio_net anyway, not to vhost-vdpa.
>
> I'll try to prepare the patches with a new array.

I'm ok if Michael is ok with this. I think it might help if we add a
comment in general vhost code like vhost_get_features(), then readers
know that each individual vhost implementation can mediate the feature
by their own.

>
> > But a question still, is there a vDPA parent that can't do _F_STATUS
> > now (if not, we probably don't need to bother now).
> >
>
> Only mlx have _F_STATUS at this moment.
>
> If I understand you correctly, you are proposing not to emulate
> _F_STATUS if the device does not support it? To emulate _F_STATUS is
> not a big deal emulating _F_GUEST_ANNOUNCE on top anyway.

I meant if most of the vDPA parent supports _F_STATUS, there's
probably no need to emulate it:

1) simulator, not hard to add status
2) vp_vdpa, should support _F_STATUS
3) IFCVF, I guess it should have this support, Ling Shan, can you clarify this?
4) ENI, not sure but anyhow it's a legacy parent

Thanks

>
> Thanks!
>
> > Thanks
> >
> > >
> > > Thanks!
> > >
> > > > Thanks
> > > >
> > > > >
> > > > > Thanks!
> > > > >
> > > > >
> > > > >
> > > > >
> > > > > > Thanks
> > > > > >
> > > > > > >
> > > > > > > Instead of making feature_bits an array of ints, to declare it as a
> > > > > > > uint64_t with the valid feature bits and simply return features &
> > > > > > > feature_bits.
> > > > > > >
> > > > > > > Thanks!
> > > > > > >
> > > > > > > > Thanks
> > > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > >
> > > > > > > > > The goal with this patch series is to let the guest access the status
> > > > > > > > > always, even if the device doesn't support _F_STATUS.
> > > > > > > > >
> > > > > > > > > > Thanks
> > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > > ---
> > > > > > > > > > >   include/net/vhost-vdpa.h |  1 +
> > > > > > > > > > >   hw/net/vhost_net.c       | 16 ++++++++++++++--
> > > > > > > > > > >   net/vhost-vdpa.c         |  3 +++
> > > > > > > > > > >   3 files changed, 18 insertions(+), 2 deletions(-)
> > > > > > > > > > >
> > > > > > > > > > > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > > > > > > > > > > index b81f9a6f2a..cfbcce6427 100644
> > > > > > > > > > > --- a/include/net/vhost-vdpa.h
> > > > > > > > > > > +++ b/include/net/vhost-vdpa.h
> > > > > > > > > > > @@ -17,5 +17,6 @@
> > > > > > > > > > >   struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> > > > > > > > > > >
> > > > > > > > > > >   extern const int vdpa_feature_bits[];
> > > > > > > > > > > +extern const uint64_t vhost_vdpa_net_added_feature_bits;
> > > > > > > > > > >
> > > > > > > > > > >   #endif /* VHOST_VDPA_H */
> > > > > > > > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > > > > > > > > > index d28f8b974b..7c15cc6e8f 100644
> > > > > > > > > > > --- a/hw/net/vhost_net.c
> > > > > > > > > > > +++ b/hw/net/vhost_net.c
> > > > > > > > > > > @@ -109,10 +109,22 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> > > > > > > > > > >       return feature_bits;
> > > > > > > > > > >   }
> > > > > > > > > > >
> > > > > > > > > > > +static uint64_t vhost_net_add_feature_bits(struct vhost_net *net)
> > > > > > > > > > > +{
> > > > > > > > > > > +    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > > > > > > > > > > +        return vhost_vdpa_net_added_feature_bits;
> > > > > > > > > > > +    }
> > > > > > > > > > > +
> > > > > > > > > > > +    return 0;
> > > > > > > > > > > +}
> > > > > > > > > > > +
> > > > > > > > > > >   uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
> > > > > > > > > > >   {
> > > > > > > > > > > -    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
> > > > > > > > > > > -            features);
> > > > > > > > > > > +    uint64_t ret = vhost_get_features(&net->dev,
> > > > > > > > > > > +                                      vhost_net_get_feature_bits(net),
> > > > > > > > > > > +                                      features);
> > > > > > > > > > > +
> > > > > > > > > > > +    return ret | vhost_net_add_feature_bits(net);
> > > > > > > > > > >   }
> > > > > > > > > > >   int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
> > > > > > > > > > >                            uint32_t config_len)
> > > > > > > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > > > > > > > > index 6d64000202..24d2857593 100644
> > > > > > > > > > > --- a/net/vhost-vdpa.c
> > > > > > > > > > > +++ b/net/vhost-vdpa.c
> > > > > > > > > > > @@ -99,6 +99,9 @@ static const uint64_t vdpa_svq_device_features =
> > > > > > > > > > >       BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> > > > > > > > > > >       BIT_ULL(VIRTIO_NET_F_STANDBY);
> > > > > > > > > > >
> > > > > > > > > > > +const uint64_t vhost_vdpa_net_added_feature_bits =
> > > > > > > > > > > +    BIT_ULL(VIRTIO_NET_F_STATUS);
> > > > > > > > > > > +
> > > > > > > > > > >   VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> > > > > > > > > > >   {
> > > > > > > > > > >       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > > > > > > > > >
> > > > > > > > >
> > > > > > > >
> > > > > > >
> > > > > >
> > > > >
> > > >
> > >
> >
>
Eugenio Perez Martin Nov. 7, 2022, 9:25 a.m. UTC | #12
On Mon, Nov 7, 2022 at 8:51 AM Jason Wang <jasowang@redhat.com> wrote:
>
> On Thu, Nov 3, 2022 at 4:12 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
> >
> > On Thu, Nov 3, 2022 at 4:21 AM Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > On Wed, Nov 2, 2022 at 7:19 PM Eugenio Perez Martin <eperezma@redhat.com> wrote:
> > > >
> > > > On Tue, Nov 1, 2022 at 9:10 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > >
> > > > > On Fri, Oct 28, 2022 at 5:30 PM Eugenio Perez Martin
> > > > > <eperezma@redhat.com> wrote:
> > > > > >
> > > > > > On Fri, Oct 28, 2022 at 3:59 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > >
> > > > > > > On Thu, Oct 27, 2022 at 6:18 PM Eugenio Perez Martin
> > > > > > > <eperezma@redhat.com> wrote:
> > > > > > > >
> > > > > > > > On Thu, Oct 27, 2022 at 8:54 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > >
> > > > > > > > > On Thu, Oct 27, 2022 at 2:47 PM Eugenio Perez Martin
> > > > > > > > > <eperezma@redhat.com> wrote:
> > > > > > > > > >
> > > > > > > > > > On Thu, Oct 27, 2022 at 6:32 AM Jason Wang <jasowang@redhat.com> wrote:
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > 在 2022/10/26 17:53, Eugenio Pérez 写道:
> > > > > > > > > > > > Now that qemu can handle and emulate it if the vdpa backend does not
> > > > > > > > > > > > support it we can offer it always.
> > > > > > > > > > > >
> > > > > > > > > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > I may miss something but isn't more easier to simply remove the
> > > > > > > > > > > _F_STATUS from vdpa_feature_bits[]?
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > How is that? if we remove it, the guest cannot ack it so it cannot
> > > > > > > > > > access the net status, isn't it?
> > > > > > > > >
> > > > > > > > > My understanding is that the bits stored in the vdpa_feature_bits[]
> > > > > > > > > are the features that must be explicitly supported by the vhost
> > > > > > > > > device.
> > > > > > > >
> > > > > > > > (Non English native here, so maybe I don't get what you mean :) ) The
> > > > > > > > device may not support them. net simulator lacks some of them
> > > > > > > > actually, and it works.
> > > > > > >
> > > > > > > Speaking too fast, I think I meant that, if the bit doesn't belong to
> > > > > > > vdpa_feature_bits[], it is assumed to be supported by the Qemu without
> > > > > > > the support of the vhost. So Qemu won't even try to validate if vhost
> > > > > > > has this support. E.g for vhost-net, we only have:
> > > > > > >
> > > > > > > static const int kernel_feature_bits[] = {
> > > > > > >     VIRTIO_F_NOTIFY_ON_EMPTY,
> > > > > > >     VIRTIO_RING_F_INDIRECT_DESC,
> > > > > > >     VIRTIO_RING_F_EVENT_IDX,
> > > > > > >     VIRTIO_NET_F_MRG_RXBUF,
> > > > > > >     VIRTIO_F_VERSION_1,
> > > > > > >     VIRTIO_NET_F_MTU,
> > > > > > >     VIRTIO_F_IOMMU_PLATFORM,
> > > > > > >     VIRTIO_F_RING_PACKED,
> > > > > > >     VIRTIO_NET_F_HASH_REPORT,
> > > > > > >     VHOST_INVALID_FEATURE_BIT
> > > > > > > };
> > > > > > >
> > > > > > > You can see there's no STATUS bit there since it is emulated by Qemu.
> > > > > > >
> > > > > >
> > > > > > Ok now I get what you mean, and yes we may modify the patches in that direction.
> > > > > >
> > > > > > But if we go then we need to modify how qemu ack the features, because
> > > > > > the features that are not in vdpa_feature_bits are not acked to the
> > > > > > device. More on this later.
> > > > > >
> > > > > > > >
> > > > > > > > From what I see these are the only features that will be forwarded to
> > > > > > > > the guest as device_features. If it is not in the list, the feature
> > > > > > > > will be masked out,
> > > > > > >
> > > > > > > Only when there's no support for this feature from the vhost.
> > > > > > >
> > > > > > > > as if the device does not support it.
> > > > > > > >
> > > > > > > > So now _F_STATUS it was forwarded only if the device supports it. If
> > > > > > > > we remove it from bit_mask, it will never be offered to the guest. But
> > > > > > > > we want to offer it always, since we will need it for
> > > > > > > > _F_GUEST_ANNOUNCE.
> > > > > > > >
> > > > > > > > Things get more complex because we actually need to ack it back if the
> > > > > > > > device offers it, so the vdpa device can report link_down. We will
> > > > > > > > only emulate LINK_UP always in the case the device does not support
> > > > > > > > _F_STATUS.
> > > > > > > >
> > > > > > > > > So if we remove _F_STATUS, Qemu vhost code won't validate if
> > > > > > > > > vhost-vdpa device has this support:
> > > > > > > > >
> > > > > > > > > uint64_t vhost_get_features(struct vhost_dev *hdev, const int *feature_bits,
> > > > > > > > >                             uint64_t features)
> > > > > > > > > {
> > > > > > > > >     const int *bit = feature_bits;
> > > > > > > > >     while (*bit != VHOST_INVALID_FEATURE_BIT) {
> > > > > > > > >         uint64_t bit_mask = (1ULL << *bit);
> > > > > > > > >         if (!(hdev->features & bit_mask)) {
> > > > > > > > >             features &= ~bit_mask;
> > > > > > > > >         }
> > > > > > > > >         bit++;
> > > > > > > > >     }
> > > > > > > > >     return features;
> > > > > > > > > }
> > > > > > > > >
> > > > > > > >
> > > > > > > > Now maybe I'm the one missing something, but why is this not done as a
> > > > > > > > masking directly?
> > > > > > >
> > > > > > > Not sure, the code has been there since day 0.
> > > > > > >
> > > > > > > But you can see from the code:
> > > > > > >
> > > > > > > 1) if STATUS is in feature_bits, we need validate the hdev->features
> > > > > > > and mask it if the vhost doesn't have the support
> > > > > > > 2) if STATUS is not, we don't do the check and driver may still see STATUS
> > > > > > >
> > > > > >
> > > > > > That's useful for _F_GUEST_ANNOUNCE, but we need to ack _F_STATUS for
> > > > > > the device if it supports it.
> > > > >
> > > > > Rethink about this, I don't see why ANNOUNCE depends on STATUS (spec
> > > > > doesn't say so).
> > > > >
> > > >
> > > > It is needed for the guest to read the status bit:
> > > > """
> > > > status only exists if VIRTIO_NET_F_STATUS is set. Two read-only bits
> > > > (for the driver) are currently defined for the status field:
> > > > VIRTIO_NET_S_LINK_UP and VIRTIO_NET_S_ANNOUNCE.
> > > > """
> > > >
> > > > A change on the standard could be possible, like "status only exists
> > > > if VIRTIO_NET_F_STATUS or VIRTIO_NET_F_GUEST_ANNOUNCE is set".
> > > > However, Linux drivers already expect _F_STATUS to read _S_ANNOUNCE
> > > > and to emulate _F_STATUS in case the device doesn't support it should
> > > > not be a big deal in my opinion.
> > >
> > > RIght, so I think we need a spec patch to clarify the dependency,
> > > currently, spec said ANNOUNCE depends on CTRL_VQ.
> > >
> >
> > Would it be enough to expand it under the "Feature bit requirements" section?
> >
>
> Yes.
>
> > > >
> > > > > > QEMU cannot detect by itself when the
> > > > > > link is not up. I think that setting unconditionally
> > > > > > VIRTIO_NET_S_LINK_UP is actually a regression, since the guest cannot
> > > > > > detect the link down that way.
> > > > >
> > > > > I think the idea is to still read status from config if the device
> > > > > supports this.
> > > > >
> > > >
> > > > Yes, that's my point. If I delete it from vdpa_feature_bits, it will
> > > > not be acked to the device, so we cannot read status from the device.
> > > >
> > > > > >
> > > > > > To enable _F_STATUS unconditionally is only done in the case the
> > > > > > device does not support it, because its emulation is very easy. That
> > > > > > way we support _F_GUEST_ANNOUNCE in all cases without device's
> > > > > > cooperation.
> > > > > >
> > > > > > Having said that, should we go the opposite route and ack _F_STATE as
> > > > > > long as the device supports it? As an advantage, all backends should
> > > > > > support that at this moment, isn't it?
> > > > >
> > > > > So I think the method used in this patch is fine, but I wonder if it's
> > > > > better to move it to the vhost layer instead of doing it in vhost_net
> > > > > since we do the features validation there. We probably need another
> > > > > table as input for get/set features there?
> > > > >
> > > >
> > > > We can discuss how to do it for sure. But as you pointed out,
> > > > vhost_net and virtio_net already modify the features received from the
> > > > devices, so it makes sense to me to modify the features set by the
> > > > guest.
> > > >
> > > > The problem is that we need to transmit to vhost when ack _F_STATUS
> > > > and when not. The first proposal was to add a new member of vhost_vdpa
> > > > but this is not optimal.
> > > >
> > > > If we add a new table it should be a static const one, and vhost_vdpa
> > > > should have a pointer to it, isn't it?
> > >
> > > Yes.
> > >
> > > > Something like features that
> > > > are emulated by qemu so they must be offered always to the guest?
> > >
> > > Kind of, actually it should be the features:
> > >
> > > 1) could be always seen by guest
> > > 2) when vhost device have this feature, use that
> > > 3) when vhost device doesn't have this feature, emulate one
> > >
> >
> > I'm fine with that approach, but restricting the changes to either
> > vhost_net or virtio_net makes more sense to me. The net config space
> > interception goes to virtio_net anyway, not to vhost-vdpa.
> >
> > I'll try to prepare the patches with a new array.
>
> I'm ok if Michael is ok with this. I think it might help if we add a
> comment in general vhost code like vhost_get_features(), then readers
> know that each individual vhost implementation can mediate the feature
> by their own.
>
> >
> > > But a question still, is there a vDPA parent that can't do _F_STATUS
> > > now (if not, we probably don't need to bother now).
> > >
> >
> > Only mlx have _F_STATUS at this moment.
> >
> > If I understand you correctly, you are proposing not to emulate
> > _F_STATUS if the device does not support it? To emulate _F_STATUS is
> > not a big deal emulating _F_GUEST_ANNOUNCE on top anyway.
>
> I meant if most of the vDPA parent supports _F_STATUS, there's
> probably no need to emulate it:
>
> 1) simulator, not hard to add status
> 2) vp_vdpa, should support _F_STATUS
> 3) IFCVF, I guess it should have this support, Ling Shan, can you clarify this?
> 4) ENI, not sure but anyhow it's a legacy parent
>

If this is the case then yes, it might make more sense to add
_F_STATUS support to at least vdpa simulator.

We can always emulate it in qemu later for sure.

Thanks!

> Thanks
>
> >
> > Thanks!
> >
> > > Thanks
> > >
> > > >
> > > > Thanks!
> > > >
> > > > > Thanks
> > > > >
> > > > > >
> > > > > > Thanks!
> > > > > >
> > > > > >
> > > > > >
> > > > > >
> > > > > > > Thanks
> > > > > > >
> > > > > > > >
> > > > > > > > Instead of making feature_bits an array of ints, to declare it as a
> > > > > > > > uint64_t with the valid feature bits and simply return features &
> > > > > > > > feature_bits.
> > > > > > > >
> > > > > > > > Thanks!
> > > > > > > >
> > > > > > > > > Thanks
> > > > > > > > >
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > >
> > > > > > > > > > The goal with this patch series is to let the guest access the status
> > > > > > > > > > always, even if the device doesn't support _F_STATUS.
> > > > > > > > > >
> > > > > > > > > > > Thanks
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > > ---
> > > > > > > > > > > >   include/net/vhost-vdpa.h |  1 +
> > > > > > > > > > > >   hw/net/vhost_net.c       | 16 ++++++++++++++--
> > > > > > > > > > > >   net/vhost-vdpa.c         |  3 +++
> > > > > > > > > > > >   3 files changed, 18 insertions(+), 2 deletions(-)
> > > > > > > > > > > >
> > > > > > > > > > > > diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
> > > > > > > > > > > > index b81f9a6f2a..cfbcce6427 100644
> > > > > > > > > > > > --- a/include/net/vhost-vdpa.h
> > > > > > > > > > > > +++ b/include/net/vhost-vdpa.h
> > > > > > > > > > > > @@ -17,5 +17,6 @@
> > > > > > > > > > > >   struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
> > > > > > > > > > > >
> > > > > > > > > > > >   extern const int vdpa_feature_bits[];
> > > > > > > > > > > > +extern const uint64_t vhost_vdpa_net_added_feature_bits;
> > > > > > > > > > > >
> > > > > > > > > > > >   #endif /* VHOST_VDPA_H */
> > > > > > > > > > > > diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
> > > > > > > > > > > > index d28f8b974b..7c15cc6e8f 100644
> > > > > > > > > > > > --- a/hw/net/vhost_net.c
> > > > > > > > > > > > +++ b/hw/net/vhost_net.c
> > > > > > > > > > > > @@ -109,10 +109,22 @@ static const int *vhost_net_get_feature_bits(struct vhost_net *net)
> > > > > > > > > > > >       return feature_bits;
> > > > > > > > > > > >   }
> > > > > > > > > > > >
> > > > > > > > > > > > +static uint64_t vhost_net_add_feature_bits(struct vhost_net *net)
> > > > > > > > > > > > +{
> > > > > > > > > > > > +    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
> > > > > > > > > > > > +        return vhost_vdpa_net_added_feature_bits;
> > > > > > > > > > > > +    }
> > > > > > > > > > > > +
> > > > > > > > > > > > +    return 0;
> > > > > > > > > > > > +}
> > > > > > > > > > > > +
> > > > > > > > > > > >   uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
> > > > > > > > > > > >   {
> > > > > > > > > > > > -    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
> > > > > > > > > > > > -            features);
> > > > > > > > > > > > +    uint64_t ret = vhost_get_features(&net->dev,
> > > > > > > > > > > > +                                      vhost_net_get_feature_bits(net),
> > > > > > > > > > > > +                                      features);
> > > > > > > > > > > > +
> > > > > > > > > > > > +    return ret | vhost_net_add_feature_bits(net);
> > > > > > > > > > > >   }
> > > > > > > > > > > >   int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
> > > > > > > > > > > >                            uint32_t config_len)
> > > > > > > > > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > > > > > > > > > index 6d64000202..24d2857593 100644
> > > > > > > > > > > > --- a/net/vhost-vdpa.c
> > > > > > > > > > > > +++ b/net/vhost-vdpa.c
> > > > > > > > > > > > @@ -99,6 +99,9 @@ static const uint64_t vdpa_svq_device_features =
> > > > > > > > > > > >       BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
> > > > > > > > > > > >       BIT_ULL(VIRTIO_NET_F_STANDBY);
> > > > > > > > > > > >
> > > > > > > > > > > > +const uint64_t vhost_vdpa_net_added_feature_bits =
> > > > > > > > > > > > +    BIT_ULL(VIRTIO_NET_F_STATUS);
> > > > > > > > > > > > +
> > > > > > > > > > > >   VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
> > > > > > > > > > > >   {
> > > > > > > > > > > >       VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
> > > > > > > > > > >
> > > > > > > > > >
> > > > > > > > >
> > > > > > > >
> > > > > > >
> > > > > >
> > > > >
> > > >
> > >
> >
>
diff mbox series

Patch

diff --git a/include/net/vhost-vdpa.h b/include/net/vhost-vdpa.h
index b81f9a6f2a..cfbcce6427 100644
--- a/include/net/vhost-vdpa.h
+++ b/include/net/vhost-vdpa.h
@@ -17,5 +17,6 @@ 
 struct vhost_net *vhost_vdpa_get_vhost_net(NetClientState *nc);
 
 extern const int vdpa_feature_bits[];
+extern const uint64_t vhost_vdpa_net_added_feature_bits;
 
 #endif /* VHOST_VDPA_H */
diff --git a/hw/net/vhost_net.c b/hw/net/vhost_net.c
index d28f8b974b..7c15cc6e8f 100644
--- a/hw/net/vhost_net.c
+++ b/hw/net/vhost_net.c
@@ -109,10 +109,22 @@  static const int *vhost_net_get_feature_bits(struct vhost_net *net)
     return feature_bits;
 }
 
+static uint64_t vhost_net_add_feature_bits(struct vhost_net *net)
+{
+    if (net->nc->info->type == NET_CLIENT_DRIVER_VHOST_VDPA) {
+        return vhost_vdpa_net_added_feature_bits;
+    }
+
+    return 0;
+}
+
 uint64_t vhost_net_get_features(struct vhost_net *net, uint64_t features)
 {
-    return vhost_get_features(&net->dev, vhost_net_get_feature_bits(net),
-            features);
+    uint64_t ret = vhost_get_features(&net->dev,
+                                      vhost_net_get_feature_bits(net),
+                                      features);
+
+    return ret | vhost_net_add_feature_bits(net);
 }
 int vhost_net_get_config(struct vhost_net *net,  uint8_t *config,
                          uint32_t config_len)
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 6d64000202..24d2857593 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -99,6 +99,9 @@  static const uint64_t vdpa_svq_device_features =
     BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
     BIT_ULL(VIRTIO_NET_F_STANDBY);
 
+const uint64_t vhost_vdpa_net_added_feature_bits =
+    BIT_ULL(VIRTIO_NET_F_STATUS);
+
 VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
 {
     VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);