diff mbox series

[RFC,v2,05/13] vdpa net: add migration blocker if cannot migrate cvq

Message ID 20230112172434.760850-6-eperezma@redhat.com (mailing list archive)
State New, archived
Headers show
Series Dinamycally switch to vhost shadow virtqueues at vdpa net migration | expand

Commit Message

Eugenio Perez Martin Jan. 12, 2023, 5:24 p.m. UTC
A vdpa net device must initialize with SVQ in order to be migratable,
and initialization code verifies conditions.  If the device is not
initialized with the x-svq parameter, it will not expose _F_LOG so vhost
sybsystem will block VM migration from its initialization.

Next patches change this. Net data VQs will be shadowed only at
migration time and vdpa net devices need to expose _F_LOG as long as it
can go to SVQ.

Since we don't know that at initialization time but at start, add an
independent blocker at CVQ.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
 net/vhost-vdpa.c | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

Comments

Jason Wang Jan. 13, 2023, 4:24 a.m. UTC | #1
在 2023/1/13 01:24, Eugenio Pérez 写道:
> A vdpa net device must initialize with SVQ in order to be migratable,
> and initialization code verifies conditions.  If the device is not
> initialized with the x-svq parameter, it will not expose _F_LOG so vhost
> sybsystem will block VM migration from its initialization.
>
> Next patches change this. Net data VQs will be shadowed only at
> migration time and vdpa net devices need to expose _F_LOG as long as it
> can go to SVQ.
>
> Since we don't know that at initialization time but at start, add an
> independent blocker at CVQ.
>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
>   net/vhost-vdpa.c | 35 +++++++++++++++++++++++++++++------
>   1 file changed, 29 insertions(+), 6 deletions(-)
>
> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> index 631424d9c4..2ca93e850a 100644
> --- a/net/vhost-vdpa.c
> +++ b/net/vhost-vdpa.c
> @@ -26,12 +26,14 @@
>   #include <err.h>
>   #include "standard-headers/linux/virtio_net.h"
>   #include "monitor/monitor.h"
> +#include "migration/blocker.h"
>   #include "hw/virtio/vhost.h"
>   
>   /* Todo:need to add the multiqueue support here */
>   typedef struct VhostVDPAState {
>       NetClientState nc;
>       struct vhost_vdpa vhost_vdpa;
> +    Error *migration_blocker;


Any reason we can't use the mivration_blocker in vhost_dev structure?

I believe we don't need to wait until start to know we can't migrate.

Thanks


>       VHostNetState *vhost_net;
>   
>       /* Control commands shadow buffers */
> @@ -433,9 +435,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>               g_strerror(errno), errno);
>           return -1;
>       }
> -    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
> -        !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
> -        return 0;
> +    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
> +        error_setg(&s->migration_blocker,
> +                   "vdpa device %s does not support ASID",
> +                   nc->name);
> +        goto out;
> +    }
> +    if (!vhost_vdpa_net_valid_svq_features(v->dev->features,
> +                                           &s->migration_blocker)) {
> +        goto out;
>       }
>   
>       /*
> @@ -455,7 +463,10 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>           }
>   
>           if (group == cvq_group) {
> -            return 0;
> +            error_setg(&s->migration_blocker,
> +                "vdpa %s vq %d group %"PRId64" is the same as cvq group "
> +                "%"PRId64, nc->name, i, group, cvq_group);
> +            goto out;
>           }
>       }
>   
> @@ -468,8 +479,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>       s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
>   
>   out:
> -    if (!s->vhost_vdpa.shadow_vqs_enabled) {
> -        return 0;
> +    if (s->migration_blocker) {
> +        Error *errp = NULL;
> +        r = migrate_add_blocker(s->migration_blocker, &errp);
> +        if (unlikely(r != 0)) {
> +            g_clear_pointer(&s->migration_blocker, error_free);
> +            error_report_err(errp);
> +        }
> +
> +        return r;
>       }
>   
>       s0 = vhost_vdpa_net_first_nc_vdpa(s);
> @@ -513,6 +531,11 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
>           vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
>       }
>   
> +    if (s->migration_blocker) {
> +        migrate_del_blocker(s->migration_blocker);
> +        g_clear_pointer(&s->migration_blocker, error_free);
> +    }
> +
>       vhost_vdpa_net_client_stop(nc);
>   }
>
Eugenio Perez Martin Jan. 13, 2023, 7:46 a.m. UTC | #2
On Fri, Jan 13, 2023 at 5:25 AM Jason Wang <jasowang@redhat.com> wrote:
>
>
> 在 2023/1/13 01:24, Eugenio Pérez 写道:
> > A vdpa net device must initialize with SVQ in order to be migratable,
> > and initialization code verifies conditions.  If the device is not
> > initialized with the x-svq parameter, it will not expose _F_LOG so vhost
> > sybsystem will block VM migration from its initialization.
> >
> > Next patches change this. Net data VQs will be shadowed only at
> > migration time and vdpa net devices need to expose _F_LOG as long as it
> > can go to SVQ.
> >
> > Since we don't know that at initialization time but at start, add an
> > independent blocker at CVQ.
> >
> > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > ---
> >   net/vhost-vdpa.c | 35 +++++++++++++++++++++++++++++------
> >   1 file changed, 29 insertions(+), 6 deletions(-)
> >
> > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > index 631424d9c4..2ca93e850a 100644
> > --- a/net/vhost-vdpa.c
> > +++ b/net/vhost-vdpa.c
> > @@ -26,12 +26,14 @@
> >   #include <err.h>
> >   #include "standard-headers/linux/virtio_net.h"
> >   #include "monitor/monitor.h"
> > +#include "migration/blocker.h"
> >   #include "hw/virtio/vhost.h"
> >
> >   /* Todo:need to add the multiqueue support here */
> >   typedef struct VhostVDPAState {
> >       NetClientState nc;
> >       struct vhost_vdpa vhost_vdpa;
> > +    Error *migration_blocker;
>
>
> Any reason we can't use the mivration_blocker in vhost_dev structure?
>
> I believe we don't need to wait until start to know we can't migrate.
>

Device migratability also depends on features that the guest acks.

For example, if the device does not support ASID it can be migrated as
long as _F_CVQ is not acked.

Thanks!

>
> Thanks
>
>
> >       VHostNetState *vhost_net;
> >
> >       /* Control commands shadow buffers */
> > @@ -433,9 +435,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> >               g_strerror(errno), errno);
> >           return -1;
> >       }
> > -    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
> > -        !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
> > -        return 0;
> > +    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
> > +        error_setg(&s->migration_blocker,
> > +                   "vdpa device %s does not support ASID",
> > +                   nc->name);
> > +        goto out;
> > +    }
> > +    if (!vhost_vdpa_net_valid_svq_features(v->dev->features,
> > +                                           &s->migration_blocker)) {
> > +        goto out;
> >       }
> >
> >       /*
> > @@ -455,7 +463,10 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> >           }
> >
> >           if (group == cvq_group) {
> > -            return 0;
> > +            error_setg(&s->migration_blocker,
> > +                "vdpa %s vq %d group %"PRId64" is the same as cvq group "
> > +                "%"PRId64, nc->name, i, group, cvq_group);
> > +            goto out;
> >           }
> >       }
> >
> > @@ -468,8 +479,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> >       s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
> >
> >   out:
> > -    if (!s->vhost_vdpa.shadow_vqs_enabled) {
> > -        return 0;
> > +    if (s->migration_blocker) {
> > +        Error *errp = NULL;
> > +        r = migrate_add_blocker(s->migration_blocker, &errp);
> > +        if (unlikely(r != 0)) {
> > +            g_clear_pointer(&s->migration_blocker, error_free);
> > +            error_report_err(errp);
> > +        }
> > +
> > +        return r;
> >       }
> >
> >       s0 = vhost_vdpa_net_first_nc_vdpa(s);
> > @@ -513,6 +531,11 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
> >           vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
> >       }
> >
> > +    if (s->migration_blocker) {
> > +        migrate_del_blocker(s->migration_blocker);
> > +        g_clear_pointer(&s->migration_blocker, error_free);
> > +    }
> > +
> >       vhost_vdpa_net_client_stop(nc);
> >   }
> >
>
Jason Wang Jan. 16, 2023, 3:34 a.m. UTC | #3
在 2023/1/13 15:46, Eugenio Perez Martin 写道:
> On Fri, Jan 13, 2023 at 5:25 AM Jason Wang <jasowang@redhat.com> wrote:
>>
>> 在 2023/1/13 01:24, Eugenio Pérez 写道:
>>> A vdpa net device must initialize with SVQ in order to be migratable,
>>> and initialization code verifies conditions.  If the device is not
>>> initialized with the x-svq parameter, it will not expose _F_LOG so vhost
>>> sybsystem will block VM migration from its initialization.
>>>
>>> Next patches change this. Net data VQs will be shadowed only at
>>> migration time and vdpa net devices need to expose _F_LOG as long as it
>>> can go to SVQ.
>>>
>>> Since we don't know that at initialization time but at start, add an
>>> independent blocker at CVQ.
>>>
>>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
>>> ---
>>>    net/vhost-vdpa.c | 35 +++++++++++++++++++++++++++++------
>>>    1 file changed, 29 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>>> index 631424d9c4..2ca93e850a 100644
>>> --- a/net/vhost-vdpa.c
>>> +++ b/net/vhost-vdpa.c
>>> @@ -26,12 +26,14 @@
>>>    #include <err.h>
>>>    #include "standard-headers/linux/virtio_net.h"
>>>    #include "monitor/monitor.h"
>>> +#include "migration/blocker.h"
>>>    #include "hw/virtio/vhost.h"
>>>
>>>    /* Todo:need to add the multiqueue support here */
>>>    typedef struct VhostVDPAState {
>>>        NetClientState nc;
>>>        struct vhost_vdpa vhost_vdpa;
>>> +    Error *migration_blocker;
>>
>> Any reason we can't use the mivration_blocker in vhost_dev structure?
>>
>> I believe we don't need to wait until start to know we can't migrate.
>>
> Device migratability also depends on features that the guest acks.


This sounds a little bit tricky, more below:


>
> For example, if the device does not support ASID it can be migrated as
> long as _F_CVQ is not acked.


The management may notice a non-consistent behavior in this case. I 
wonder if we can simply check the host features.

Thanks


>
> Thanks!
>
>> Thanks
>>
>>
>>>        VHostNetState *vhost_net;
>>>
>>>        /* Control commands shadow buffers */
>>> @@ -433,9 +435,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>>>                g_strerror(errno), errno);
>>>            return -1;
>>>        }
>>> -    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
>>> -        !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
>>> -        return 0;
>>> +    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
>>> +        error_setg(&s->migration_blocker,
>>> +                   "vdpa device %s does not support ASID",
>>> +                   nc->name);
>>> +        goto out;
>>> +    }
>>> +    if (!vhost_vdpa_net_valid_svq_features(v->dev->features,
>>> +                                           &s->migration_blocker)) {
>>> +        goto out;
>>>        }
>>>
>>>        /*
>>> @@ -455,7 +463,10 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>>>            }
>>>
>>>            if (group == cvq_group) {
>>> -            return 0;
>>> +            error_setg(&s->migration_blocker,
>>> +                "vdpa %s vq %d group %"PRId64" is the same as cvq group "
>>> +                "%"PRId64, nc->name, i, group, cvq_group);
>>> +            goto out;
>>>            }
>>>        }
>>>
>>> @@ -468,8 +479,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>>>        s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
>>>
>>>    out:
>>> -    if (!s->vhost_vdpa.shadow_vqs_enabled) {
>>> -        return 0;
>>> +    if (s->migration_blocker) {
>>> +        Error *errp = NULL;
>>> +        r = migrate_add_blocker(s->migration_blocker, &errp);
>>> +        if (unlikely(r != 0)) {
>>> +            g_clear_pointer(&s->migration_blocker, error_free);
>>> +            error_report_err(errp);
>>> +        }
>>> +
>>> +        return r;
>>>        }
>>>
>>>        s0 = vhost_vdpa_net_first_nc_vdpa(s);
>>> @@ -513,6 +531,11 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
>>>            vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
>>>        }
>>>
>>> +    if (s->migration_blocker) {
>>> +        migrate_del_blocker(s->migration_blocker);
>>> +        g_clear_pointer(&s->migration_blocker, error_free);
>>> +    }
>>> +
>>>        vhost_vdpa_net_client_stop(nc);
>>>    }
>>>
Michael S. Tsirkin Jan. 16, 2023, 5:23 a.m. UTC | #4
On Mon, Jan 16, 2023 at 11:34:20AM +0800, Jason Wang wrote:
> 
> 在 2023/1/13 15:46, Eugenio Perez Martin 写道:
> > On Fri, Jan 13, 2023 at 5:25 AM Jason Wang <jasowang@redhat.com> wrote:
> > > 
> > > 在 2023/1/13 01:24, Eugenio Pérez 写道:
> > > > A vdpa net device must initialize with SVQ in order to be migratable,
> > > > and initialization code verifies conditions.  If the device is not
> > > > initialized with the x-svq parameter, it will not expose _F_LOG so vhost
> > > > sybsystem will block VM migration from its initialization.
> > > > 
> > > > Next patches change this. Net data VQs will be shadowed only at
> > > > migration time and vdpa net devices need to expose _F_LOG as long as it
> > > > can go to SVQ.
> > > > 
> > > > Since we don't know that at initialization time but at start, add an
> > > > independent blocker at CVQ.
> > > > 
> > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > > ---
> > > >    net/vhost-vdpa.c | 35 +++++++++++++++++++++++++++++------
> > > >    1 file changed, 29 insertions(+), 6 deletions(-)
> > > > 
> > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > index 631424d9c4..2ca93e850a 100644
> > > > --- a/net/vhost-vdpa.c
> > > > +++ b/net/vhost-vdpa.c
> > > > @@ -26,12 +26,14 @@
> > > >    #include <err.h>
> > > >    #include "standard-headers/linux/virtio_net.h"
> > > >    #include "monitor/monitor.h"
> > > > +#include "migration/blocker.h"
> > > >    #include "hw/virtio/vhost.h"
> > > > 
> > > >    /* Todo:need to add the multiqueue support here */
> > > >    typedef struct VhostVDPAState {
> > > >        NetClientState nc;
> > > >        struct vhost_vdpa vhost_vdpa;
> > > > +    Error *migration_blocker;
> > > 
> > > Any reason we can't use the mivration_blocker in vhost_dev structure?
> > > 
> > > I believe we don't need to wait until start to know we can't migrate.
> > > 
> > Device migratability also depends on features that the guest acks.
> 
> 
> This sounds a little bit tricky, more below:
> 
> 
> > 
> > For example, if the device does not support ASID it can be migrated as
> > long as _F_CVQ is not acked.
> 
> 
> The management may notice a non-consistent behavior in this case. I wonder
> if we can simply check the host features.
> 
> Thanks


Yes the issue is that ack can happen after migration started.
I don't think this kind of blocker appearing during migration
is currently expected/supported well. Is it?

> 
> > 
> > Thanks!
> > 
> > > Thanks
> > > 
> > > 
> > > >        VHostNetState *vhost_net;
> > > > 
> > > >        /* Control commands shadow buffers */
> > > > @@ -433,9 +435,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> > > >                g_strerror(errno), errno);
> > > >            return -1;
> > > >        }
> > > > -    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
> > > > -        !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
> > > > -        return 0;
> > > > +    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
> > > > +        error_setg(&s->migration_blocker,
> > > > +                   "vdpa device %s does not support ASID",
> > > > +                   nc->name);
> > > > +        goto out;
> > > > +    }
> > > > +    if (!vhost_vdpa_net_valid_svq_features(v->dev->features,
> > > > +                                           &s->migration_blocker)) {
> > > > +        goto out;
> > > >        }
> > > > 
> > > >        /*
> > > > @@ -455,7 +463,10 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> > > >            }
> > > > 
> > > >            if (group == cvq_group) {
> > > > -            return 0;
> > > > +            error_setg(&s->migration_blocker,
> > > > +                "vdpa %s vq %d group %"PRId64" is the same as cvq group "
> > > > +                "%"PRId64, nc->name, i, group, cvq_group);
> > > > +            goto out;
> > > >            }
> > > >        }
> > > > 
> > > > @@ -468,8 +479,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> > > >        s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
> > > > 
> > > >    out:
> > > > -    if (!s->vhost_vdpa.shadow_vqs_enabled) {
> > > > -        return 0;
> > > > +    if (s->migration_blocker) {
> > > > +        Error *errp = NULL;
> > > > +        r = migrate_add_blocker(s->migration_blocker, &errp);
> > > > +        if (unlikely(r != 0)) {
> > > > +            g_clear_pointer(&s->migration_blocker, error_free);
> > > > +            error_report_err(errp);
> > > > +        }
> > > > +
> > > > +        return r;
> > > >        }
> > > > 
> > > >        s0 = vhost_vdpa_net_first_nc_vdpa(s);
> > > > @@ -513,6 +531,11 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
> > > >            vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
> > > >        }
> > > > 
> > > > +    if (s->migration_blocker) {
> > > > +        migrate_del_blocker(s->migration_blocker);
> > > > +        g_clear_pointer(&s->migration_blocker, error_free);
> > > > +    }
> > > > +
> > > >        vhost_vdpa_net_client_stop(nc);
> > > >    }
> > > >
Eugenio Perez Martin Jan. 16, 2023, 9:33 a.m. UTC | #5
On Mon, Jan 16, 2023 at 6:24 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Mon, Jan 16, 2023 at 11:34:20AM +0800, Jason Wang wrote:
> >
> > 在 2023/1/13 15:46, Eugenio Perez Martin 写道:
> > > On Fri, Jan 13, 2023 at 5:25 AM Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > 在 2023/1/13 01:24, Eugenio Pérez 写道:
> > > > > A vdpa net device must initialize with SVQ in order to be migratable,
> > > > > and initialization code verifies conditions.  If the device is not
> > > > > initialized with the x-svq parameter, it will not expose _F_LOG so vhost
> > > > > sybsystem will block VM migration from its initialization.
> > > > >
> > > > > Next patches change this. Net data VQs will be shadowed only at
> > > > > migration time and vdpa net devices need to expose _F_LOG as long as it
> > > > > can go to SVQ.
> > > > >
> > > > > Since we don't know that at initialization time but at start, add an
> > > > > independent blocker at CVQ.
> > > > >
> > > > > Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> > > > > ---
> > > > >    net/vhost-vdpa.c | 35 +++++++++++++++++++++++++++++------
> > > > >    1 file changed, 29 insertions(+), 6 deletions(-)
> > > > >
> > > > > diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
> > > > > index 631424d9c4..2ca93e850a 100644
> > > > > --- a/net/vhost-vdpa.c
> > > > > +++ b/net/vhost-vdpa.c
> > > > > @@ -26,12 +26,14 @@
> > > > >    #include <err.h>
> > > > >    #include "standard-headers/linux/virtio_net.h"
> > > > >    #include "monitor/monitor.h"
> > > > > +#include "migration/blocker.h"
> > > > >    #include "hw/virtio/vhost.h"
> > > > >
> > > > >    /* Todo:need to add the multiqueue support here */
> > > > >    typedef struct VhostVDPAState {
> > > > >        NetClientState nc;
> > > > >        struct vhost_vdpa vhost_vdpa;
> > > > > +    Error *migration_blocker;
> > > >
> > > > Any reason we can't use the mivration_blocker in vhost_dev structure?
> > > >
> > > > I believe we don't need to wait until start to know we can't migrate.
> > > >
> > > Device migratability also depends on features that the guest acks.
> >
> >
> > This sounds a little bit tricky, more below:
> >
> >
> > >
> > > For example, if the device does not support ASID it can be migrated as
> > > long as _F_CVQ is not acked.
> >
> >
> > The management may notice a non-consistent behavior in this case. I wonder
> > if we can simply check the host features.
> >

That's right, and I can see how that can be an issue.

However, the check for the ASID is based on queue indexes at the
moment. If we want to register the blocker at the initialization
moment the only option I see is to do two features ack & reset cycle:
one with MQ and another one without MQ.

Would it be more correct to assume the device will assign the right
ASID only probing one configuration? I don't think so but I'm ok to
leave the code that way if we agree it is more viable.

> > Thanks
>
>
> Yes the issue is that ack can happen after migration started.
> I don't think this kind of blocker appearing during migration
> is currently expected/supported well. Is it?
>

In that case the guest cannot DRIVER_OK the device, because the call
to migrate_add_blocker fails and the error propagates from
vhost_net_start up to the virtio device.

But I can also see how this is inconvenient and to add a migration
blocker at initialization can simplify things here. As long as we
agree on the right way to probe I can send a new version that way for
sure.

Thanks!

> >
> > >
> > > Thanks!
> > >
> > > > Thanks
> > > >
> > > >
> > > > >        VHostNetState *vhost_net;
> > > > >
> > > > >        /* Control commands shadow buffers */
> > > > > @@ -433,9 +435,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> > > > >                g_strerror(errno), errno);
> > > > >            return -1;
> > > > >        }
> > > > > -    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
> > > > > -        !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
> > > > > -        return 0;
> > > > > +    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
> > > > > +        error_setg(&s->migration_blocker,
> > > > > +                   "vdpa device %s does not support ASID",
> > > > > +                   nc->name);
> > > > > +        goto out;
> > > > > +    }
> > > > > +    if (!vhost_vdpa_net_valid_svq_features(v->dev->features,
> > > > > +                                           &s->migration_blocker)) {
> > > > > +        goto out;
> > > > >        }
> > > > >
> > > > >        /*
> > > > > @@ -455,7 +463,10 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> > > > >            }
> > > > >
> > > > >            if (group == cvq_group) {
> > > > > -            return 0;
> > > > > +            error_setg(&s->migration_blocker,
> > > > > +                "vdpa %s vq %d group %"PRId64" is the same as cvq group "
> > > > > +                "%"PRId64, nc->name, i, group, cvq_group);
> > > > > +            goto out;
> > > > >            }
> > > > >        }
> > > > >
> > > > > @@ -468,8 +479,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
> > > > >        s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
> > > > >
> > > > >    out:
> > > > > -    if (!s->vhost_vdpa.shadow_vqs_enabled) {
> > > > > -        return 0;
> > > > > +    if (s->migration_blocker) {
> > > > > +        Error *errp = NULL;
> > > > > +        r = migrate_add_blocker(s->migration_blocker, &errp);
> > > > > +        if (unlikely(r != 0)) {
> > > > > +            g_clear_pointer(&s->migration_blocker, error_free);
> > > > > +            error_report_err(errp);
> > > > > +        }
> > > > > +
> > > > > +        return r;
> > > > >        }
> > > > >
> > > > >        s0 = vhost_vdpa_net_first_nc_vdpa(s);
> > > > > @@ -513,6 +531,11 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
> > > > >            vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
> > > > >        }
> > > > >
> > > > > +    if (s->migration_blocker) {
> > > > > +        migrate_del_blocker(s->migration_blocker);
> > > > > +        g_clear_pointer(&s->migration_blocker, error_free);
> > > > > +    }
> > > > > +
> > > > >        vhost_vdpa_net_client_stop(nc);
> > > > >    }
> > > > >
>
Jason Wang Jan. 17, 2023, 5:42 a.m. UTC | #6
在 2023/1/16 17:33, Eugenio Perez Martin 写道:
> On Mon, Jan 16, 2023 at 6:24 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>> On Mon, Jan 16, 2023 at 11:34:20AM +0800, Jason Wang wrote:
>>> 在 2023/1/13 15:46, Eugenio Perez Martin 写道:
>>>> On Fri, Jan 13, 2023 at 5:25 AM Jason Wang <jasowang@redhat.com> wrote:
>>>>> 在 2023/1/13 01:24, Eugenio Pérez 写道:
>>>>>> A vdpa net device must initialize with SVQ in order to be migratable,
>>>>>> and initialization code verifies conditions.  If the device is not
>>>>>> initialized with the x-svq parameter, it will not expose _F_LOG so vhost
>>>>>> sybsystem will block VM migration from its initialization.
>>>>>>
>>>>>> Next patches change this. Net data VQs will be shadowed only at
>>>>>> migration time and vdpa net devices need to expose _F_LOG as long as it
>>>>>> can go to SVQ.
>>>>>>
>>>>>> Since we don't know that at initialization time but at start, add an
>>>>>> independent blocker at CVQ.
>>>>>>
>>>>>> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
>>>>>> ---
>>>>>>     net/vhost-vdpa.c | 35 +++++++++++++++++++++++++++++------
>>>>>>     1 file changed, 29 insertions(+), 6 deletions(-)
>>>>>>
>>>>>> diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
>>>>>> index 631424d9c4..2ca93e850a 100644
>>>>>> --- a/net/vhost-vdpa.c
>>>>>> +++ b/net/vhost-vdpa.c
>>>>>> @@ -26,12 +26,14 @@
>>>>>>     #include <err.h>
>>>>>>     #include "standard-headers/linux/virtio_net.h"
>>>>>>     #include "monitor/monitor.h"
>>>>>> +#include "migration/blocker.h"
>>>>>>     #include "hw/virtio/vhost.h"
>>>>>>
>>>>>>     /* Todo:need to add the multiqueue support here */
>>>>>>     typedef struct VhostVDPAState {
>>>>>>         NetClientState nc;
>>>>>>         struct vhost_vdpa vhost_vdpa;
>>>>>> +    Error *migration_blocker;
>>>>> Any reason we can't use the mivration_blocker in vhost_dev structure?
>>>>>
>>>>> I believe we don't need to wait until start to know we can't migrate.
>>>>>
>>>> Device migratability also depends on features that the guest acks.
>>>
>>> This sounds a little bit tricky, more below:
>>>
>>>
>>>> For example, if the device does not support ASID it can be migrated as
>>>> long as _F_CVQ is not acked.
>>>
>>> The management may notice a non-consistent behavior in this case. I wonder
>>> if we can simply check the host features.
>>>
> That's right, and I can see how that can be an issue.
>
> However, the check for the ASID is based on queue indexes at the
> moment. If we want to register the blocker at the initialization
> moment the only option I see is to do two features ack & reset cycle:
> one with MQ and another one without MQ.


That's should be fine, or any issue you saw?

Thanks


>
> Would it be more correct to assume the device will assign the right
> ASID only probing one configuration? I don't think so but I'm ok to
> leave the code that way if we agree it is more viable.
>
>>> Thanks
>>
>> Yes the issue is that ack can happen after migration started.
>> I don't think this kind of blocker appearing during migration
>> is currently expected/supported well. Is it?
>>
> In that case the guest cannot DRIVER_OK the device, because the call
> to migrate_add_blocker fails and the error propagates from
> vhost_net_start up to the virtio device.
>
> But I can also see how this is inconvenient and to add a migration
> blocker at initialization can simplify things here. As long as we
> agree on the right way to probe I can send a new version that way for
> sure.
>
> Thanks!
>
>>>> Thanks!
>>>>
>>>>> Thanks
>>>>>
>>>>>
>>>>>>         VHostNetState *vhost_net;
>>>>>>
>>>>>>         /* Control commands shadow buffers */
>>>>>> @@ -433,9 +435,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>>>>>>                 g_strerror(errno), errno);
>>>>>>             return -1;
>>>>>>         }
>>>>>> -    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
>>>>>> -        !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
>>>>>> -        return 0;
>>>>>> +    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
>>>>>> +        error_setg(&s->migration_blocker,
>>>>>> +                   "vdpa device %s does not support ASID",
>>>>>> +                   nc->name);
>>>>>> +        goto out;
>>>>>> +    }
>>>>>> +    if (!vhost_vdpa_net_valid_svq_features(v->dev->features,
>>>>>> +                                           &s->migration_blocker)) {
>>>>>> +        goto out;
>>>>>>         }
>>>>>>
>>>>>>         /*
>>>>>> @@ -455,7 +463,10 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>>>>>>             }
>>>>>>
>>>>>>             if (group == cvq_group) {
>>>>>> -            return 0;
>>>>>> +            error_setg(&s->migration_blocker,
>>>>>> +                "vdpa %s vq %d group %"PRId64" is the same as cvq group "
>>>>>> +                "%"PRId64, nc->name, i, group, cvq_group);
>>>>>> +            goto out;
>>>>>>             }
>>>>>>         }
>>>>>>
>>>>>> @@ -468,8 +479,15 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc)
>>>>>>         s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
>>>>>>
>>>>>>     out:
>>>>>> -    if (!s->vhost_vdpa.shadow_vqs_enabled) {
>>>>>> -        return 0;
>>>>>> +    if (s->migration_blocker) {
>>>>>> +        Error *errp = NULL;
>>>>>> +        r = migrate_add_blocker(s->migration_blocker, &errp);
>>>>>> +        if (unlikely(r != 0)) {
>>>>>> +            g_clear_pointer(&s->migration_blocker, error_free);
>>>>>> +            error_report_err(errp);
>>>>>> +        }
>>>>>> +
>>>>>> +        return r;
>>>>>>         }
>>>>>>
>>>>>>         s0 = vhost_vdpa_net_first_nc_vdpa(s);
>>>>>> @@ -513,6 +531,11 @@ static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
>>>>>>             vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
>>>>>>         }
>>>>>>
>>>>>> +    if (s->migration_blocker) {
>>>>>> +        migrate_del_blocker(s->migration_blocker);
>>>>>> +        g_clear_pointer(&s->migration_blocker, error_free);
>>>>>> +    }
>>>>>> +
>>>>>>         vhost_vdpa_net_client_stop(nc);
>>>>>>     }
>>>>>>
diff mbox series

Patch

diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
index 631424d9c4..2ca93e850a 100644
--- a/net/vhost-vdpa.c
+++ b/net/vhost-vdpa.c
@@ -26,12 +26,14 @@ 
 #include <err.h>
 #include "standard-headers/linux/virtio_net.h"
 #include "monitor/monitor.h"
+#include "migration/blocker.h"
 #include "hw/virtio/vhost.h"
 
 /* Todo:need to add the multiqueue support here */
 typedef struct VhostVDPAState {
     NetClientState nc;
     struct vhost_vdpa vhost_vdpa;
+    Error *migration_blocker;
     VHostNetState *vhost_net;
 
     /* Control commands shadow buffers */
@@ -433,9 +435,15 @@  static int vhost_vdpa_net_cvq_start(NetClientState *nc)
             g_strerror(errno), errno);
         return -1;
     }
-    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) ||
-        !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) {
-        return 0;
+    if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID))) {
+        error_setg(&s->migration_blocker,
+                   "vdpa device %s does not support ASID",
+                   nc->name);
+        goto out;
+    }
+    if (!vhost_vdpa_net_valid_svq_features(v->dev->features,
+                                           &s->migration_blocker)) {
+        goto out;
     }
 
     /*
@@ -455,7 +463,10 @@  static int vhost_vdpa_net_cvq_start(NetClientState *nc)
         }
 
         if (group == cvq_group) {
-            return 0;
+            error_setg(&s->migration_blocker,
+                "vdpa %s vq %d group %"PRId64" is the same as cvq group "
+                "%"PRId64, nc->name, i, group, cvq_group);
+            goto out;
         }
     }
 
@@ -468,8 +479,15 @@  static int vhost_vdpa_net_cvq_start(NetClientState *nc)
     s->vhost_vdpa.address_space_id = VHOST_VDPA_NET_CVQ_ASID;
 
 out:
-    if (!s->vhost_vdpa.shadow_vqs_enabled) {
-        return 0;
+    if (s->migration_blocker) {
+        Error *errp = NULL;
+        r = migrate_add_blocker(s->migration_blocker, &errp);
+        if (unlikely(r != 0)) {
+            g_clear_pointer(&s->migration_blocker, error_free);
+            error_report_err(errp);
+        }
+
+        return r;
     }
 
     s0 = vhost_vdpa_net_first_nc_vdpa(s);
@@ -513,6 +531,11 @@  static void vhost_vdpa_net_cvq_stop(NetClientState *nc)
         vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->status);
     }
 
+    if (s->migration_blocker) {
+        migrate_del_blocker(s->migration_blocker);
+        g_clear_pointer(&s->migration_blocker, error_free);
+    }
+
     vhost_vdpa_net_client_stop(nc);
 }