diff mbox series

[vhost,20/23] vdpa/mlx5: Pre-create hardware VQs at vdpa .dev_add time

Message ID 20240617-stage-vdpa-vq-precreate-v1-20-8c0483f0ca2a@nvidia.com (mailing list archive)
State Superseded
Headers show
Series vdpa/mlx5: Pre-create HW VQs to reduce LM downtime | expand

Commit Message

Dragos Tatulea June 17, 2024, 3:07 p.m. UTC
Currently, hardware VQs are created right when the vdpa device gets into
DRIVER_OK state. That is easier because most of the VQ state is known by
then.

This patch switches to creating all VQs and their associated resources
at device creation time. The motivation is to reduce the vdpa device
live migration downtime by moving the expensive operation of creating
all the hardware VQs and their associated resources out of downtime on
the destination VM.

The VQs are now created in a blank state. The VQ configuration will
happen later, on DRIVER_OK. Then the configuration will be applied when
the VQs are moved to the Ready state.

When .set_vq_ready() is called on a VQ before DRIVER_OK, special care is
needed: now that the VQ is already created, a resume_vq() would be
triggered too early, when no MR has been configured yet. Skip calling
resume_vq() in this case and let it be handled during DRIVER_OK.

For virtio-vdpa, the device configuration is done earlier during
.vdpa_dev_add() by vdpa_register_device(). Avoid calling
setup_vq_resources() a second time in that case.

Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
---
 drivers/vdpa/mlx5/net/mlx5_vnet.c | 37 ++++++++++++++++++++++++++++++++-----
 1 file changed, 32 insertions(+), 5 deletions(-)

Comments

Eugenio Perez Martin June 19, 2024, 3:54 p.m. UTC | #1
On Mon, Jun 17, 2024 at 5:09 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>
> Currently, hardware VQs are created right when the vdpa device gets into
> DRIVER_OK state. That is easier because most of the VQ state is known by
> then.
>
> This patch switches to creating all VQs and their associated resources
> at device creation time. The motivation is to reduce the vdpa device
> live migration downtime by moving the expensive operation of creating
> all the hardware VQs and their associated resources out of downtime on
> the destination VM.
>
> The VQs are now created in a blank state. The VQ configuration will
> happen later, on DRIVER_OK. Then the configuration will be applied when
> the VQs are moved to the Ready state.
>
> When .set_vq_ready() is called on a VQ before DRIVER_OK, special care is
> needed: now that the VQ is already created a resume_vq() will be
> triggered too early when no mr has been configured yet. Skip calling
> resume_vq() in this case, let it be handled during DRIVER_OK.
>
> For virtio-vdpa, the device configuration is done earlier during
> .vdpa_dev_add() by vdpa_register_device(). Avoid calling
> setup_vq_resources() a second time in that case.
>

I guess this happens if virtio_vdpa is already loaded, but I cannot
see how this is different here. Apart from the IOTLB, what else does
it change from the mlx5_vdpa POV?

> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
> ---
>  drivers/vdpa/mlx5/net/mlx5_vnet.c | 37 ++++++++++++++++++++++++++++++++-----
>  1 file changed, 32 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 249b5afbe34a..b2836fd3d1dd 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2444,7 +2444,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
>         mvq = &ndev->vqs[idx];
>         if (!ready) {
>                 suspend_vq(ndev, mvq);
> -       } else {
> +       } else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
>                 if (resume_vq(ndev, mvq))
>                         ready = false;
>         }
> @@ -3078,10 +3078,18 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
>                                 goto err_setup;
>                         }
>                         register_link_notifier(ndev);
> -                       err = setup_vq_resources(ndev, true);
> -                       if (err) {
> -                               mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> -                               goto err_driver;
> +                       if (ndev->setup) {
> +                               err = resume_vqs(ndev);
> +                               if (err) {
> +                                       mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
> +                                       goto err_driver;
> +                               }
> +                       } else {
> +                               err = setup_vq_resources(ndev, true);
> +                               if (err) {
> +                                       mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> +                                       goto err_driver;
> +                               }
>                         }
>                 } else {
>                         mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
> @@ -3142,6 +3150,7 @@ static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
>                 if (mlx5_vdpa_create_dma_mr(mvdev))
>                         mlx5_vdpa_warn(mvdev, "create MR failed\n");
>         }
> +       setup_vq_resources(ndev, false);
>         up_write(&ndev->reslock);
>
>         return 0;
> @@ -3836,8 +3845,21 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>                 goto err_reg;
>
>         mgtdev->ndev = ndev;
> +
> +       /* For virtio-vdpa, the device was set up during device register. */
> +       if (ndev->setup)
> +               return 0;
> +
> +       down_write(&ndev->reslock);
> +       err = setup_vq_resources(ndev, false);
> +       up_write(&ndev->reslock);
> +       if (err)
> +               goto err_setup_vq_res;
> +
>         return 0;
>
> +err_setup_vq_res:
> +       _vdpa_unregister_device(&mvdev->vdev);
>  err_reg:
>         destroy_workqueue(mvdev->wq);
>  err_res2:
> @@ -3863,6 +3885,11 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
>
>         unregister_link_notifier(ndev);
>         _vdpa_unregister_device(dev);
> +
> +       down_write(&ndev->reslock);
> +       teardown_vq_resources(ndev);
> +       up_write(&ndev->reslock);
> +
>         wq = mvdev->wq;
>         mvdev->wq = NULL;
>         destroy_workqueue(wq);
>
> --
> 2.45.1
>
Dragos Tatulea June 26, 2024, 9:27 a.m. UTC | #2
On Wed, 2024-06-19 at 17:54 +0200, Eugenio Perez Martin wrote:
> On Mon, Jun 17, 2024 at 5:09 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > 
> > Currently, hardware VQs are created right when the vdpa device gets into
> > DRIVER_OK state. That is easier because most of the VQ state is known by
> > then.
> > 
> > This patch switches to creating all VQs and their associated resources
> > at device creation time. The motivation is to reduce the vdpa device
> > live migration downtime by moving the expensive operation of creating
> > all the hardware VQs and their associated resources out of downtime on
> > the destination VM.
> > 
> > The VQs are now created in a blank state. The VQ configuration will
> > happen later, on DRIVER_OK. Then the configuration will be applied when
> > the VQs are moved to the Ready state.
> > 
> > When .set_vq_ready() is called on a VQ before DRIVER_OK, special care is
> > needed: now that the VQ is already created a resume_vq() will be
> > triggered too early when no mr has been configured yet. Skip calling
> > resume_vq() in this case, let it be handled during DRIVER_OK.
> > 
> > For virtio-vdpa, the device configuration is done earlier during
> > .vdpa_dev_add() by vdpa_register_device(). Avoid calling
> > setup_vq_resources() a second time in that case.
> > 
> 
> I guess this happens if virtio_vdpa is already loaded, but I cannot
> see how this is different here. Apart from the IOTLB, what else does
> it change from the mlx5_vdpa POV?
> 
I don't understand your question, could you rephrase or provide more context
please?

Thanks,
Dragos

> > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> > Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
> > ---
> >  drivers/vdpa/mlx5/net/mlx5_vnet.c | 37 ++++++++++++++++++++++++++++++++-----
> >  1 file changed, 32 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > index 249b5afbe34a..b2836fd3d1dd 100644
> > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > @@ -2444,7 +2444,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> >         mvq = &ndev->vqs[idx];
> >         if (!ready) {
> >                 suspend_vq(ndev, mvq);
> > -       } else {
> > +       } else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
> >                 if (resume_vq(ndev, mvq))
> >                         ready = false;
> >         }
> > @@ -3078,10 +3078,18 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
> >                                 goto err_setup;
> >                         }
> >                         register_link_notifier(ndev);
> > -                       err = setup_vq_resources(ndev, true);
> > -                       if (err) {
> > -                               mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > -                               goto err_driver;
> > +                       if (ndev->setup) {
> > +                               err = resume_vqs(ndev);
> > +                               if (err) {
> > +                                       mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
> > +                                       goto err_driver;
> > +                               }
> > +                       } else {
> > +                               err = setup_vq_resources(ndev, true);
> > +                               if (err) {
> > +                                       mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > +                                       goto err_driver;
> > +                               }
> >                         }
> >                 } else {
> >                         mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
> > @@ -3142,6 +3150,7 @@ static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
> >                 if (mlx5_vdpa_create_dma_mr(mvdev))
> >                         mlx5_vdpa_warn(mvdev, "create MR failed\n");
> >         }
> > +       setup_vq_resources(ndev, false);
> >         up_write(&ndev->reslock);
> > 
> >         return 0;
> > @@ -3836,8 +3845,21 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
> >                 goto err_reg;
> > 
> >         mgtdev->ndev = ndev;
> > +
> > +       /* For virtio-vdpa, the device was set up during device register. */
> > +       if (ndev->setup)
> > +               return 0;
> > +
> > +       down_write(&ndev->reslock);
> > +       err = setup_vq_resources(ndev, false);
> > +       up_write(&ndev->reslock);
> > +       if (err)
> > +               goto err_setup_vq_res;
> > +
> >         return 0;
> > 
> > +err_setup_vq_res:
> > +       _vdpa_unregister_device(&mvdev->vdev);
> >  err_reg:
> >         destroy_workqueue(mvdev->wq);
> >  err_res2:
> > @@ -3863,6 +3885,11 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
> > 
> >         unregister_link_notifier(ndev);
> >         _vdpa_unregister_device(dev);
> > +
> > +       down_write(&ndev->reslock);
> > +       teardown_vq_resources(ndev);
> > +       up_write(&ndev->reslock);
> > +
> >         wq = mvdev->wq;
> >         mvdev->wq = NULL;
> >         destroy_workqueue(wq);
> > 
> > --
> > 2.45.1
> > 
>
Eugenio Perez Martin July 3, 2024, 4:01 p.m. UTC | #3
On Wed, Jun 26, 2024 at 11:27 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>
> On Wed, 2024-06-19 at 17:54 +0200, Eugenio Perez Martin wrote:
> > On Mon, Jun 17, 2024 at 5:09 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > >
> > > Currently, hardware VQs are created right when the vdpa device gets into
> > > DRIVER_OK state. That is easier because most of the VQ state is known by
> > > then.
> > >
> > > This patch switches to creating all VQs and their associated resources
> > > at device creation time. The motivation is to reduce the vdpa device
> > > live migration downtime by moving the expensive operation of creating
> > > all the hardware VQs and their associated resources out of downtime on
> > > the destination VM.
> > >
> > > The VQs are now created in a blank state. The VQ configuration will
> > > happen later, on DRIVER_OK. Then the configuration will be applied when
> > > the VQs are moved to the Ready state.
> > >
> > > When .set_vq_ready() is called on a VQ before DRIVER_OK, special care is
> > > needed: now that the VQ is already created a resume_vq() will be
> > > triggered too early when no mr has been configured yet. Skip calling
> > > resume_vq() in this case, let it be handled during DRIVER_OK.
> > >
> > > For virtio-vdpa, the device configuration is done earlier during
> > > .vdpa_dev_add() by vdpa_register_device(). Avoid calling
> > > setup_vq_resources() a second time in that case.
> > >
> >
> > I guess this happens if virtio_vdpa is already loaded, but I cannot
> > see how this is different here. Apart from the IOTLB, what else does
> > it change from the mlx5_vdpa POV?
> >
> I don't understand your question, could you rephrase or provide more context
> please?
>

My main point is that the vdpa parent driver should not be able to
tell the difference between vhost_vdpa and virtio_vdpa. The only
difference I can think of is because of the vhost IOTLB handling.

Do you also observe this behavior if you add the device with "vdpa
add" without the virtio_vdpa module loaded, and then modprobe
virtio_vdpa?

At least the comment should be something along the lines of "If we
have all the information to initialize the device, pre-warm it here"
or similar.

> Thanks,
> Dragos
>
> > > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> > > Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
> > > ---
> > >  drivers/vdpa/mlx5/net/mlx5_vnet.c | 37 ++++++++++++++++++++++++++++++++-----
> > >  1 file changed, 32 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > index 249b5afbe34a..b2836fd3d1dd 100644
> > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > @@ -2444,7 +2444,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > >         mvq = &ndev->vqs[idx];
> > >         if (!ready) {
> > >                 suspend_vq(ndev, mvq);
> > > -       } else {
> > > +       } else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
> > >                 if (resume_vq(ndev, mvq))
> > >                         ready = false;
> > >         }
> > > @@ -3078,10 +3078,18 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
> > >                                 goto err_setup;
> > >                         }
> > >                         register_link_notifier(ndev);
> > > -                       err = setup_vq_resources(ndev, true);
> > > -                       if (err) {
> > > -                               mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > > -                               goto err_driver;
> > > +                       if (ndev->setup) {
> > > +                               err = resume_vqs(ndev);
> > > +                               if (err) {
> > > +                                       mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
> > > +                                       goto err_driver;
> > > +                               }
> > > +                       } else {
> > > +                               err = setup_vq_resources(ndev, true);
> > > +                               if (err) {
> > > +                                       mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > > +                                       goto err_driver;
> > > +                               }
> > >                         }
> > >                 } else {
> > >                         mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
> > > @@ -3142,6 +3150,7 @@ static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
> > >                 if (mlx5_vdpa_create_dma_mr(mvdev))
> > >                         mlx5_vdpa_warn(mvdev, "create MR failed\n");
> > >         }
> > > +       setup_vq_resources(ndev, false);
> > >         up_write(&ndev->reslock);
> > >
> > >         return 0;
> > > @@ -3836,8 +3845,21 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
> > >                 goto err_reg;
> > >
> > >         mgtdev->ndev = ndev;
> > > +
> > > +       /* For virtio-vdpa, the device was set up during device register. */
> > > +       if (ndev->setup)
> > > +               return 0;
> > > +
> > > +       down_write(&ndev->reslock);
> > > +       err = setup_vq_resources(ndev, false);
> > > +       up_write(&ndev->reslock);
> > > +       if (err)
> > > +               goto err_setup_vq_res;
> > > +
> > >         return 0;
> > >
> > > +err_setup_vq_res:
> > > +       _vdpa_unregister_device(&mvdev->vdev);
> > >  err_reg:
> > >         destroy_workqueue(mvdev->wq);
> > >  err_res2:
> > > @@ -3863,6 +3885,11 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
> > >
> > >         unregister_link_notifier(ndev);
> > >         _vdpa_unregister_device(dev);
> > > +
> > > +       down_write(&ndev->reslock);
> > > +       teardown_vq_resources(ndev);
> > > +       up_write(&ndev->reslock);
> > > +
> > >         wq = mvdev->wq;
> > >         mvdev->wq = NULL;
> > >         destroy_workqueue(wq);
> > >
> > > --
> > > 2.45.1
> > >
> >
>
Dragos Tatulea July 8, 2024, 11:01 a.m. UTC | #4
On Wed, 2024-07-03 at 18:01 +0200, Eugenio Perez Martin wrote:
> On Wed, Jun 26, 2024 at 11:27 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > 
> > On Wed, 2024-06-19 at 17:54 +0200, Eugenio Perez Martin wrote:
> > > On Mon, Jun 17, 2024 at 5:09 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > > > 
> > > > Currently, hardware VQs are created right when the vdpa device gets into
> > > > DRIVER_OK state. That is easier because most of the VQ state is known by
> > > > then.
> > > > 
> > > > This patch switches to creating all VQs and their associated resources
> > > > at device creation time. The motivation is to reduce the vdpa device
> > > > live migration downtime by moving the expensive operation of creating
> > > > all the hardware VQs and their associated resources out of downtime on
> > > > the destination VM.
> > > > 
> > > > The VQs are now created in a blank state. The VQ configuration will
> > > > happen later, on DRIVER_OK. Then the configuration will be applied when
> > > > the VQs are moved to the Ready state.
> > > > 
> > > > When .set_vq_ready() is called on a VQ before DRIVER_OK, special care is
> > > > needed: now that the VQ is already created a resume_vq() will be
> > > > triggered too early when no mr has been configured yet. Skip calling
> > > > resume_vq() in this case, let it be handled during DRIVER_OK.
> > > > 
> > > > For virtio-vdpa, the device configuration is done earlier during
> > > > .vdpa_dev_add() by vdpa_register_device(). Avoid calling
> > > > setup_vq_resources() a second time in that case.
> > > > 
> > > 
> > > I guess this happens if virtio_vdpa is already loaded, but I cannot
> > > see how this is different here. Apart from the IOTLB, what else does
> > > it change from the mlx5_vdpa POV?
> > > 
> > I don't understand your question, could you rephrase or provide more context
> > please?
> > 
> 
> My main point is that the vdpa parent driver should not be able to
> tell the difference between vhost_vdpa and virtio_vdpa. The only
> difference I can think of is because of the vhost IOTLB handling.
> 
> Do you also observe this behavior if you add the device with "vdpa
> add" without the virtio_vdpa module loaded, and then modprobe
> virtio_vdpa?
> 
Aah, now I understand what you mean. Indeed, in my tests I was loading the
virtio_vdpa module before adding the device. When doing it the other way around,
the device doesn't get configured during probe.
 

> At least the comment should be something in the line of "If we have
> all the information to initialize the device, pre-warm it here" or
> similar.
Makes sense. I will send a v3 with the commit message and the code comment updated.

> 
> > Thanks,
> > Dragos
> > 
> > > > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> > > > Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
> > > > ---
> > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c | 37 ++++++++++++++++++++++++++++++++-----
> > > >  1 file changed, 32 insertions(+), 5 deletions(-)
> > > > 
> > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > index 249b5afbe34a..b2836fd3d1dd 100644
> > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > @@ -2444,7 +2444,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > >         mvq = &ndev->vqs[idx];
> > > >         if (!ready) {
> > > >                 suspend_vq(ndev, mvq);
> > > > -       } else {
> > > > +       } else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
> > > >                 if (resume_vq(ndev, mvq))
> > > >                         ready = false;
> > > >         }
> > > > @@ -3078,10 +3078,18 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
> > > >                                 goto err_setup;
> > > >                         }
> > > >                         register_link_notifier(ndev);
> > > > -                       err = setup_vq_resources(ndev, true);
> > > > -                       if (err) {
> > > > -                               mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > > > -                               goto err_driver;
> > > > +                       if (ndev->setup) {
> > > > +                               err = resume_vqs(ndev);
> > > > +                               if (err) {
> > > > +                                       mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
> > > > +                                       goto err_driver;
> > > > +                               }
> > > > +                       } else {
> > > > +                               err = setup_vq_resources(ndev, true);
> > > > +                               if (err) {
> > > > +                                       mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > > > +                                       goto err_driver;
> > > > +                               }
> > > >                         }
> > > >                 } else {
> > > >                         mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
> > > > @@ -3142,6 +3150,7 @@ static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
> > > >                 if (mlx5_vdpa_create_dma_mr(mvdev))
> > > >                         mlx5_vdpa_warn(mvdev, "create MR failed\n");
> > > >         }
> > > > +       setup_vq_resources(ndev, false);
> > > >         up_write(&ndev->reslock);
> > > > 
> > > >         return 0;
> > > > @@ -3836,8 +3845,21 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
> > > >                 goto err_reg;
> > > > 
> > > >         mgtdev->ndev = ndev;
> > > > +
> > > > +       /* For virtio-vdpa, the device was set up during device register. */
> > > > +       if (ndev->setup)
> > > > +               return 0;
> > > > +
> > > > +       down_write(&ndev->reslock);
> > > > +       err = setup_vq_resources(ndev, false);
> > > > +       up_write(&ndev->reslock);
> > > > +       if (err)
> > > > +               goto err_setup_vq_res;
> > > > +
> > > >         return 0;
> > > > 
> > > > +err_setup_vq_res:
> > > > +       _vdpa_unregister_device(&mvdev->vdev);
> > > >  err_reg:
> > > >         destroy_workqueue(mvdev->wq);
> > > >  err_res2:
> > > > @@ -3863,6 +3885,11 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
> > > > 
> > > >         unregister_link_notifier(ndev);
> > > >         _vdpa_unregister_device(dev);
> > > > +
> > > > +       down_write(&ndev->reslock);
> > > > +       teardown_vq_resources(ndev);
> > > > +       up_write(&ndev->reslock);
> > > > +
> > > >         wq = mvdev->wq;
> > > >         mvdev->wq = NULL;
> > > >         destroy_workqueue(wq);
> > > > 
> > > > --
> > > > 2.45.1
> > > > 
> > > 
> > 
>
Michael S. Tsirkin July 8, 2024, 11:11 a.m. UTC | #5
On Mon, Jul 08, 2024 at 11:01:39AM +0000, Dragos Tatulea wrote:
> On Wed, 2024-07-03 at 18:01 +0200, Eugenio Perez Martin wrote:
> > On Wed, Jun 26, 2024 at 11:27 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > > 
> > > On Wed, 2024-06-19 at 17:54 +0200, Eugenio Perez Martin wrote:
> > > > On Mon, Jun 17, 2024 at 5:09 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > > > > 
> > > > > Currently, hardware VQs are created right when the vdpa device gets into
> > > > > DRIVER_OK state. That is easier because most of the VQ state is known by
> > > > > then.
> > > > > 
> > > > > This patch switches to creating all VQs and their associated resources
> > > > > at device creation time. The motivation is to reduce the vdpa device
> > > > > live migration downtime by moving the expensive operation of creating
> > > > > all the hardware VQs and their associated resources out of downtime on
> > > > > the destination VM.
> > > > > 
> > > > > The VQs are now created in a blank state. The VQ configuration will
> > > > > happen later, on DRIVER_OK. Then the configuration will be applied when
> > > > > the VQs are moved to the Ready state.
> > > > > 
> > > > > When .set_vq_ready() is called on a VQ before DRIVER_OK, special care is
> > > > > needed: now that the VQ is already created a resume_vq() will be
> > > > > triggered too early when no mr has been configured yet. Skip calling
> > > > > resume_vq() in this case, let it be handled during DRIVER_OK.
> > > > > 
> > > > > For virtio-vdpa, the device configuration is done earlier during
> > > > > .vdpa_dev_add() by vdpa_register_device(). Avoid calling
> > > > > setup_vq_resources() a second time in that case.
> > > > > 
> > > > 
> > > > I guess this happens if virtio_vdpa is already loaded, but I cannot
> > > > see how this is different here. Apart from the IOTLB, what else does
> > > > it change from the mlx5_vdpa POV?
> > > > 
> > > I don't understand your question, could you rephrase or provide more context
> > > please?
> > > 
> > 
> > My main point is that the vdpa parent driver should not be able to
> > tell the difference between vhost_vdpa and virtio_vdpa. The only
> > difference I can think of is because of the vhost IOTLB handling.
> > 
> > Do you also observe this behavior if you add the device with "vdpa
> > add" without the virtio_vdpa module loaded, and then modprobe
> > virtio_vdpa?
> > 
> Aah, now I understand what you mean. Indeed in my tests I was loading the
> virtio_vdpa module before adding the device. When doing it the other way around
> the device doesn't get configured during probe.
>  
> 
> > At least the comment should be something in the line of "If we have
> > all the information to initialize the device, pre-warm it here" or
> > similar.
> Makes sense. I will send a v3 with the commit + comment message update.


Is the commit update the only change, then?

> > 
> > > Thanks,
> > > Dragos
> > > 
> > > > > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> > > > > Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
> > > > > ---
> > > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c | 37 ++++++++++++++++++++++++++++++++-----
> > > > >  1 file changed, 32 insertions(+), 5 deletions(-)
> > > > > 
> > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > index 249b5afbe34a..b2836fd3d1dd 100644
> > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > @@ -2444,7 +2444,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > >         mvq = &ndev->vqs[idx];
> > > > >         if (!ready) {
> > > > >                 suspend_vq(ndev, mvq);
> > > > > -       } else {
> > > > > +       } else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
> > > > >                 if (resume_vq(ndev, mvq))
> > > > >                         ready = false;
> > > > >         }
> > > > > @@ -3078,10 +3078,18 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
> > > > >                                 goto err_setup;
> > > > >                         }
> > > > >                         register_link_notifier(ndev);
> > > > > -                       err = setup_vq_resources(ndev, true);
> > > > > -                       if (err) {
> > > > > -                               mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > > > > -                               goto err_driver;
> > > > > +                       if (ndev->setup) {
> > > > > +                               err = resume_vqs(ndev);
> > > > > +                               if (err) {
> > > > > +                                       mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
> > > > > +                                       goto err_driver;
> > > > > +                               }
> > > > > +                       } else {
> > > > > +                               err = setup_vq_resources(ndev, true);
> > > > > +                               if (err) {
> > > > > +                                       mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > > > > +                                       goto err_driver;
> > > > > +                               }
> > > > >                         }
> > > > >                 } else {
> > > > >                         mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
> > > > > @@ -3142,6 +3150,7 @@ static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
> > > > >                 if (mlx5_vdpa_create_dma_mr(mvdev))
> > > > >                         mlx5_vdpa_warn(mvdev, "create MR failed\n");
> > > > >         }
> > > > > +       setup_vq_resources(ndev, false);
> > > > >         up_write(&ndev->reslock);
> > > > > 
> > > > >         return 0;
> > > > > @@ -3836,8 +3845,21 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
> > > > >                 goto err_reg;
> > > > > 
> > > > >         mgtdev->ndev = ndev;
> > > > > +
> > > > > +       /* For virtio-vdpa, the device was set up during device register. */
> > > > > +       if (ndev->setup)
> > > > > +               return 0;
> > > > > +
> > > > > +       down_write(&ndev->reslock);
> > > > > +       err = setup_vq_resources(ndev, false);
> > > > > +       up_write(&ndev->reslock);
> > > > > +       if (err)
> > > > > +               goto err_setup_vq_res;
> > > > > +
> > > > >         return 0;
> > > > > 
> > > > > +err_setup_vq_res:
> > > > > +       _vdpa_unregister_device(&mvdev->vdev);
> > > > >  err_reg:
> > > > >         destroy_workqueue(mvdev->wq);
> > > > >  err_res2:
> > > > > @@ -3863,6 +3885,11 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
> > > > > 
> > > > >         unregister_link_notifier(ndev);
> > > > >         _vdpa_unregister_device(dev);
> > > > > +
> > > > > +       down_write(&ndev->reslock);
> > > > > +       teardown_vq_resources(ndev);
> > > > > +       up_write(&ndev->reslock);
> > > > > +
> > > > >         wq = mvdev->wq;
> > > > >         mvdev->wq = NULL;
> > > > >         destroy_workqueue(wq);
> > > > > 
> > > > > --
> > > > > 2.45.1
> > > > > 
> > > > 
> > > 
> > 
>
Dragos Tatulea July 8, 2024, 11:17 a.m. UTC | #6
On Mon, 2024-07-08 at 07:11 -0400, Michael S. Tsirkin wrote:
> On Mon, Jul 08, 2024 at 11:01:39AM +0000, Dragos Tatulea wrote:
> > On Wed, 2024-07-03 at 18:01 +0200, Eugenio Perez Martin wrote:
> > > On Wed, Jun 26, 2024 at 11:27 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > > > 
> > > > On Wed, 2024-06-19 at 17:54 +0200, Eugenio Perez Martin wrote:
> > > > > On Mon, Jun 17, 2024 at 5:09 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > > > > > 
> > > > > > Currently, hardware VQs are created right when the vdpa device gets into
> > > > > > DRIVER_OK state. That is easier because most of the VQ state is known by
> > > > > > then.
> > > > > > 
> > > > > > This patch switches to creating all VQs and their associated resources
> > > > > > at device creation time. The motivation is to reduce the vdpa device
> > > > > > live migration downtime by moving the expensive operation of creating
> > > > > > all the hardware VQs and their associated resources out of downtime on
> > > > > > the destination VM.
> > > > > > 
> > > > > > The VQs are now created in a blank state. The VQ configuration will
> > > > > > happen later, on DRIVER_OK. Then the configuration will be applied when
> > > > > > the VQs are moved to the Ready state.
> > > > > > 
> > > > > > When .set_vq_ready() is called on a VQ before DRIVER_OK, special care is
> > > > > > needed: now that the VQ is already created a resume_vq() will be
> > > > > > triggered too early when no mr has been configured yet. Skip calling
> > > > > > resume_vq() in this case, let it be handled during DRIVER_OK.
> > > > > > 
> > > > > > For virtio-vdpa, the device configuration is done earlier during
> > > > > > .vdpa_dev_add() by vdpa_register_device(). Avoid calling
> > > > > > setup_vq_resources() a second time in that case.
> > > > > > 
> > > > > 
> > > > > I guess this happens if virtio_vdpa is already loaded, but I cannot
> > > > > see how this is different here. Apart from the IOTLB, what else does
> > > > > it change from the mlx5_vdpa POV?
> > > > > 
> > > > I don't understand your question, could you rephrase or provide more context
> > > > please?
> > > > 
> > > 
> > > My main point is that the vdpa parent driver should not be able to
> > > tell the difference between vhost_vdpa and virtio_vdpa. The only
> > > difference I can think of is because of the vhost IOTLB handling.
> > > 
> > > Do you also observe this behavior if you add the device with "vdpa
> > > add" without the virtio_vdpa module loaded, and then modprobe
> > > virtio_vdpa?
> > > 
> > Aah, now I understand what you mean. Indeed in my tests I was loading the
> > virtio_vdpa module before adding the device. When doing it the other way around
> > the device doesn't get configured during probe.
> >  
> > 
> > > At least the comment should be something in the line of "If we have
> > > all the information to initialize the device, pre-warm it here" or
> > > similar.
> > Makes sense. I will send a v3 with the commit + comment message update.
> 
> 
> Is commit update the only change then?
I was planning to drop the paragraph in the commit message (it is confusing) and
edit the comment below (scroll down to see which).

Let me know if I should send the v3 or not. I have it prepared.

> 
> > > 
> > > > Thanks,
> > > > Dragos
> > > > 
> > > > > > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> > > > > > Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
> > > > > > ---
> > > > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c | 37 ++++++++++++++++++++++++++++++++-----
> > > > > >  1 file changed, 32 insertions(+), 5 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > index 249b5afbe34a..b2836fd3d1dd 100644
> > > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > @@ -2444,7 +2444,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > > >         mvq = &ndev->vqs[idx];
> > > > > >         if (!ready) {
> > > > > >                 suspend_vq(ndev, mvq);
> > > > > > -       } else {
> > > > > > +       } else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
> > > > > >                 if (resume_vq(ndev, mvq))
> > > > > >                         ready = false;
> > > > > >         }
> > > > > > @@ -3078,10 +3078,18 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
> > > > > >                                 goto err_setup;
> > > > > >                         }
> > > > > >                         register_link_notifier(ndev);
> > > > > > -                       err = setup_vq_resources(ndev, true);
> > > > > > -                       if (err) {
> > > > > > -                               mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > > > > > -                               goto err_driver;
> > > > > > +                       if (ndev->setup) {
> > > > > > +                               err = resume_vqs(ndev);
> > > > > > +                               if (err) {
> > > > > > +                                       mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
> > > > > > +                                       goto err_driver;
> > > > > > +                               }
> > > > > > +                       } else {
> > > > > > +                               err = setup_vq_resources(ndev, true);
> > > > > > +                               if (err) {
> > > > > > +                                       mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > > > > > +                                       goto err_driver;
> > > > > > +                               }
> > > > > >                         }
> > > > > >                 } else {
> > > > > >                         mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
> > > > > > @@ -3142,6 +3150,7 @@ static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
> > > > > >                 if (mlx5_vdpa_create_dma_mr(mvdev))
> > > > > >                         mlx5_vdpa_warn(mvdev, "create MR failed\n");
> > > > > >         }
> > > > > > +       setup_vq_resources(ndev, false);
> > > > > >         up_write(&ndev->reslock);
> > > > > > 
> > > > > >         return 0;
> > > > > > @@ -3836,8 +3845,21 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
> > > > > >                 goto err_reg;
> > > > > > 
> > > > > >         mgtdev->ndev = ndev;
> > > > > > +
> > > > > > +       /* For virtio-vdpa, the device was set up during device register. */
> > > > > > +       if (ndev->setup)
> > > > > > +               return 0;
> > > > > > +
This comment updated to:

/* The VQs might have been pre-created during device register.
 * This happens when virtio_vdpa is loaded before the vdpa device is added.
 */


> > > > > > +       down_write(&ndev->reslock);
> > > > > > +       err = setup_vq_resources(ndev, false);
> > > > > > +       up_write(&ndev->reslock);
> > > > > > +       if (err)
> > > > > > +               goto err_setup_vq_res;
> > > > > > +
> > > > > >         return 0;
> > > > > > 
> > > > > > +err_setup_vq_res:
> > > > > > +       _vdpa_unregister_device(&mvdev->vdev);
> > > > > >  err_reg:
> > > > > >         destroy_workqueue(mvdev->wq);
> > > > > >  err_res2:
> > > > > > @@ -3863,6 +3885,11 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
> > > > > > 
> > > > > >         unregister_link_notifier(ndev);
> > > > > >         _vdpa_unregister_device(dev);
> > > > > > +
> > > > > > +       down_write(&ndev->reslock);
> > > > > > +       teardown_vq_resources(ndev);
> > > > > > +       up_write(&ndev->reslock);
> > > > > > +
> > > > > >         wq = mvdev->wq;
> > > > > >         mvdev->wq = NULL;
> > > > > >         destroy_workqueue(wq);
> > > > > > 
> > > > > > --
> > > > > > 2.45.1
> > > > > > 
> > > > > 
> > > > 
> > > 
> > 
> 
Thanks,
Dragos
Michael S. Tsirkin July 8, 2024, 11:25 a.m. UTC | #7
On Mon, Jul 08, 2024 at 11:17:06AM +0000, Dragos Tatulea wrote:
> On Mon, 2024-07-08 at 07:11 -0400, Michael S. Tsirkin wrote:
> > On Mon, Jul 08, 2024 at 11:01:39AM +0000, Dragos Tatulea wrote:
> > > On Wed, 2024-07-03 at 18:01 +0200, Eugenio Perez Martin wrote:
> > > > On Wed, Jun 26, 2024 at 11:27 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > > > > 
> > > > > On Wed, 2024-06-19 at 17:54 +0200, Eugenio Perez Martin wrote:
> > > > > > On Mon, Jun 17, 2024 at 5:09 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> > > > > > > 
> > > > > > > Currently, hardware VQs are created right when the vdpa device gets into
> > > > > > > DRIVER_OK state. That is easier because most of the VQ state is known by
> > > > > > > then.
> > > > > > > 
> > > > > > > This patch switches to creating all VQs and their associated resources
> > > > > > > at device creation time. The motivation is to reduce the vdpa device
> > > > > > > live migration downtime by moving the expensive operation of creating
> > > > > > > all the hardware VQs and their associated resources out of downtime on
> > > > > > > the destination VM.
> > > > > > > 
> > > > > > > The VQs are now created in a blank state. The VQ configuration will
> > > > > > > happen later, on DRIVER_OK. Then the configuration will be applied when
> > > > > > > the VQs are moved to the Ready state.
> > > > > > > 
> > > > > > > When .set_vq_ready() is called on a VQ before DRIVER_OK, special care is
> > > > > > > needed: now that the VQ is already created a resume_vq() will be
> > > > > > > triggered too early when no mr has been configured yet. Skip calling
> > > > > > > resume_vq() in this case, let it be handled during DRIVER_OK.
> > > > > > > 
> > > > > > > For virtio-vdpa, the device configuration is done earlier during
> > > > > > > .vdpa_dev_add() by vdpa_register_device(). Avoid calling
> > > > > > > setup_vq_resources() a second time in that case.
> > > > > > > 
> > > > > > 
> > > > > > I guess this happens if virtio_vdpa is already loaded, but I cannot
> > > > > > see how this is different here. Apart from the IOTLB, what else does
> > > > > > it change from the mlx5_vdpa POV?
> > > > > > 
> > > > > I don't understand your question, could you rephrase or provide more context
> > > > > please?
> > > > > 
> > > > 
> > > > My main point is that the vdpa parent driver should not be able to
> > > > tell the difference between vhost_vdpa and virtio_vdpa. The only
> > > > difference I can think of is because of the vhost IOTLB handling.
> > > > 
> > > > Do you also observe this behavior if you add the device with "vdpa
> > > > add" without the virtio_vdpa module loaded, and then modprobe
> > > > virtio_vdpa?
> > > > 
> > > Aah, now I understand what you mean. Indeed in my tests I was loading the
> > > virtio_vdpa module before adding the device. When doing it the other way around
> > > the device doesn't get configured during probe.
> > >  
> > > 
> > > > At least the comment should be something in the line of "If we have
> > > > all the information to initialize the device, pre-warm it here" or
> > > > similar.
> > > Makes sense. I will send a v3 with the commit + comment message update.
> > 
> > 
> > Is commit update the only change then?
> I was planning to drop the paragraph in the commit message (it is confusing) and
> edit the comment below (scroll down to see which).
> 
> Let me know if I should send the v3 or not. I have it prepared.

You can do this, but please document that the only change is in the commit log.


> > 
> > > > 
> > > > > Thanks,
> > > > > Dragos
> > > > > 
> > > > > > > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> > > > > > > Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
> > > > > > > ---
> > > > > > >  drivers/vdpa/mlx5/net/mlx5_vnet.c | 37 ++++++++++++++++++++++++++++++++-----
> > > > > > >  1 file changed, 32 insertions(+), 5 deletions(-)
> > > > > > > 
> > > > > > > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > index 249b5afbe34a..b2836fd3d1dd 100644
> > > > > > > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > > > > > > @@ -2444,7 +2444,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> > > > > > >         mvq = &ndev->vqs[idx];
> > > > > > >         if (!ready) {
> > > > > > >                 suspend_vq(ndev, mvq);
> > > > > > > -       } else {
> > > > > > > +       } else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
> > > > > > >                 if (resume_vq(ndev, mvq))
> > > > > > >                         ready = false;
> > > > > > >         }
> > > > > > > @@ -3078,10 +3078,18 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
> > > > > > >                                 goto err_setup;
> > > > > > >                         }
> > > > > > >                         register_link_notifier(ndev);
> > > > > > > -                       err = setup_vq_resources(ndev, true);
> > > > > > > -                       if (err) {
> > > > > > > -                               mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > > > > > > -                               goto err_driver;
> > > > > > > +                       if (ndev->setup) {
> > > > > > > +                               err = resume_vqs(ndev);
> > > > > > > +                               if (err) {
> > > > > > > +                                       mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
> > > > > > > +                                       goto err_driver;
> > > > > > > +                               }
> > > > > > > +                       } else {
> > > > > > > +                               err = setup_vq_resources(ndev, true);
> > > > > > > +                               if (err) {
> > > > > > > +                                       mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > > > > > > +                                       goto err_driver;
> > > > > > > +                               }
> > > > > > >                         }
> > > > > > >                 } else {
> > > > > > >                         mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
> > > > > > > @@ -3142,6 +3150,7 @@ static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
> > > > > > >                 if (mlx5_vdpa_create_dma_mr(mvdev))
> > > > > > >                         mlx5_vdpa_warn(mvdev, "create MR failed\n");
> > > > > > >         }
> > > > > > > +       setup_vq_resources(ndev, false);
> > > > > > >         up_write(&ndev->reslock);
> > > > > > > 
> > > > > > >         return 0;
> > > > > > > @@ -3836,8 +3845,21 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
> > > > > > >                 goto err_reg;
> > > > > > > 
> > > > > > >         mgtdev->ndev = ndev;
> > > > > > > +
> > > > > > > +       /* For virtio-vdpa, the device was set up during device register. */
> > > > > > > +       if (ndev->setup)
> > > > > > > +               return 0;
> > > > > > > +
> This comment updated to:
> 
> /* The VQs might have been pre-created during device register.
>  * This happens when virtio_vdpa is loaded before the vdpa device is added.
>  */
> 
> 
> > > > > > > +       down_write(&ndev->reslock);
> > > > > > > +       err = setup_vq_resources(ndev, false);
> > > > > > > +       up_write(&ndev->reslock);
> > > > > > > +       if (err)
> > > > > > > +               goto err_setup_vq_res;
> > > > > > > +
> > > > > > >         return 0;
> > > > > > > 
> > > > > > > +err_setup_vq_res:
> > > > > > > +       _vdpa_unregister_device(&mvdev->vdev);
> > > > > > >  err_reg:
> > > > > > >         destroy_workqueue(mvdev->wq);
> > > > > > >  err_res2:
> > > > > > > @@ -3863,6 +3885,11 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
> > > > > > > 
> > > > > > >         unregister_link_notifier(ndev);
> > > > > > >         _vdpa_unregister_device(dev);
> > > > > > > +
> > > > > > > +       down_write(&ndev->reslock);
> > > > > > > +       teardown_vq_resources(ndev);
> > > > > > > +       up_write(&ndev->reslock);
> > > > > > > +
> > > > > > >         wq = mvdev->wq;
> > > > > > >         mvdev->wq = NULL;
> > > > > > >         destroy_workqueue(wq);
> > > > > > > 
> > > > > > > --
> > > > > > > 2.45.1
> > > > > > > 
> > > > > > 
> > > > > 
> > > > 
> > > 
> > 
> Thanks,
> Dragos
>
Zhu Yanjun July 8, 2024, 4:22 p.m. UTC | #8
在 2024/6/17 17:07, Dragos Tatulea 写道:
> Currently, hardware VQs are created right when the vdpa device gets into
> DRIVER_OK state. That is easier because most of the VQ state is known by
> then.
> 
> This patch switches to creating all VQs and their associated resources
> at device creation time. The motivation is to reduce the vdpa device
> live migration downtime by moving the expensive operation of creating
> all the hardware VQs and their associated resources out of downtime on
> the destination VM.

Hi, Dragos Tatulea

 From the above, when a device is created, all the VQs and their 
associated resources are also created.
If VM live migration does not occur, how many resources are wasted?

I mean, to achieve a better downtime, how many resources are used?

"
On a 64 CPU, 256 GB VM with 1 vDPA device of 16 VQps, the full VQ
resource creation + resume time was ~370ms. Now it's down to 60 ms
(only VQ config and resume). The measurements were done on a ConnectX6DX
based vDPA device.
"
 From the above, the performance is amazing.
If we expect to use it on production hosts, how many resources
should we prepare to achieve this downtime?

Zhu Yanjun

> 
> The VQs are now created in a blank state. The VQ configuration will
> happen later, on DRIVER_OK. Then the configuration will be applied when
> the VQs are moved to the Ready state.
> 
> When .set_vq_ready() is called on a VQ before DRIVER_OK, special care is
> needed: now that the VQ is already created a resume_vq() will be
> triggered too early when no mr has been configured yet. Skip calling
> resume_vq() in this case, let it be handled during DRIVER_OK.
> 
> For virtio-vdpa, the device configuration is done earlier during
> .vdpa_dev_add() by vdpa_register_device(). Avoid calling
> setup_vq_resources() a second time in that case.
> 
> Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
> ---
>   drivers/vdpa/mlx5/net/mlx5_vnet.c | 37 ++++++++++++++++++++++++++++++++-----
>   1 file changed, 32 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> index 249b5afbe34a..b2836fd3d1dd 100644
> --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> @@ -2444,7 +2444,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
>   	mvq = &ndev->vqs[idx];
>   	if (!ready) {
>   		suspend_vq(ndev, mvq);
> -	} else {
> +	} else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
>   		if (resume_vq(ndev, mvq))
>   			ready = false;
>   	}
> @@ -3078,10 +3078,18 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
>   				goto err_setup;
>   			}
>   			register_link_notifier(ndev);
> -			err = setup_vq_resources(ndev, true);
> -			if (err) {
> -				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> -				goto err_driver;
> +			if (ndev->setup) {
> +				err = resume_vqs(ndev);
> +				if (err) {
> +					mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
> +					goto err_driver;
> +				}
> +			} else {
> +				err = setup_vq_resources(ndev, true);
> +				if (err) {
> +					mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> +					goto err_driver;
> +				}
>   			}
>   		} else {
>   			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
> @@ -3142,6 +3150,7 @@ static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
>   		if (mlx5_vdpa_create_dma_mr(mvdev))
>   			mlx5_vdpa_warn(mvdev, "create MR failed\n");
>   	}
> +	setup_vq_resources(ndev, false);
>   	up_write(&ndev->reslock);
>   
>   	return 0;
> @@ -3836,8 +3845,21 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
>   		goto err_reg;
>   
>   	mgtdev->ndev = ndev;
> +
> +	/* For virtio-vdpa, the device was set up during device register. */
> +	if (ndev->setup)
> +		return 0;
> +
> +	down_write(&ndev->reslock);
> +	err = setup_vq_resources(ndev, false);
> +	up_write(&ndev->reslock);
> +	if (err)
> +		goto err_setup_vq_res;
> +
>   	return 0;
>   
> +err_setup_vq_res:
> +	_vdpa_unregister_device(&mvdev->vdev);
>   err_reg:
>   	destroy_workqueue(mvdev->wq);
>   err_res2:
> @@ -3863,6 +3885,11 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
>   
>   	unregister_link_notifier(ndev);
>   	_vdpa_unregister_device(dev);
> +
> +	down_write(&ndev->reslock);
> +	teardown_vq_resources(ndev);
> +	up_write(&ndev->reslock);
> +
>   	wq = mvdev->wq;
>   	mvdev->wq = NULL;
>   	destroy_workqueue(wq);
>
Dragos Tatulea July 8, 2024, 4:43 p.m. UTC | #9
Hi Zhu Yanjun,

On Mon, 2024-07-08 at 18:22 +0200, Zhu Yanjun wrote:
> 在 2024/6/17 17:07, Dragos Tatulea 写道:
> > Currently, hardware VQs are created right when the vdpa device gets into
> > DRIVER_OK state. That is easier because most of the VQ state is known by
> > then.
> > 
> > This patch switches to creating all VQs and their associated resources
> > at device creation time. The motivation is to reduce the vdpa device
> > live migration downtime by moving the expensive operation of creating
> > all the hardware VQs and their associated resources out of downtime on
> > the destination VM.
> 
> Hi, Dragos Tatulea
> 
>  From the above, when a device is created, all the VQs and their 
> associated resources are also created.
> If VM live migration does not occur, how much resources are wasted?
> 
> I mean, to achieve a better downtime, how much resource are used?
> 
When you use the vdpa device there are no resources wasted. The HW VQs that were
previously created at VM boot (during DRIVER_OK state) are now created at vdpa
device add time.

The trade-off here is that if you configure different VQ sizes then you will pay
the price of re-creating the VQs.

This could be mitigated by adding a default VQ size parameter that is settable
via the vdpa tool. But this part is not implemented in this series.

Ah, one more thing to keep in mind: the MSI-X interrupts will now be allocated at
vdpa device creation time instead of at VM startup.

> "
> On a 64 CPU, 256 GB VM with 1 vDPA device of 16 VQps, the full VQ
> resource creation + resume time was ~370ms. Now it's down to 60 ms
> (only VQ config and resume). The measurements were done on a ConnectX6DX
> based vDPA device.
> "
>  From the above, the performance is amazing.
> If we expect to use it in the production hosts, how much resources 
> should we prepare to achieve this downtime?
> 

You do need to have the latest FW (22.41.1000) to be able to get the full
benefit of the optimization.

Thanks,
Dragos
> Zhu Yanjun
> 
> > 
> > The VQs are now created in a blank state. The VQ configuration will
> > happen later, on DRIVER_OK. Then the configuration will be applied when
> > the VQs are moved to the Ready state.
> > 
> > When .set_vq_ready() is called on a VQ before DRIVER_OK, special care is
> > needed: now that the VQ is already created a resume_vq() will be
> > triggered too early when no mr has been configured yet. Skip calling
> > resume_vq() in this case, let it be handled during DRIVER_OK.
> > 
> > For virtio-vdpa, the device configuration is done earlier during
> > .vdpa_dev_add() by vdpa_register_device(). Avoid calling
> > setup_vq_resources() a second time in that case.
> > 
> > Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
> > Reviewed-by: Cosmin Ratiu <cratiu@nvidia.com>
> > ---
> >   drivers/vdpa/mlx5/net/mlx5_vnet.c | 37 ++++++++++++++++++++++++++++++++-----
> >   1 file changed, 32 insertions(+), 5 deletions(-)
> > 
> > diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > index 249b5afbe34a..b2836fd3d1dd 100644
> > --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
> > @@ -2444,7 +2444,7 @@ static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
> >   	mvq = &ndev->vqs[idx];
> >   	if (!ready) {
> >   		suspend_vq(ndev, mvq);
> > -	} else {
> > +	} else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
> >   		if (resume_vq(ndev, mvq))
> >   			ready = false;
> >   	}
> > @@ -3078,10 +3078,18 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
> >   				goto err_setup;
> >   			}
> >   			register_link_notifier(ndev);
> > -			err = setup_vq_resources(ndev, true);
> > -			if (err) {
> > -				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > -				goto err_driver;
> > +			if (ndev->setup) {
> > +				err = resume_vqs(ndev);
> > +				if (err) {
> > +					mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
> > +					goto err_driver;
> > +				}
> > +			} else {
> > +				err = setup_vq_resources(ndev, true);
> > +				if (err) {
> > +					mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
> > +					goto err_driver;
> > +				}
> >   			}
> >   		} else {
> >   			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
> > @@ -3142,6 +3150,7 @@ static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
> >   		if (mlx5_vdpa_create_dma_mr(mvdev))
> >   			mlx5_vdpa_warn(mvdev, "create MR failed\n");
> >   	}
> > +	setup_vq_resources(ndev, false);
> >   	up_write(&ndev->reslock);
> >   
> >   	return 0;
> > @@ -3836,8 +3845,21 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
> >   		goto err_reg;
> >   
> >   	mgtdev->ndev = ndev;
> > +
> > +	/* For virtio-vdpa, the device was set up during device register. */
> > +	if (ndev->setup)
> > +		return 0;
> > +
> > +	down_write(&ndev->reslock);
> > +	err = setup_vq_resources(ndev, false);
> > +	up_write(&ndev->reslock);
> > +	if (err)
> > +		goto err_setup_vq_res;
> > +
> >   	return 0;
> >   
> > +err_setup_vq_res:
> > +	_vdpa_unregister_device(&mvdev->vdev);
> >   err_reg:
> >   	destroy_workqueue(mvdev->wq);
> >   err_res2:
> > @@ -3863,6 +3885,11 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
> >   
> >   	unregister_link_notifier(ndev);
> >   	_vdpa_unregister_device(dev);
> > +
> > +	down_write(&ndev->reslock);
> > +	teardown_vq_resources(ndev);
> > +	up_write(&ndev->reslock);
> > +
> >   	wq = mvdev->wq;
> >   	mvdev->wq = NULL;
> >   	destroy_workqueue(wq);
> > 
>
diff mbox series

Patch

diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 249b5afbe34a..b2836fd3d1dd 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2444,7 +2444,7 @@  static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready
 	mvq = &ndev->vqs[idx];
 	if (!ready) {
 		suspend_vq(ndev, mvq);
-	} else {
+	} else if (mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
 		if (resume_vq(ndev, mvq))
 			ready = false;
 	}
@@ -3078,10 +3078,18 @@  static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
 				goto err_setup;
 			}
 			register_link_notifier(ndev);
-			err = setup_vq_resources(ndev, true);
-			if (err) {
-				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
-				goto err_driver;
+			if (ndev->setup) {
+				err = resume_vqs(ndev);
+				if (err) {
+					mlx5_vdpa_warn(mvdev, "failed to resume VQs\n");
+					goto err_driver;
+				}
+			} else {
+				err = setup_vq_resources(ndev, true);
+				if (err) {
+					mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
+					goto err_driver;
+				}
 			}
 		} else {
 			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
@@ -3142,6 +3150,7 @@  static int mlx5_vdpa_compat_reset(struct vdpa_device *vdev, u32 flags)
 		if (mlx5_vdpa_create_dma_mr(mvdev))
 			mlx5_vdpa_warn(mvdev, "create MR failed\n");
 	}
+	setup_vq_resources(ndev, false);
 	up_write(&ndev->reslock);
 
 	return 0;
@@ -3836,8 +3845,21 @@  static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
 		goto err_reg;
 
 	mgtdev->ndev = ndev;
+
+	/* For virtio-vdpa, the device was set up during device register. */
+	if (ndev->setup)
+		return 0;
+
+	down_write(&ndev->reslock);
+	err = setup_vq_resources(ndev, false);
+	up_write(&ndev->reslock);
+	if (err)
+		goto err_setup_vq_res;
+
 	return 0;
 
+err_setup_vq_res:
+	_vdpa_unregister_device(&mvdev->vdev);
 err_reg:
 	destroy_workqueue(mvdev->wq);
 err_res2:
@@ -3863,6 +3885,11 @@  static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
 
 	unregister_link_notifier(ndev);
 	_vdpa_unregister_device(dev);
+
+	down_write(&ndev->reslock);
+	teardown_vq_resources(ndev);
+	up_write(&ndev->reslock);
+
 	wq = mvdev->wq;
 	mvdev->wq = NULL;
 	destroy_workqueue(wq);