diff mbox series

[RFC] vhost_vdpa: assign irq bypass producer token correctly

Message ID 20240808082044.11356-1-jasowang@redhat.com (mailing list archive)
State New, archived
Headers show
Series [RFC] vhost_vdpa: assign irq bypass producer token correctly | expand

Commit Message

Jason Wang Aug. 8, 2024, 8:20 a.m. UTC
We used to call irq_bypass_unregister_producer() in
vhost_vdpa_setup_vq_irq() which is problematic as we don't know if the
token pointer is still valid or not.

Actually, we use the eventfd_ctx as the token so the life cycle of the
token should be bound to the VHOST_SET_VRING_CALL instead of
vhost_vdpa_setup_vq_irq() which could be called by set_status().

Fixing this by setting up  irq bypass producer's token when handling
VHOST_SET_VRING_CALL and un-registering the producer before calling
vhost_vring_ioctl() to prevent a possible use after free as eventfd
could have been released in vhost_vring_ioctl().

Fixes: 2cf1ba9a4d15 ("vhost_vdpa: implement IRQ offloading in vhost_vdpa")
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
Note for Dragos: Please check whether this fixes your issue. I
slightly test it with vp_vdpa in L2.
---
 drivers/vhost/vdpa.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

Comments

Dragos Tatulea Aug. 8, 2024, 6:04 p.m. UTC | #1
On 08.08.24 10:20, Jason Wang wrote:
> We used to call irq_bypass_unregister_producer() in
> vhost_vdpa_setup_vq_irq() which is problematic as we don't know if the
> token pointer is still valid or not.
> 
> Actually, we use the eventfd_ctx as the token so the life cycle of the
> token should be bound to the VHOST_SET_VRING_CALL instead of
> vhost_vdpa_setup_vq_irq() which could be called by set_status().
> 
> Fixing this by setting up  irq bypass producer's token when handling
> VHOST_SET_VRING_CALL and un-registering the producer before calling
> vhost_vring_ioctl() to prevent a possible use after free as eventfd
> could have been released in vhost_vring_ioctl().
> 
> Fixes: 2cf1ba9a4d15 ("vhost_vdpa: implement IRQ offloading in vhost_vdpa")
> Signed-off-by: Jason Wang <jasowang@redhat.com>
> ---
> Note for Dragos: Please check whether this fixes your issue. I
> slightly test it with vp_vdpa in L2.
> ---
>  drivers/vhost/vdpa.c | 12 +++++++++---
>  1 file changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index e31ec9ebc4ce..388226a48bcc 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -209,11 +209,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
>  	if (irq < 0)
>  		return;
>  
> -	irq_bypass_unregister_producer(&vq->call_ctx.producer);
>  	if (!vq->call_ctx.ctx)
>  		return;
>  
> -	vq->call_ctx.producer.token = vq->call_ctx.ctx;
>  	vq->call_ctx.producer.irq = irq;
>  	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
>  	if (unlikely(ret))
> @@ -709,6 +707,12 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>  			vq->last_avail_idx = vq_state.split.avail_index;
>  		}
>  		break;
> +	case VHOST_SET_VRING_CALL:
> +		if (vq->call_ctx.ctx) {
> +			vhost_vdpa_unsetup_vq_irq(v, idx);
> +			vq->call_ctx.producer.token = NULL;
> +		}
> +		break;
>  	}
>  
>  	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
> @@ -747,13 +751,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>  			cb.callback = vhost_vdpa_virtqueue_cb;
>  			cb.private = vq;
>  			cb.trigger = vq->call_ctx.ctx;
> +			vq->call_ctx.producer.token = vq->call_ctx.ctx;
> +			vhost_vdpa_setup_vq_irq(v, idx);
>  		} else {
>  			cb.callback = NULL;
>  			cb.private = NULL;
>  			cb.trigger = NULL;
>  		}
>  		ops->set_vq_cb(vdpa, idx, &cb);
> -		vhost_vdpa_setup_vq_irq(v, idx);
>  		break;
>  
>  	case VHOST_SET_VRING_NUM:
> @@ -1419,6 +1424,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
>  	for (i = 0; i < nvqs; i++) {
>  		vqs[i] = &v->vqs[i];
>  		vqs[i]->handle_kick = handle_vq_kick;
> +		vqs[i]->call_ctx.ctx = NULL;
>  	}
>  	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
>  		       vhost_vdpa_process_iotlb_msg);

No more crashes, but now getting a lot of:
 vhost-vdpa-X: vq Y, irq bypass producer (token 00000000a66e28ab) registration fails, ret =  -16

... seems like the irq_bypass_unregister_producer() that was removed
might still be needed somewhere?

Thanks,
Dragos
Jason Wang Aug. 12, 2024, 5:47 a.m. UTC | #2
On Fri, Aug 9, 2024 at 2:04 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>
>
>
> On 08.08.24 10:20, Jason Wang wrote:
> > We used to call irq_bypass_unregister_producer() in
> > vhost_vdpa_setup_vq_irq() which is problematic as we don't know if the
> > token pointer is still valid or not.
> >
> > Actually, we use the eventfd_ctx as the token so the life cycle of the
> > token should be bound to the VHOST_SET_VRING_CALL instead of
> > vhost_vdpa_setup_vq_irq() which could be called by set_status().
> >
> > Fixing this by setting up  irq bypass producer's token when handling
> > VHOST_SET_VRING_CALL and un-registering the producer before calling
> > vhost_vring_ioctl() to prevent a possible use after free as eventfd
> > could have been released in vhost_vring_ioctl().
> >
> > Fixes: 2cf1ba9a4d15 ("vhost_vdpa: implement IRQ offloading in vhost_vdpa")
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > Note for Dragos: Please check whether this fixes your issue. I
> > slightly test it with vp_vdpa in L2.
> > ---
> >  drivers/vhost/vdpa.c | 12 +++++++++---
> >  1 file changed, 9 insertions(+), 3 deletions(-)
> >
> > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > index e31ec9ebc4ce..388226a48bcc 100644
> > --- a/drivers/vhost/vdpa.c
> > +++ b/drivers/vhost/vdpa.c
> > @@ -209,11 +209,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
> >       if (irq < 0)
> >               return;
> >
> > -     irq_bypass_unregister_producer(&vq->call_ctx.producer);
> >       if (!vq->call_ctx.ctx)
> >               return;
> >
> > -     vq->call_ctx.producer.token = vq->call_ctx.ctx;
> >       vq->call_ctx.producer.irq = irq;
> >       ret = irq_bypass_register_producer(&vq->call_ctx.producer);
> >       if (unlikely(ret))
> > @@ -709,6 +707,12 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> >                       vq->last_avail_idx = vq_state.split.avail_index;
> >               }
> >               break;
> > +     case VHOST_SET_VRING_CALL:
> > +             if (vq->call_ctx.ctx) {
> > +                     vhost_vdpa_unsetup_vq_irq(v, idx);
> > +                     vq->call_ctx.producer.token = NULL;
> > +             }
> > +             break;
> >       }
> >
> >       r = vhost_vring_ioctl(&v->vdev, cmd, argp);
> > @@ -747,13 +751,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> >                       cb.callback = vhost_vdpa_virtqueue_cb;
> >                       cb.private = vq;
> >                       cb.trigger = vq->call_ctx.ctx;
> > +                     vq->call_ctx.producer.token = vq->call_ctx.ctx;
> > +                     vhost_vdpa_setup_vq_irq(v, idx);
> >               } else {
> >                       cb.callback = NULL;
> >                       cb.private = NULL;
> >                       cb.trigger = NULL;
> >               }
> >               ops->set_vq_cb(vdpa, idx, &cb);
> > -             vhost_vdpa_setup_vq_irq(v, idx);
> >               break;
> >
> >       case VHOST_SET_VRING_NUM:
> > @@ -1419,6 +1424,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
> >       for (i = 0; i < nvqs; i++) {
> >               vqs[i] = &v->vqs[i];
> >               vqs[i]->handle_kick = handle_vq_kick;
> > +             vqs[i]->call_ctx.ctx = NULL;
> >       }
> >       vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
> >                      vhost_vdpa_process_iotlb_msg);
>
> No more crashes, but now getting a lot of:
>  vhost-vdpa-X: vq Y, irq bypass producer (token 00000000a66e28ab) registration fails, ret =  -16
>
> ... seems like the irq_bypass_unregister_producer() that was removed
> might still be needed somewhere?

Probably, but I didn't see this when testing vp_vdpa.

When did you meet those warnings? Is it during the boot or migration?

Thanks

>
> Thanks,
> Dragos
>
>
Jason Wang Aug. 12, 2024, 6:49 a.m. UTC | #3
On Mon, Aug 12, 2024 at 1:47 PM Jason Wang <jasowang@redhat.com> wrote:
>
> On Fri, Aug 9, 2024 at 2:04 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> >
> >
> >
> > On 08.08.24 10:20, Jason Wang wrote:
> > > We used to call irq_bypass_unregister_producer() in
> > > vhost_vdpa_setup_vq_irq() which is problematic as we don't know if the
> > > token pointer is still valid or not.
> > >
> > > Actually, we use the eventfd_ctx as the token so the life cycle of the
> > > token should be bound to the VHOST_SET_VRING_CALL instead of
> > > vhost_vdpa_setup_vq_irq() which could be called by set_status().
> > >
> > > Fixing this by setting up  irq bypass producer's token when handling
> > > VHOST_SET_VRING_CALL and un-registering the producer before calling
> > > vhost_vring_ioctl() to prevent a possible use after free as eventfd
> > > could have been released in vhost_vring_ioctl().
> > >
> > > Fixes: 2cf1ba9a4d15 ("vhost_vdpa: implement IRQ offloading in vhost_vdpa")
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > ---
> > > Note for Dragos: Please check whether this fixes your issue. I
> > > slightly test it with vp_vdpa in L2.
> > > ---
> > >  drivers/vhost/vdpa.c | 12 +++++++++---
> > >  1 file changed, 9 insertions(+), 3 deletions(-)
> > >
> > > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > > index e31ec9ebc4ce..388226a48bcc 100644
> > > --- a/drivers/vhost/vdpa.c
> > > +++ b/drivers/vhost/vdpa.c
> > > @@ -209,11 +209,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
> > >       if (irq < 0)
> > >               return;
> > >
> > > -     irq_bypass_unregister_producer(&vq->call_ctx.producer);
> > >       if (!vq->call_ctx.ctx)
> > >               return;
> > >
> > > -     vq->call_ctx.producer.token = vq->call_ctx.ctx;
> > >       vq->call_ctx.producer.irq = irq;
> > >       ret = irq_bypass_register_producer(&vq->call_ctx.producer);
> > >       if (unlikely(ret))
> > > @@ -709,6 +707,12 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> > >                       vq->last_avail_idx = vq_state.split.avail_index;
> > >               }
> > >               break;
> > > +     case VHOST_SET_VRING_CALL:
> > > +             if (vq->call_ctx.ctx) {
> > > +                     vhost_vdpa_unsetup_vq_irq(v, idx);
> > > +                     vq->call_ctx.producer.token = NULL;
> > > +             }
> > > +             break;
> > >       }
> > >
> > >       r = vhost_vring_ioctl(&v->vdev, cmd, argp);
> > > @@ -747,13 +751,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> > >                       cb.callback = vhost_vdpa_virtqueue_cb;
> > >                       cb.private = vq;
> > >                       cb.trigger = vq->call_ctx.ctx;
> > > +                     vq->call_ctx.producer.token = vq->call_ctx.ctx;
> > > +                     vhost_vdpa_setup_vq_irq(v, idx);
> > >               } else {
> > >                       cb.callback = NULL;
> > >                       cb.private = NULL;
> > >                       cb.trigger = NULL;
> > >               }
> > >               ops->set_vq_cb(vdpa, idx, &cb);
> > > -             vhost_vdpa_setup_vq_irq(v, idx);
> > >               break;
> > >
> > >       case VHOST_SET_VRING_NUM:
> > > @@ -1419,6 +1424,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
> > >       for (i = 0; i < nvqs; i++) {
> > >               vqs[i] = &v->vqs[i];
> > >               vqs[i]->handle_kick = handle_vq_kick;
> > > +             vqs[i]->call_ctx.ctx = NULL;
> > >       }
> > >       vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
> > >                      vhost_vdpa_process_iotlb_msg);
> >
> > No more crashes, but now getting a lot of:
> >  vhost-vdpa-X: vq Y, irq bypass producer (token 00000000a66e28ab) registration fails, ret =  -16
> >
> > ... seems like the irq_bypass_unregister_producer() that was removed
> > might still be needed somewhere?
>
> Probably, but I didn't see this when testing vp_vdpa.
>
> When did you meet those warnings? Is it during the boot or migration?

Btw, it would be helpful to check if mlx5_get_vq_irq() works
correctly. I believe it should return an error if the virtqueue
interrupt is not allocated. After a glance at the code, it seems not
straightforward to me.

Thanks

>
> Thanks
>
> >
> > Thanks,
> > Dragos
> >
> >
Dragos Tatulea Aug. 12, 2024, 11:21 a.m. UTC | #4
On 12.08.24 08:49, Jason Wang wrote:
> On Mon, Aug 12, 2024 at 1:47 PM Jason Wang <jasowang@redhat.com> wrote:
>>
>> On Fri, Aug 9, 2024 at 2:04 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>>>
>>>
>>>
>>> On 08.08.24 10:20, Jason Wang wrote:
>>>> We used to call irq_bypass_unregister_producer() in
>>>> vhost_vdpa_setup_vq_irq() which is problematic as we don't know if the
>>>> token pointer is still valid or not.
>>>>
>>>> Actually, we use the eventfd_ctx as the token so the life cycle of the
>>>> token should be bound to the VHOST_SET_VRING_CALL instead of
>>>> vhost_vdpa_setup_vq_irq() which could be called by set_status().
>>>>
>>>> Fixing this by setting up  irq bypass producer's token when handling
>>>> VHOST_SET_VRING_CALL and un-registering the producer before calling
>>>> vhost_vring_ioctl() to prevent a possible use after free as eventfd
>>>> could have been released in vhost_vring_ioctl().
>>>>
>>>> Fixes: 2cf1ba9a4d15 ("vhost_vdpa: implement IRQ offloading in vhost_vdpa")
>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>> ---
>>>> Note for Dragos: Please check whether this fixes your issue. I
>>>> slightly test it with vp_vdpa in L2.
>>>> ---
>>>>  drivers/vhost/vdpa.c | 12 +++++++++---
>>>>  1 file changed, 9 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>>>> index e31ec9ebc4ce..388226a48bcc 100644
>>>> --- a/drivers/vhost/vdpa.c
>>>> +++ b/drivers/vhost/vdpa.c
>>>> @@ -209,11 +209,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
>>>>       if (irq < 0)
>>>>               return;
>>>>
>>>> -     irq_bypass_unregister_producer(&vq->call_ctx.producer);
>>>>       if (!vq->call_ctx.ctx)
>>>>               return;
>>>>
>>>> -     vq->call_ctx.producer.token = vq->call_ctx.ctx;
>>>>       vq->call_ctx.producer.irq = irq;
>>>>       ret = irq_bypass_register_producer(&vq->call_ctx.producer);
>>>>       if (unlikely(ret))
>>>> @@ -709,6 +707,12 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>>>>                       vq->last_avail_idx = vq_state.split.avail_index;
>>>>               }
>>>>               break;
>>>> +     case VHOST_SET_VRING_CALL:
>>>> +             if (vq->call_ctx.ctx) {
>>>> +                     vhost_vdpa_unsetup_vq_irq(v, idx);
>>>> +                     vq->call_ctx.producer.token = NULL;
>>>> +             }
>>>> +             break;
>>>>       }
>>>>
>>>>       r = vhost_vring_ioctl(&v->vdev, cmd, argp);
>>>> @@ -747,13 +751,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>>>>                       cb.callback = vhost_vdpa_virtqueue_cb;
>>>>                       cb.private = vq;
>>>>                       cb.trigger = vq->call_ctx.ctx;
>>>> +                     vq->call_ctx.producer.token = vq->call_ctx.ctx;
>>>> +                     vhost_vdpa_setup_vq_irq(v, idx);
>>>>               } else {
>>>>                       cb.callback = NULL;
>>>>                       cb.private = NULL;
>>>>                       cb.trigger = NULL;
>>>>               }
>>>>               ops->set_vq_cb(vdpa, idx, &cb);
>>>> -             vhost_vdpa_setup_vq_irq(v, idx);
>>>>               break;
>>>>
>>>>       case VHOST_SET_VRING_NUM:
>>>> @@ -1419,6 +1424,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
>>>>       for (i = 0; i < nvqs; i++) {
>>>>               vqs[i] = &v->vqs[i];
>>>>               vqs[i]->handle_kick = handle_vq_kick;
>>>> +             vqs[i]->call_ctx.ctx = NULL;
>>>>       }
>>>>       vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
>>>>                      vhost_vdpa_process_iotlb_msg);
>>>
>>> No more crashes, but now getting a lot of:
>>>  vhost-vdpa-X: vq Y, irq bypass producer (token 00000000a66e28ab) registration fails, ret =  -16
>>>
>>> ... seems like the irq_bypass_unregister_producer() that was removed
>>> might still be needed somewhere?
>>
My statement above was not quite correct. The error comes from the
VQ irq being registered twice:

1) VHOST_SET_VRING_CALL ioctl gets called for vq 0. VQ irq is unregistered
   (vhost_vdpa_unsetup_vq_irq() and re-registered (vhost_vdpa_setup_vq_irq())
   successfully. So far so good.

2) set status !DRIVER_OK -> DRIVER_OK happens. VQ irq setup is done
   once again (vhost_vdpa_setup_vq_irq()). As the producer unregister
   was removed in this patch, the register will complain because the producer
   token already exists.


>> Probably, but I didn't see this when testing vp_vdpa.
>>
>> When did you meet those warnings? Is it during the boot or migration?
During boot, on the first 2 VQs only (so before the QPs are resized).
Traffic does work though when the VM is booted.

> 
> Btw, it would be helpful to check if mlx5_get_vq_irq() works
> correctly. I believe it should return an error if the virtqueue
> interrupt is not allocated. After a glance at the code, it seems not
> straightforward to me.
> 
I think we're good on that front:
mlx5_get_vq_irq() returns EOPNOTSUPP if the vq irq is not allocated.


Thanks,
Dragos
Jason Wang Aug. 13, 2024, 6:26 a.m. UTC | #5
On Mon, Aug 12, 2024 at 7:22 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>
>
>
> On 12.08.24 08:49, Jason Wang wrote:
> > On Mon, Aug 12, 2024 at 1:47 PM Jason Wang <jasowang@redhat.com> wrote:
> >>
> >> On Fri, Aug 9, 2024 at 2:04 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> >>>
> >>>
> >>>
> >>> On 08.08.24 10:20, Jason Wang wrote:
> >>>> We used to call irq_bypass_unregister_producer() in
> >>>> vhost_vdpa_setup_vq_irq() which is problematic as we don't know if the
> >>>> token pointer is still valid or not.
> >>>>
> >>>> Actually, we use the eventfd_ctx as the token so the life cycle of the
> >>>> token should be bound to the VHOST_SET_VRING_CALL instead of
> >>>> vhost_vdpa_setup_vq_irq() which could be called by set_status().
> >>>>
> >>>> Fixing this by setting up  irq bypass producer's token when handling
> >>>> VHOST_SET_VRING_CALL and un-registering the producer before calling
> >>>> vhost_vring_ioctl() to prevent a possible use after free as eventfd
> >>>> could have been released in vhost_vring_ioctl().
> >>>>
> >>>> Fixes: 2cf1ba9a4d15 ("vhost_vdpa: implement IRQ offloading in vhost_vdpa")
> >>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> >>>> ---
> >>>> Note for Dragos: Please check whether this fixes your issue. I
> >>>> slightly test it with vp_vdpa in L2.
> >>>> ---
> >>>>  drivers/vhost/vdpa.c | 12 +++++++++---
> >>>>  1 file changed, 9 insertions(+), 3 deletions(-)
> >>>>
> >>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> >>>> index e31ec9ebc4ce..388226a48bcc 100644
> >>>> --- a/drivers/vhost/vdpa.c
> >>>> +++ b/drivers/vhost/vdpa.c
> >>>> @@ -209,11 +209,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
> >>>>       if (irq < 0)
> >>>>               return;
> >>>>
> >>>> -     irq_bypass_unregister_producer(&vq->call_ctx.producer);
> >>>>       if (!vq->call_ctx.ctx)
> >>>>               return;
> >>>>
> >>>> -     vq->call_ctx.producer.token = vq->call_ctx.ctx;
> >>>>       vq->call_ctx.producer.irq = irq;
> >>>>       ret = irq_bypass_register_producer(&vq->call_ctx.producer);
> >>>>       if (unlikely(ret))
> >>>> @@ -709,6 +707,12 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> >>>>                       vq->last_avail_idx = vq_state.split.avail_index;
> >>>>               }
> >>>>               break;
> >>>> +     case VHOST_SET_VRING_CALL:
> >>>> +             if (vq->call_ctx.ctx) {
> >>>> +                     vhost_vdpa_unsetup_vq_irq(v, idx);
> >>>> +                     vq->call_ctx.producer.token = NULL;
> >>>> +             }
> >>>> +             break;
> >>>>       }
> >>>>
> >>>>       r = vhost_vring_ioctl(&v->vdev, cmd, argp);
> >>>> @@ -747,13 +751,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> >>>>                       cb.callback = vhost_vdpa_virtqueue_cb;
> >>>>                       cb.private = vq;
> >>>>                       cb.trigger = vq->call_ctx.ctx;
> >>>> +                     vq->call_ctx.producer.token = vq->call_ctx.ctx;
> >>>> +                     vhost_vdpa_setup_vq_irq(v, idx);
> >>>>               } else {
> >>>>                       cb.callback = NULL;
> >>>>                       cb.private = NULL;
> >>>>                       cb.trigger = NULL;
> >>>>               }
> >>>>               ops->set_vq_cb(vdpa, idx, &cb);
> >>>> -             vhost_vdpa_setup_vq_irq(v, idx);
> >>>>               break;
> >>>>
> >>>>       case VHOST_SET_VRING_NUM:
> >>>> @@ -1419,6 +1424,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
> >>>>       for (i = 0; i < nvqs; i++) {
> >>>>               vqs[i] = &v->vqs[i];
> >>>>               vqs[i]->handle_kick = handle_vq_kick;
> >>>> +             vqs[i]->call_ctx.ctx = NULL;
> >>>>       }
> >>>>       vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
> >>>>                      vhost_vdpa_process_iotlb_msg);
> >>>
> >>> No more crashes, but now getting a lot of:
> >>>  vhost-vdpa-X: vq Y, irq bypass producer (token 00000000a66e28ab) registration fails, ret =  -16
> >>>
> >>> ... seems like the irq_bypass_unregister_producer() that was removed
> >>> might still be needed somewhere?
> >>
> My statement above was not quite correct. The error comes from the
> VQ irq being registered twice:
>
> 1) VHOST_SET_VRING_CALL ioctl gets called for vq 0. VQ irq is unregistered
>    (vhost_vdpa_unsetup_vq_irq() and re-registered (vhost_vdpa_setup_vq_irq())
>    successfully. So far so good.
>
> 2) set status !DRIVER_OK -> DRIVER_OK happens. VQ irq setup is done
>    once again (vhost_vdpa_setup_vq_irq()). As the producer unregister
>    was removed in this patch, the register will complain because the producer
>    token already exists.

I see. I think it's probably too tricky to try to register/unregister
a producer during set_vring_call if DRIVER_OK is not set.

Does it work if we only do vhost_vdpa_unsetup/setup_vq_irq() if
DRIVER_OK is set in vhost_vdpa_vring_ioctl() (on top of this patch)?

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index 388226a48bcc..ab441b8ccd2e 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -709,7 +709,9 @@ static long vhost_vdpa_vring_ioctl(struct
vhost_vdpa *v, unsigned int cmd,
                break;
        case VHOST_SET_VRING_CALL:
                if (vq->call_ctx.ctx) {
-                       vhost_vdpa_unsetup_vq_irq(v, idx);
+                       if (ops->get_status(vdpa) &
+                           VIRTIO_CONFIG_S_DRIVER_OK)
+                               vhost_vdpa_unsetup_vq_irq(v, idx);
                        vq->call_ctx.producer.token = NULL;
                }
                break;
@@ -752,7 +754,9 @@ static long vhost_vdpa_vring_ioctl(struct
vhost_vdpa *v, unsigned int cmd,
                        cb.private = vq;
                        cb.trigger = vq->call_ctx.ctx;
                        vq->call_ctx.producer.token = vq->call_ctx.ctx;
-                       vhost_vdpa_setup_vq_irq(v, idx);
+                       if (ops->get_status(vdpa) &
+                           VIRTIO_CONFIG_S_DRIVER_OK)
+                               vhost_vdpa_setup_vq_irq(v, idx);
                } else {
                        cb.callback = NULL;
                        cb.private = NULL;

>
>
> >> Probably, but I didn't see this when testing vp_vdpa.
> >>
> >> When did you meet those warnings? Is it during the boot or migration?
> During boot, on the first 2 VQs only (so before the QPs are resized).
> Traffic does work though when the VM is booted.

Right.

>
> >
> > Btw, it would be helpful to check if mlx5_get_vq_irq() works
> > correctly. I believe it should return an error if the virtqueue
> > interrupt is not allocated. After a glance at the code, it seems not
> > straightforward to me.
> >
> I think we're good on that front:
> mlx5_get_vq_irq() returns EOPNOTSUPP if the vq irq is not allocated.

Good to know that.

Thanks

>
>
> Thanks,
> Dragos
>
Dragos Tatulea Aug. 13, 2024, 12:52 p.m. UTC | #6
On 13.08.24 08:26, Jason Wang wrote:
> On Mon, Aug 12, 2024 at 7:22 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>>
>>
>>
>> On 12.08.24 08:49, Jason Wang wrote:
>>> On Mon, Aug 12, 2024 at 1:47 PM Jason Wang <jasowang@redhat.com> wrote:
>>>>
>>>> On Fri, Aug 9, 2024 at 2:04 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>>>>>
>>>>>
>>>>>
>>>>> On 08.08.24 10:20, Jason Wang wrote:
>>>>>> We used to call irq_bypass_unregister_producer() in
>>>>>> vhost_vdpa_setup_vq_irq() which is problematic as we don't know if the
>>>>>> token pointer is still valid or not.
>>>>>>
>>>>>> Actually, we use the eventfd_ctx as the token so the life cycle of the
>>>>>> token should be bound to the VHOST_SET_VRING_CALL instead of
>>>>>> vhost_vdpa_setup_vq_irq() which could be called by set_status().
>>>>>>
>>>>>> Fixing this by setting up  irq bypass producer's token when handling
>>>>>> VHOST_SET_VRING_CALL and un-registering the producer before calling
>>>>>> vhost_vring_ioctl() to prevent a possible use after free as eventfd
>>>>>> could have been released in vhost_vring_ioctl().
>>>>>>
>>>>>> Fixes: 2cf1ba9a4d15 ("vhost_vdpa: implement IRQ offloading in vhost_vdpa")
>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>>>> ---
>>>>>> Note for Dragos: Please check whether this fixes your issue. I
>>>>>> slightly test it with vp_vdpa in L2.
>>>>>> ---
>>>>>>  drivers/vhost/vdpa.c | 12 +++++++++---
>>>>>>  1 file changed, 9 insertions(+), 3 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>>>>>> index e31ec9ebc4ce..388226a48bcc 100644
>>>>>> --- a/drivers/vhost/vdpa.c
>>>>>> +++ b/drivers/vhost/vdpa.c
>>>>>> @@ -209,11 +209,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
>>>>>>       if (irq < 0)
>>>>>>               return;
>>>>>>
>>>>>> -     irq_bypass_unregister_producer(&vq->call_ctx.producer);
>>>>>>       if (!vq->call_ctx.ctx)
>>>>>>               return;
>>>>>>
>>>>>> -     vq->call_ctx.producer.token = vq->call_ctx.ctx;
>>>>>>       vq->call_ctx.producer.irq = irq;
>>>>>>       ret = irq_bypass_register_producer(&vq->call_ctx.producer);
>>>>>>       if (unlikely(ret))
>>>>>> @@ -709,6 +707,12 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>>>>>>                       vq->last_avail_idx = vq_state.split.avail_index;
>>>>>>               }
>>>>>>               break;
>>>>>> +     case VHOST_SET_VRING_CALL:
>>>>>> +             if (vq->call_ctx.ctx) {
>>>>>> +                     vhost_vdpa_unsetup_vq_irq(v, idx);
>>>>>> +                     vq->call_ctx.producer.token = NULL;
>>>>>> +             }
>>>>>> +             break;
>>>>>>       }
>>>>>>
>>>>>>       r = vhost_vring_ioctl(&v->vdev, cmd, argp);
>>>>>> @@ -747,13 +751,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>>>>>>                       cb.callback = vhost_vdpa_virtqueue_cb;
>>>>>>                       cb.private = vq;
>>>>>>                       cb.trigger = vq->call_ctx.ctx;
>>>>>> +                     vq->call_ctx.producer.token = vq->call_ctx.ctx;
>>>>>> +                     vhost_vdpa_setup_vq_irq(v, idx);
>>>>>>               } else {
>>>>>>                       cb.callback = NULL;
>>>>>>                       cb.private = NULL;
>>>>>>                       cb.trigger = NULL;
>>>>>>               }
>>>>>>               ops->set_vq_cb(vdpa, idx, &cb);
>>>>>> -             vhost_vdpa_setup_vq_irq(v, idx);
>>>>>>               break;
>>>>>>
>>>>>>       case VHOST_SET_VRING_NUM:
>>>>>> @@ -1419,6 +1424,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
>>>>>>       for (i = 0; i < nvqs; i++) {
>>>>>>               vqs[i] = &v->vqs[i];
>>>>>>               vqs[i]->handle_kick = handle_vq_kick;
>>>>>> +             vqs[i]->call_ctx.ctx = NULL;
>>>>>>       }
>>>>>>       vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
>>>>>>                      vhost_vdpa_process_iotlb_msg);
>>>>>
>>>>> No more crashes, but now getting a lot of:
>>>>>  vhost-vdpa-X: vq Y, irq bypass producer (token 00000000a66e28ab) registration fails, ret =  -16
>>>>>
>>>>> ... seems like the irq_bypass_unregister_producer() that was removed
>>>>> might still be needed somewhere?
>>>>
>> My statement above was not quite correct. The error comes from the
>> VQ irq being registered twice:
>>
>> 1) VHOST_SET_VRING_CALL ioctl gets called for vq 0. VQ irq is unregistered
>>    (vhost_vdpa_unsetup_vq_irq() and re-registered (vhost_vdpa_setup_vq_irq())
>>    successfully. So far so good.
>>
>> 2) set status !DRIVER_OK -> DRIVER_OK happens. VQ irq setup is done
>>    once again (vhost_vdpa_setup_vq_irq()). As the producer unregister
>>    was removed in this patch, the register will complain because the producer
>>    token already exists.
> 
> I see. I think it's probably too tricky to try to register/unregister
> a producer during set_vring_call if DRIVER_OK is not set.
> 
> Does it work if we only do vhost_vdpa_unsetup/setup_vq_irq() if
> DRIVER_OK is set in vhost_vdpa_vring_ioctl() (on top of this patch)?
> 
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index 388226a48bcc..ab441b8ccd2e 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -709,7 +709,9 @@ static long vhost_vdpa_vring_ioctl(struct
> vhost_vdpa *v, unsigned int cmd,
>                 break;
>         case VHOST_SET_VRING_CALL:
>                 if (vq->call_ctx.ctx) {
> -                       vhost_vdpa_unsetup_vq_irq(v, idx);
> +                       if (ops->get_status(vdpa) &
> +                           VIRTIO_CONFIG_S_DRIVER_OK)
> +                               vhost_vdpa_unsetup_vq_irq(v, idx);
>                         vq->call_ctx.producer.token = NULL;
I was wondering if it's safe to set NULL also for !DRIVER_OK case,
but then I noticed that the !DRIVER_OK transition doesn't set .token to
NULL so this is actually beneficial. Did I get it right?

>                 }
>                 break;
> @@ -752,7 +754,9 @@ static long vhost_vdpa_vring_ioctl(struct
> vhost_vdpa *v, unsigned int cmd,
>                         cb.private = vq;
>                         cb.trigger = vq->call_ctx.ctx;
>                         vq->call_ctx.producer.token = vq->call_ctx.ctx;
> -                       vhost_vdpa_setup_vq_irq(v, idx);
> +                       if (ops->get_status(vdpa) &
> +                           VIRTIO_CONFIG_S_DRIVER_OK)
> +                               vhost_vdpa_setup_vq_irq(v, idx);
>                 } else {
>                         cb.callback = NULL;
>                         cb.private = NULL;
> 
Yup, this works.  

I do understand the fix, but I don't fully understand why this is
better than setting the .token to NULL in vhost_vdpa_unsetup_vq_irq()
and keeping the token logic inside the vhost_vdpa_setup/unsetup_vq_irq()
API.

In any case, if you send this fix:
Tested-by: Dragos Tatulea <dtatulea@nvidia.com>

Thanks,
Dragos
>>
>>
>>>> Probably, but I didn't see this when testing vp_vdpa.
>>>>
>>>> When did you meet those warnings? Is it during the boot or migration?
>> During boot, on the first 2 VQs only (so before the QPs are resized).
>> Traffic does work though when the VM is booted.
> 
> Right.
> 
>>
>>>
>>> Btw, it would be helpful to check if mlx5_get_vq_irq() works
>>> correctly. I believe it should return an error if the virtqueue
>>> interrupt is not allocated. After a glance at the code, it seems not
>>> straightforward to me.
>>>
>> I think we're good on that front:
>> mlx5_get_vq_irq() returns EOPNOTSUPP if the vq irq is not allocated.
> 
> Good to know that.
> 
> Thanks
> 
>>
>>
>> Thanks,
>> Dragos
>>
>
Jason Wang Aug. 14, 2024, 5:29 a.m. UTC | #7
On Tue, Aug 13, 2024 at 8:53 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>
>
>
> On 13.08.24 08:26, Jason Wang wrote:
> > On Mon, Aug 12, 2024 at 7:22 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> >>
> >>
> >>
> >> On 12.08.24 08:49, Jason Wang wrote:
> >>> On Mon, Aug 12, 2024 at 1:47 PM Jason Wang <jasowang@redhat.com> wrote:
> >>>>
> >>>> On Fri, Aug 9, 2024 at 2:04 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
> >>>>>
> >>>>>
> >>>>>
> >>>>> On 08.08.24 10:20, Jason Wang wrote:
> >>>>>> We used to call irq_bypass_unregister_producer() in
> >>>>>> vhost_vdpa_setup_vq_irq() which is problematic as we don't know if the
> >>>>>> token pointer is still valid or not.
> >>>>>>
> >>>>>> Actually, we use the eventfd_ctx as the token so the life cycle of the
> >>>>>> token should be bound to the VHOST_SET_VRING_CALL instead of
> >>>>>> vhost_vdpa_setup_vq_irq() which could be called by set_status().
> >>>>>>
> >>>>>> Fixing this by setting up  irq bypass producer's token when handling
> >>>>>> VHOST_SET_VRING_CALL and un-registering the producer before calling
> >>>>>> vhost_vring_ioctl() to prevent a possible use after free as eventfd
> >>>>>> could have been released in vhost_vring_ioctl().
> >>>>>>
> >>>>>> Fixes: 2cf1ba9a4d15 ("vhost_vdpa: implement IRQ offloading in vhost_vdpa")
> >>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> >>>>>> ---
> >>>>>> Note for Dragos: Please check whether this fixes your issue. I
> >>>>>> slightly test it with vp_vdpa in L2.
> >>>>>> ---
> >>>>>>  drivers/vhost/vdpa.c | 12 +++++++++---
> >>>>>>  1 file changed, 9 insertions(+), 3 deletions(-)
> >>>>>>
> >>>>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> >>>>>> index e31ec9ebc4ce..388226a48bcc 100644
> >>>>>> --- a/drivers/vhost/vdpa.c
> >>>>>> +++ b/drivers/vhost/vdpa.c
> >>>>>> @@ -209,11 +209,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
> >>>>>>       if (irq < 0)
> >>>>>>               return;
> >>>>>>
> >>>>>> -     irq_bypass_unregister_producer(&vq->call_ctx.producer);
> >>>>>>       if (!vq->call_ctx.ctx)
> >>>>>>               return;
> >>>>>>
> >>>>>> -     vq->call_ctx.producer.token = vq->call_ctx.ctx;
> >>>>>>       vq->call_ctx.producer.irq = irq;
> >>>>>>       ret = irq_bypass_register_producer(&vq->call_ctx.producer);
> >>>>>>       if (unlikely(ret))
> >>>>>> @@ -709,6 +707,12 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> >>>>>>                       vq->last_avail_idx = vq_state.split.avail_index;
> >>>>>>               }
> >>>>>>               break;
> >>>>>> +     case VHOST_SET_VRING_CALL:
> >>>>>> +             if (vq->call_ctx.ctx) {
> >>>>>> +                     vhost_vdpa_unsetup_vq_irq(v, idx);
> >>>>>> +                     vq->call_ctx.producer.token = NULL;
> >>>>>> +             }
> >>>>>> +             break;
> >>>>>>       }
> >>>>>>
> >>>>>>       r = vhost_vring_ioctl(&v->vdev, cmd, argp);
> >>>>>> @@ -747,13 +751,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> >>>>>>                       cb.callback = vhost_vdpa_virtqueue_cb;
> >>>>>>                       cb.private = vq;
> >>>>>>                       cb.trigger = vq->call_ctx.ctx;
> >>>>>> +                     vq->call_ctx.producer.token = vq->call_ctx.ctx;
> >>>>>> +                     vhost_vdpa_setup_vq_irq(v, idx);
> >>>>>>               } else {
> >>>>>>                       cb.callback = NULL;
> >>>>>>                       cb.private = NULL;
> >>>>>>                       cb.trigger = NULL;
> >>>>>>               }
> >>>>>>               ops->set_vq_cb(vdpa, idx, &cb);
> >>>>>> -             vhost_vdpa_setup_vq_irq(v, idx);
> >>>>>>               break;
> >>>>>>
> >>>>>>       case VHOST_SET_VRING_NUM:
> >>>>>> @@ -1419,6 +1424,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
> >>>>>>       for (i = 0; i < nvqs; i++) {
> >>>>>>               vqs[i] = &v->vqs[i];
> >>>>>>               vqs[i]->handle_kick = handle_vq_kick;
> >>>>>> +             vqs[i]->call_ctx.ctx = NULL;
> >>>>>>       }
> >>>>>>       vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
> >>>>>>                      vhost_vdpa_process_iotlb_msg);
> >>>>>
> >>>>> No more crashes, but now getting a lot of:
> >>>>>  vhost-vdpa-X: vq Y, irq bypass producer (token 00000000a66e28ab) registration fails, ret =  -16
> >>>>>
> >>>>> ... seems like the irq_bypass_unregister_producer() that was removed
> >>>>> might still be needed somewhere?
> >>>>
> >> My statement above was not quite correct. The error comes from the
> >> VQ irq being registered twice:
> >>
> >> 1) VHOST_SET_VRING_CALL ioctl gets called for vq 0. VQ irq is unregistered
> >>    (vhost_vdpa_unsetup_vq_irq() and re-registered (vhost_vdpa_setup_vq_irq())
> >>    successfully. So far so good.
> >>
> >> 2) set status !DRIVER_OK -> DRIVER_OK happens. VQ irq setup is done
> >>    once again (vhost_vdpa_setup_vq_irq()). As the producer unregister
> >>    was removed in this patch, the register will complain because the producer
> >>    token already exists.
> >
> > I see. I think it's probably too tricky to try to register/unregister
> > a producer during set_vring_call if DRIVER_OK is not set.
> >
> > Does it work if we only do vhost_vdpa_unsetup/setup_vq_irq() if
> > DRIVER_OK is set in vhost_vdpa_vring_ioctl() (on top of this patch)?
> >
> > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > index 388226a48bcc..ab441b8ccd2e 100644
> > --- a/drivers/vhost/vdpa.c
> > +++ b/drivers/vhost/vdpa.c
> > @@ -709,7 +709,9 @@ static long vhost_vdpa_vring_ioctl(struct
> > vhost_vdpa *v, unsigned int cmd,
> >                 break;
> >         case VHOST_SET_VRING_CALL:
> >                 if (vq->call_ctx.ctx) {
> > -                       vhost_vdpa_unsetup_vq_irq(v, idx);
> > +                       if (ops->get_status(vdpa) &
> > +                           VIRTIO_CONFIG_S_DRIVER_OK)
> > +                               vhost_vdpa_unsetup_vq_irq(v, idx);
> >                         vq->call_ctx.producer.token = NULL;
> I was wondering if it's safe to set NULL also for !DRIVER_OK case,
> but then I noticed that the !DRIVER_OK transition doesn't set .token to
> NULL so this is actually beneficial. Did I get it right?

Yes, actually the reason is that we use eventfd as the token, so the
life cycle of the token is tied to eventfd itself. If we don't set the
token to NULL here the vhost_vring_ioctl() may just release it which
may lead to a use-after-free. So this patch doesn't set a token during
DRIVER_OK transition but during SET_VRING_CALL.

>
> >                 }
> >                 break;
> > @@ -752,7 +754,9 @@ static long vhost_vdpa_vring_ioctl(struct
> > vhost_vdpa *v, unsigned int cmd,
> >                         cb.private = vq;
> >                         cb.trigger = vq->call_ctx.ctx;
> >                         vq->call_ctx.producer.token = vq->call_ctx.ctx;
> > -                       vhost_vdpa_setup_vq_irq(v, idx);
> > +                       if (ops->get_status(vdpa) &
> > +                           VIRTIO_CONFIG_S_DRIVER_OK)
> > +                               vhost_vdpa_setup_vq_irq(v, idx);
> >                 } else {
> >                         cb.callback = NULL;
> >                         cb.private = NULL;
> >
> Yup, this works.
>
> I do understand the fix, but I don't fully understand why this is
> better than setting the .token to NULL in vhost_vdpa_unsetup_vq_irq()
> and keeping the token logic inside the vhost_vdpa_setup/unsetup_vq_irq()
> API.

See the above explanation, hope it clarifies things.

>
> In any case, if you send this fix:
> Tested-by: Dragos Tatulea <dtatulea@nvidia.com>

Thanks

>
> Thanks,
> Dragos
> >>
> >>
> >>>> Probably, but I didn't see this when testing vp_vdpa.
> >>>>
> >>>> When did you meet those warnings? Is it during the boot or migration?
> >> During boot, on the first 2 VQs only (so before the QPs are resized).
> >> Traffic does work though when the VM is booted.
> >
> > Right.
> >
> >>
> >>>
> >>> Btw, it would be helpful to check if mlx5_get_vq_irq() works
> >>> correctly. I believe it should return an error if the virtqueue
> >>> interrupt is not allocated. After a glance at the code, it seems not
> >>> straightforward to me.
> >>>
> >> I think we're good on that front:
> >> mlx5_get_vq_irq() returns EOPNOTSUPP if the vq irq is not allocated.
> >
> > Good to know that.
> >
> > Thanks
> >
> >>
> >>
> >> Thanks,
> >> Dragos
> >>
> >
>
Michael S. Tsirkin Aug. 14, 2024, 7:27 a.m. UTC | #8
On Thu, Aug 08, 2024 at 04:20:44PM +0800, Jason Wang wrote:
> We used to call irq_bypass_unregister_producer() in
> vhost_vdpa_setup_vq_irq() which is problematic as we don't know if the
> token pointer is still valid or not.
> 
> Actually, we use the eventfd_ctx as the token so the life cycle of the
> token should be bound to the VHOST_SET_VRING_CALL instead of
> vhost_vdpa_setup_vq_irq() which could be called by set_status().
> 
> Fixing this by setting up  irq bypass producer's token when handling
> VHOST_SET_VRING_CALL and un-registering the producer before calling
> vhost_vring_ioctl() to prevent a possible use after free as eventfd
> could have been released in vhost_vring_ioctl().
> 
> Fixes: 2cf1ba9a4d15 ("vhost_vdpa: implement IRQ offloading in vhost_vdpa")
> Signed-off-by: Jason Wang <jasowang@redhat.com>

Want to post a non-RFC version?

> ---
> Note for Dragos: Please check whether this fixes your issue. I
> slightly test it with vp_vdpa in L2.
> ---
>  drivers/vhost/vdpa.c | 12 +++++++++---
>  1 file changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index e31ec9ebc4ce..388226a48bcc 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -209,11 +209,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
>  	if (irq < 0)
>  		return;
>  
> -	irq_bypass_unregister_producer(&vq->call_ctx.producer);
>  	if (!vq->call_ctx.ctx)
>  		return;
>  
> -	vq->call_ctx.producer.token = vq->call_ctx.ctx;
>  	vq->call_ctx.producer.irq = irq;
>  	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
>  	if (unlikely(ret))
> @@ -709,6 +707,12 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>  			vq->last_avail_idx = vq_state.split.avail_index;
>  		}
>  		break;
> +	case VHOST_SET_VRING_CALL:
> +		if (vq->call_ctx.ctx) {
> +			vhost_vdpa_unsetup_vq_irq(v, idx);
> +			vq->call_ctx.producer.token = NULL;
> +		}
> +		break;
>  	}
>  
>  	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
> @@ -747,13 +751,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>  			cb.callback = vhost_vdpa_virtqueue_cb;
>  			cb.private = vq;
>  			cb.trigger = vq->call_ctx.ctx;
> +			vq->call_ctx.producer.token = vq->call_ctx.ctx;
> +			vhost_vdpa_setup_vq_irq(v, idx);
>  		} else {
>  			cb.callback = NULL;
>  			cb.private = NULL;
>  			cb.trigger = NULL;
>  		}
>  		ops->set_vq_cb(vdpa, idx, &cb);
> -		vhost_vdpa_setup_vq_irq(v, idx);
>  		break;
>  
>  	case VHOST_SET_VRING_NUM:
> @@ -1419,6 +1424,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
>  	for (i = 0; i < nvqs; i++) {
>  		vqs[i] = &v->vqs[i];
>  		vqs[i]->handle_kick = handle_vq_kick;
> +		vqs[i]->call_ctx.ctx = NULL;
>  	}
>  	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
>  		       vhost_vdpa_process_iotlb_msg);
> -- 
> 2.31.1
Dragos Tatulea Aug. 14, 2024, 7:58 a.m. UTC | #9
On 14.08.24 07:29, Jason Wang wrote:
> On Tue, Aug 13, 2024 at 8:53 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>>
>>
>>
>> On 13.08.24 08:26, Jason Wang wrote:
>>> On Mon, Aug 12, 2024 at 7:22 PM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>>>>
>>>>
>>>>
>>>> On 12.08.24 08:49, Jason Wang wrote:
>>>>> On Mon, Aug 12, 2024 at 1:47 PM Jason Wang <jasowang@redhat.com> wrote:
>>>>>>
>>>>>> On Fri, Aug 9, 2024 at 2:04 AM Dragos Tatulea <dtatulea@nvidia.com> wrote:
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 08.08.24 10:20, Jason Wang wrote:
>>>>>>>> We used to call irq_bypass_unregister_producer() in
>>>>>>>> vhost_vdpa_setup_vq_irq() which is problematic as we don't know if the
>>>>>>>> token pointer is still valid or not.
>>>>>>>>
>>>>>>>> Actually, we use the eventfd_ctx as the token so the life cycle of the
>>>>>>>> token should be bound to the VHOST_SET_VRING_CALL instead of
>>>>>>>> vhost_vdpa_setup_vq_irq() which could be called by set_status().
>>>>>>>>
>>>>>>>> Fixing this by setting up  irq bypass producer's token when handling
>>>>>>>> VHOST_SET_VRING_CALL and un-registering the producer before calling
>>>>>>>> vhost_vring_ioctl() to prevent a possible use after free as eventfd
>>>>>>>> could have been released in vhost_vring_ioctl().
>>>>>>>>
>>>>>>>> Fixes: 2cf1ba9a4d15 ("vhost_vdpa: implement IRQ offloading in vhost_vdpa")
>>>>>>>> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>>>>>> ---
>>>>>>>> Note for Dragos: Please check whether this fixes your issue. I
>>>>>>>> slightly test it with vp_vdpa in L2.
>>>>>>>> ---
>>>>>>>>  drivers/vhost/vdpa.c | 12 +++++++++---
>>>>>>>>  1 file changed, 9 insertions(+), 3 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>>>>>>>> index e31ec9ebc4ce..388226a48bcc 100644
>>>>>>>> --- a/drivers/vhost/vdpa.c
>>>>>>>> +++ b/drivers/vhost/vdpa.c
>>>>>>>> @@ -209,11 +209,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
>>>>>>>>       if (irq < 0)
>>>>>>>>               return;
>>>>>>>>
>>>>>>>> -     irq_bypass_unregister_producer(&vq->call_ctx.producer);
>>>>>>>>       if (!vq->call_ctx.ctx)
>>>>>>>>               return;
>>>>>>>>
>>>>>>>> -     vq->call_ctx.producer.token = vq->call_ctx.ctx;
>>>>>>>>       vq->call_ctx.producer.irq = irq;
>>>>>>>>       ret = irq_bypass_register_producer(&vq->call_ctx.producer);
>>>>>>>>       if (unlikely(ret))
>>>>>>>> @@ -709,6 +707,12 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>>>>>>>>                       vq->last_avail_idx = vq_state.split.avail_index;
>>>>>>>>               }
>>>>>>>>               break;
>>>>>>>> +     case VHOST_SET_VRING_CALL:
>>>>>>>> +             if (vq->call_ctx.ctx) {
>>>>>>>> +                     vhost_vdpa_unsetup_vq_irq(v, idx);
>>>>>>>> +                     vq->call_ctx.producer.token = NULL;
>>>>>>>> +             }
>>>>>>>> +             break;
>>>>>>>>       }
>>>>>>>>
>>>>>>>>       r = vhost_vring_ioctl(&v->vdev, cmd, argp);
>>>>>>>> @@ -747,13 +751,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>>>>>>>>                       cb.callback = vhost_vdpa_virtqueue_cb;
>>>>>>>>                       cb.private = vq;
>>>>>>>>                       cb.trigger = vq->call_ctx.ctx;
>>>>>>>> +                     vq->call_ctx.producer.token = vq->call_ctx.ctx;
>>>>>>>> +                     vhost_vdpa_setup_vq_irq(v, idx);
>>>>>>>>               } else {
>>>>>>>>                       cb.callback = NULL;
>>>>>>>>                       cb.private = NULL;
>>>>>>>>                       cb.trigger = NULL;
>>>>>>>>               }
>>>>>>>>               ops->set_vq_cb(vdpa, idx, &cb);
>>>>>>>> -             vhost_vdpa_setup_vq_irq(v, idx);
>>>>>>>>               break;
>>>>>>>>
>>>>>>>>       case VHOST_SET_VRING_NUM:
>>>>>>>> @@ -1419,6 +1424,7 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
>>>>>>>>       for (i = 0; i < nvqs; i++) {
>>>>>>>>               vqs[i] = &v->vqs[i];
>>>>>>>>               vqs[i]->handle_kick = handle_vq_kick;
>>>>>>>> +             vqs[i]->call_ctx.ctx = NULL;
>>>>>>>>       }
>>>>>>>>       vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
>>>>>>>>                      vhost_vdpa_process_iotlb_msg);
>>>>>>>
>>>>>>> No more crashes, but now getting a lot of:
>>>>>>>  vhost-vdpa-X: vq Y, irq bypass producer (token 00000000a66e28ab) registration fails, ret =  -16
>>>>>>>
>>>>>>> ... seems like the irq_bypass_unregister_producer() that was removed
>>>>>>> might still be needed somewhere?
>>>>>>
>>>> My statement above was not quite correct. The error comes from the
>>>> VQ irq being registered twice:
>>>>
>>>> 1) VHOST_SET_VRING_CALL ioctl gets called for vq 0. VQ irq is unregistered
>>>>    (vhost_vdpa_unsetup_vq_irq() and re-registered (vhost_vdpa_setup_vq_irq())
>>>>    successfully. So far so good.
>>>>
>>>> 2) set status !DRIVER_OK -> DRIVER_OK happens. VQ irq setup is done
>>>>    once again (vhost_vdpa_setup_vq_irq()). As the producer unregister
>>>>    was removed in this patch, the register will complain because the producer
>>>>    token already exists.
>>>
>>> I see. I think it's probably too tricky to try to register/unregister
>>> a producer during set_vring_call if DRIVER_OK is not set.
>>>
>>> Does it work if we only do vhost_vdpa_unsetup/setup_vq_irq() if
>>> DRIVER_OK is set in vhost_vdpa_vring_ioctl() (on top of this patch)?
>>>
>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>>> index 388226a48bcc..ab441b8ccd2e 100644
>>> --- a/drivers/vhost/vdpa.c
>>> +++ b/drivers/vhost/vdpa.c
>>> @@ -709,7 +709,9 @@ static long vhost_vdpa_vring_ioctl(struct
>>> vhost_vdpa *v, unsigned int cmd,
>>>                 break;
>>>         case VHOST_SET_VRING_CALL:
>>>                 if (vq->call_ctx.ctx) {
>>> -                       vhost_vdpa_unsetup_vq_irq(v, idx);
>>> +                       if (ops->get_status(vdpa) &
>>> +                           VIRTIO_CONFIG_S_DRIVER_OK)
>>> +                               vhost_vdpa_unsetup_vq_irq(v, idx);
>>>                         vq->call_ctx.producer.token = NULL;
>> I was wondering if it's safe to set NULL also for !DRIVER_OK case,
>> but then I noticed that the !DRIVER_OK transition doesn't set .token to
>> NULL so this is actually beneficial. Did I get it right?
> 
> Yes, actually the reason is that we use eventfd as the token, so the
> life cycle of the token is tied to eventfd itself. If we don't set the
> token to NULL here the vhost_vring_ioctl() may just release it which
> may lead to a use-after-free. So this patch doesn't set a token during
> DRIVER_OK transition but during SET_VRING_CALL.
> 
Ack. Thanks for the explanation.

>>
>>>                 }
>>>                 break;
>>> @@ -752,7 +754,9 @@ static long vhost_vdpa_vring_ioctl(struct
>>> vhost_vdpa *v, unsigned int cmd,
>>>                         cb.private = vq;
>>>                         cb.trigger = vq->call_ctx.ctx;
>>>                         vq->call_ctx.producer.token = vq->call_ctx.ctx;
>>> -                       vhost_vdpa_setup_vq_irq(v, idx);
>>> +                       if (ops->get_status(vdpa) &
>>> +                           VIRTIO_CONFIG_S_DRIVER_OK)
>>> +                               vhost_vdpa_setup_vq_irq(v, idx);
>>>                 } else {
>>>                         cb.callback = NULL;
>>>                         cb.private = NULL;
>>>
>> Yup, this works.
>>
>> I do understand the fix, but I don't fully understand why this is
>> better than setting the .token to NULL in vhost_vdpa_unsetup_vq_irq()
>> and keeping the token logic inside the vhost_vdpa_setup/unsetup_vq_irq()
>> API.
> 
> See the above explanation, hope it clarifies things.
> 
Yes it does.

>>
>> In any case, if you send this fix:
>> Tested-by: Dragos Tatulea <dtatulea@nvidia.com>
FWIW:
Reviewed-by: Dragos Tatulea <dtatulea@nvidia.com>

> 
> Thanks
> 
>>
>> Thanks,
>> Dragos
>>>>
>>>>
>>>>>> Probably, but I didn't see this when testing vp_vdpa.
>>>>>>
>>>>>> When did you meet those warnings? Is it during the boot or migration?
>>>> During boot, on the first 2 VQs only (so before the QPs are resized).
>>>> Traffic does work though when the VM is booted.
>>>
>>> Right.
>>>
>>>>
>>>>>
>>>>> Btw, it would be helpful to check if mlx5_get_vq_irq() works
>>>>> correctly. I believe it should return an error if the virtqueue
>>>>> interrupt is not allocated. After a glance at the code, it seems not
>>>>> straightforward to me.
>>>>>
>>>> I think we're good on that front:
>>>> mlx5_get_vq_irq() returns EOPNOTSUPP if the vq irq is not allocated.
>>>
>>> Good to know that.
>>>
>>> Thanks
>>>
>>>>
>>>>
>>>> Thanks,
>>>> Dragos
>>>>
>>>
>>
>
diff mbox series

Patch

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index e31ec9ebc4ce..388226a48bcc 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -209,11 +209,9 @@  static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
 	if (irq < 0)
 		return;
 
-	irq_bypass_unregister_producer(&vq->call_ctx.producer);
 	if (!vq->call_ctx.ctx)
 		return;
 
-	vq->call_ctx.producer.token = vq->call_ctx.ctx;
 	vq->call_ctx.producer.irq = irq;
 	ret = irq_bypass_register_producer(&vq->call_ctx.producer);
 	if (unlikely(ret))
@@ -709,6 +707,12 @@  static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 			vq->last_avail_idx = vq_state.split.avail_index;
 		}
 		break;
+	case VHOST_SET_VRING_CALL:
+		if (vq->call_ctx.ctx) {
+			vhost_vdpa_unsetup_vq_irq(v, idx);
+			vq->call_ctx.producer.token = NULL;
+		}
+		break;
 	}
 
 	r = vhost_vring_ioctl(&v->vdev, cmd, argp);
@@ -747,13 +751,14 @@  static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 			cb.callback = vhost_vdpa_virtqueue_cb;
 			cb.private = vq;
 			cb.trigger = vq->call_ctx.ctx;
+			vq->call_ctx.producer.token = vq->call_ctx.ctx;
+			vhost_vdpa_setup_vq_irq(v, idx);
 		} else {
 			cb.callback = NULL;
 			cb.private = NULL;
 			cb.trigger = NULL;
 		}
 		ops->set_vq_cb(vdpa, idx, &cb);
-		vhost_vdpa_setup_vq_irq(v, idx);
 		break;
 
 	case VHOST_SET_VRING_NUM:
@@ -1419,6 +1424,7 @@  static int vhost_vdpa_open(struct inode *inode, struct file *filep)
 	for (i = 0; i < nvqs; i++) {
 		vqs[i] = &v->vqs[i];
 		vqs[i]->handle_kick = handle_vq_kick;
+		vqs[i]->call_ctx.ctx = NULL;
 	}
 	vhost_dev_init(dev, vqs, nvqs, 0, 0, 0, false,
 		       vhost_vdpa_process_iotlb_msg);