diff mbox series

[v7,09/26] virtio_ring: split: implement virtqueue_reset_vring_split()

Message ID 20220308123518.33800-10-xuanzhuo@linux.alibaba.com (mailing list archive)
State Not Applicable
Headers show
Series virtio pci support VIRTIO_F_RING_RESET | expand

Commit Message

Xuan Zhuo March 8, 2022, 12:35 p.m. UTC
virtio ring supports reset.

Queue reset is divided into several stages.

1. notify device queue reset
2. vring release
3. attach new vring
4. notify device queue re-enable

After the first step is completed, the vring reset operation can be
performed. If the newly set vring num does not change, then just reset
the vq related value.

Otherwise, the old vring is released, and a new vring is allocated and
attached to the vq. If this process fails, the function exits and the
vq is left in the vring release state. You can call this function again
to reallocate the vring.

In addition, vring_align and may_reduce_num are needed to reallocate the
vring, so they are saved when the vq is created.

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

Comments

Jason Wang March 9, 2022, 7:55 a.m. UTC | #1
在 2022/3/8 下午8:35, Xuan Zhuo 写道:
> virtio ring supports reset.
>
> Queue reset is divided into several stages.
>
> 1. notify device queue reset
> 2. vring release
> 3. attach new vring
> 4. notify device queue re-enable
>
> After the first step is completed, the vring reset operation can be
> performed. If the newly set vring num does not change, then just reset
> the vq related value.
>
> Otherwise, the vring will be released and the vring will be reallocated.
> And the vring will be attached to the vq. If this process fails, the
> function will exit, and the state of the vq will be the vring release
> state. You can call this function again to reallocate the vring.
>
> In addition, vring_align, may_reduce_num are necessary for reallocating
> vring, so they are retained when creating vq.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>   drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++
>   1 file changed, 69 insertions(+)
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index e0422c04c903..148fb1fd3d5a 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -158,6 +158,12 @@ struct vring_virtqueue {
>   			/* DMA address and size information */
>   			dma_addr_t queue_dma_addr;
>   			size_t queue_size_in_bytes;
> +
> +			/* The parameters for creating vrings are reserved for
> +			 * creating new vrings when enabling reset queue.
> +			 */
> +			u32 vring_align;
> +			bool may_reduce_num;
>   		} split;
>   
>   		/* Available for packed ring */
> @@ -217,6 +223,12 @@ struct vring_virtqueue {
>   #endif
>   };
>   
> +static void vring_free(struct virtqueue *vq);
> +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> +					 struct virtio_device *vdev);
> +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> +					  struct virtio_device *vdev,
> +					  struct vring vring);
>   
>   /*
>    * Helpers.
> @@ -1012,6 +1024,8 @@ static struct virtqueue *vring_create_virtqueue_split(
>   		return NULL;
>   	}
>   
> +	to_vvq(vq)->split.vring_align = vring_align;
> +	to_vvq(vq)->split.may_reduce_num = may_reduce_num;
>   	to_vvq(vq)->split.queue_dma_addr = vring.dma_addr;
>   	to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes;
>   	to_vvq(vq)->we_own_ring = true;
> @@ -1019,6 +1033,59 @@ static struct virtqueue *vring_create_virtqueue_split(
>   	return vq;
>   }
>   
> +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num)
> +{


So what this function actually does is resize the virtqueue; I 
suggest renaming it to virtqueue_resize_split().


> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +	struct virtio_device *vdev = _vq->vdev;
> +	struct vring_split vring;
> +	int err;
> +
> +	if (num > _vq->num_max)
> +		return -E2BIG;
> +
> +	switch (vq->vq.reset) {
> +	case VIRTIO_VQ_RESET_STEP_NONE:
> +		return -ENOENT;
> +
> +	case VIRTIO_VQ_RESET_STEP_VRING_ATTACH:
> +	case VIRTIO_VQ_RESET_STEP_DEVICE:
> +		if (vq->split.vring.num == num || !num)
> +			break;
> +
> +		vring_free(_vq);
> +
> +		fallthrough;
> +
> +	case VIRTIO_VQ_RESET_STEP_VRING_RELEASE:
> +		if (!num)
> +			num = vq->split.vring.num;
> +
> +		err = vring_create_vring_split(&vring, vdev,
> +					       vq->split.vring_align,
> +					       vq->weak_barriers,
> +					       vq->split.may_reduce_num, num);
> +		if (err)
> +			return -ENOMEM;


We had better have a safe fallback here, like:

If we can't allocate new memory, we keep using the current vring. 
Otherwise a failed ethtool -G may leave the device unusable.

This could be done by not freeing the old vring and virtqueue state 
until the new one is allocated.


> +
> +		err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
> +		if (err) {
> +			vring_free_queue(vdev, vring.queue_size_in_bytes,
> +					 vring.queue,
> +					 vring.dma_addr);
> +			return -ENOMEM;
> +		}
> +
> +		vq->split.queue_dma_addr = vring.dma_addr;
> +		vq->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> +	}
> +
> +	__vring_virtqueue_init_split(vq, vdev);
> +	vq->we_own_ring = true;


This seems wrong: we have transports (rproc/mlxtbf) that allocate the 
vring by themselves. I think we need to fail the resize when 
we_own_ring == false.

Thanks



> +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH;
> +
> +	return 0;
> +}
> +
>   
>   /*
>    * Packed ring specific functions - *_packed().
> @@ -2317,6 +2384,8 @@ static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
>   static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
>   					 struct virtio_device *vdev)
>   {
> +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_NONE;
> +
>   	vq->packed_ring = false;
>   	vq->we_own_ring = false;
>   	vq->broken = false;
Xuan Zhuo March 9, 2022, 9:24 a.m. UTC | #2
On Wed, 9 Mar 2022 15:55:44 +0800, Jason Wang <jasowang@redhat.com> wrote:
>
> 在 2022/3/8 下午8:35, Xuan Zhuo 写道:
> > virtio ring supports reset.
> >
> > Queue reset is divided into several stages.
> >
> > 1. notify device queue reset
> > 2. vring release
> > 3. attach new vring
> > 4. notify device queue re-enable
> >
> > After the first step is completed, the vring reset operation can be
> > performed. If the newly set vring num does not change, then just reset
> > the vq related value.
> >
> > Otherwise, the vring will be released and the vring will be reallocated.
> > And the vring will be attached to the vq. If this process fails, the
> > function will exit, and the state of the vq will be the vring release
> > state. You can call this function again to reallocate the vring.
> >
> > In addition, vring_align, may_reduce_num are necessary for reallocating
> > vring, so they are retained when creating vq.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >   drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++
> >   1 file changed, 69 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index e0422c04c903..148fb1fd3d5a 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -158,6 +158,12 @@ struct vring_virtqueue {
> >   			/* DMA address and size information */
> >   			dma_addr_t queue_dma_addr;
> >   			size_t queue_size_in_bytes;
> > +
> > +			/* The parameters for creating vrings are reserved for
> > +			 * creating new vrings when enabling reset queue.
> > +			 */
> > +			u32 vring_align;
> > +			bool may_reduce_num;
> >   		} split;
> >
> >   		/* Available for packed ring */
> > @@ -217,6 +223,12 @@ struct vring_virtqueue {
> >   #endif
> >   };
> >
> > +static void vring_free(struct virtqueue *vq);
> > +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> > +					 struct virtio_device *vdev);
> > +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> > +					  struct virtio_device *vdev,
> > +					  struct vring vring);
> >
> >   /*
> >    * Helpers.
> > @@ -1012,6 +1024,8 @@ static struct virtqueue *vring_create_virtqueue_split(
> >   		return NULL;
> >   	}
> >
> > +	to_vvq(vq)->split.vring_align = vring_align;
> > +	to_vvq(vq)->split.may_reduce_num = may_reduce_num;
> >   	to_vvq(vq)->split.queue_dma_addr = vring.dma_addr;
> >   	to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> >   	to_vvq(vq)->we_own_ring = true;
> > @@ -1019,6 +1033,59 @@ static struct virtqueue *vring_create_virtqueue_split(
> >   	return vq;
> >   }
> >
> > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num)
> > +{
>
>
> So what this function does is to resize the virtqueue actually, I
> suggest to rename it as virtqueue_resize_split().

OK.

>
>
> > +	struct vring_virtqueue *vq = to_vvq(_vq);
> > +	struct virtio_device *vdev = _vq->vdev;
> > +	struct vring_split vring;
> > +	int err;
> > +
> > +	if (num > _vq->num_max)
> > +		return -E2BIG;
> > +
> > +	switch (vq->vq.reset) {
> > +	case VIRTIO_VQ_RESET_STEP_NONE:
> > +		return -ENOENT;
> > +
> > +	case VIRTIO_VQ_RESET_STEP_VRING_ATTACH:
> > +	case VIRTIO_VQ_RESET_STEP_DEVICE:
> > +		if (vq->split.vring.num == num || !num)
> > +			break;
> > +
> > +		vring_free(_vq);
> > +
> > +		fallthrough;
> > +
> > +	case VIRTIO_VQ_RESET_STEP_VRING_RELEASE:
> > +		if (!num)
> > +			num = vq->split.vring.num;
> > +
> > +		err = vring_create_vring_split(&vring, vdev,
> > +					       vq->split.vring_align,
> > +					       vq->weak_barriers,
> > +					       vq->split.may_reduce_num, num);
> > +		if (err)
> > +			return -ENOMEM;
>
>
> We'd better need a safe fallback here like:
>
> If we can't allocate new memory, we can keep using the current one.
> Otherwise an ethtool -G fail may make the device not usable.
>
> This could be done by not freeing the old vring and virtqueue states
> until new is allocated.

I've been thinking the same thing for the past two days.

>
>
> > +
> > +		err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
> > +		if (err) {
> > +			vring_free_queue(vdev, vring.queue_size_in_bytes,
> > +					 vring.queue,
> > +					 vring.dma_addr);
> > +			return -ENOMEM;
> > +		}
> > +
> > +		vq->split.queue_dma_addr = vring.dma_addr;
> > +		vq->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > +	}
> > +
> > +	__vring_virtqueue_init_split(vq, vdev);
> > +	vq->we_own_ring = true;
>
>
> This seems wrong, we have the transport (rproc/mlxtbf) that allocate the
> vring by themselves. I think we need to fail the resize for we_own_ring
> == false.

Oh, it turns out that we_own_ring is for this purpose.

Thanks.

>
> Thanks
>
>
>
> > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH;
> > +
> > +	return 0;
> > +}
> > +
> >
> >   /*
> >    * Packed ring specific functions - *_packed().
> > @@ -2317,6 +2384,8 @@ static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> >   static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> >   					 struct virtio_device *vdev)
> >   {
> > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_NONE;
> > +
> >   	vq->packed_ring = false;
> >   	vq->we_own_ring = false;
> >   	vq->broken = false;
>
Xuan Zhuo March 10, 2022, 4:46 a.m. UTC | #3
On Wed, 9 Mar 2022 15:55:44 +0800, Jason Wang <jasowang@redhat.com> wrote:
>
> 在 2022/3/8 下午8:35, Xuan Zhuo 写道:
> > virtio ring supports reset.
> >
> > Queue reset is divided into several stages.
> >
> > 1. notify device queue reset
> > 2. vring release
> > 3. attach new vring
> > 4. notify device queue re-enable
> >
> > After the first step is completed, the vring reset operation can be
> > performed. If the newly set vring num does not change, then just reset
> > the vq related value.
> >
> > Otherwise, the vring will be released and the vring will be reallocated.
> > And the vring will be attached to the vq. If this process fails, the
> > function will exit, and the state of the vq will be the vring release
> > state. You can call this function again to reallocate the vring.
> >
> > In addition, vring_align, may_reduce_num are necessary for reallocating
> > vring, so they are retained when creating vq.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >   drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++
> >   1 file changed, 69 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index e0422c04c903..148fb1fd3d5a 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -158,6 +158,12 @@ struct vring_virtqueue {
> >   			/* DMA address and size information */
> >   			dma_addr_t queue_dma_addr;
> >   			size_t queue_size_in_bytes;
> > +
> > +			/* The parameters for creating vrings are reserved for
> > +			 * creating new vrings when enabling reset queue.
> > +			 */
> > +			u32 vring_align;
> > +			bool may_reduce_num;
> >   		} split;
> >
> >   		/* Available for packed ring */
> > @@ -217,6 +223,12 @@ struct vring_virtqueue {
> >   #endif
> >   };
> >
> > +static void vring_free(struct virtqueue *vq);
> > +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> > +					 struct virtio_device *vdev);
> > +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> > +					  struct virtio_device *vdev,
> > +					  struct vring vring);
> >
> >   /*
> >    * Helpers.
> > @@ -1012,6 +1024,8 @@ static struct virtqueue *vring_create_virtqueue_split(
> >   		return NULL;
> >   	}
> >
> > +	to_vvq(vq)->split.vring_align = vring_align;
> > +	to_vvq(vq)->split.may_reduce_num = may_reduce_num;
> >   	to_vvq(vq)->split.queue_dma_addr = vring.dma_addr;
> >   	to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> >   	to_vvq(vq)->we_own_ring = true;
> > @@ -1019,6 +1033,59 @@ static struct virtqueue *vring_create_virtqueue_split(
> >   	return vq;
> >   }
> >
> > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num)
> > +{
>
>
> So what this function does is to resize the virtqueue actually, I
> suggest to rename it as virtqueue_resize_split().

Besides resizing, when num is 0 the function reinitializes the vq's
ring-related variables, for example avail_idx_shadow.

So I think 'reset' is more appropriate.

Thanks.

>
>
> > +	struct vring_virtqueue *vq = to_vvq(_vq);
> > +	struct virtio_device *vdev = _vq->vdev;
> > +	struct vring_split vring;
> > +	int err;
> > +
> > +	if (num > _vq->num_max)
> > +		return -E2BIG;
> > +
> > +	switch (vq->vq.reset) {
> > +	case VIRTIO_VQ_RESET_STEP_NONE:
> > +		return -ENOENT;
> > +
> > +	case VIRTIO_VQ_RESET_STEP_VRING_ATTACH:
> > +	case VIRTIO_VQ_RESET_STEP_DEVICE:
> > +		if (vq->split.vring.num == num || !num)
> > +			break;
> > +
> > +		vring_free(_vq);
> > +
> > +		fallthrough;
> > +
> > +	case VIRTIO_VQ_RESET_STEP_VRING_RELEASE:
> > +		if (!num)
> > +			num = vq->split.vring.num;
> > +
> > +		err = vring_create_vring_split(&vring, vdev,
> > +					       vq->split.vring_align,
> > +					       vq->weak_barriers,
> > +					       vq->split.may_reduce_num, num);
> > +		if (err)
> > +			return -ENOMEM;
>
>
> We'd better need a safe fallback here like:
>
> If we can't allocate new memory, we can keep using the current one.
> Otherwise an ethtool -G fail may make the device not usable.
>
> This could be done by not freeing the old vring and virtqueue states
> until new is allocated.
>
>
> > +
> > +		err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
> > +		if (err) {
> > +			vring_free_queue(vdev, vring.queue_size_in_bytes,
> > +					 vring.queue,
> > +					 vring.dma_addr);
> > +			return -ENOMEM;
> > +		}
> > +
> > +		vq->split.queue_dma_addr = vring.dma_addr;
> > +		vq->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > +	}
> > +
> > +	__vring_virtqueue_init_split(vq, vdev);
> > +	vq->we_own_ring = true;
>
>
> This seems wrong, we have the transport (rproc/mlxtbf) that allocate the
> vring by themselves. I think we need to fail the resize for we_own_ring
> == false.
>
> Thanks
>
>
>
> > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH;
> > +
> > +	return 0;
> > +}
> > +
> >
> >   /*
> >    * Packed ring specific functions - *_packed().
> > @@ -2317,6 +2384,8 @@ static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> >   static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> >   					 struct virtio_device *vdev)
> >   {
> > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_NONE;
> > +
> >   	vq->packed_ring = false;
> >   	vq->we_own_ring = false;
> >   	vq->broken = false;
>
Michael S. Tsirkin March 10, 2022, 7 a.m. UTC | #4
On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote:
> virtio ring supports reset.
> 
> Queue reset is divided into several stages.
> 
> 1. notify device queue reset
> 2. vring release
> 3. attach new vring
> 4. notify device queue re-enable
> 
> After the first step is completed, the vring reset operation can be
> performed. If the newly set vring num does not change, then just reset
> the vq related value.
> 
> Otherwise, the vring will be released and the vring will be reallocated.
> And the vring will be attached to the vq. If this process fails, the
> function will exit, and the state of the vq will be the vring release
> state. You can call this function again to reallocate the vring.
> 
> In addition, vring_align, may_reduce_num are necessary for reallocating
> vring, so they are retained when creating vq.
> 
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>  drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++
>  1 file changed, 69 insertions(+)
> 
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index e0422c04c903..148fb1fd3d5a 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -158,6 +158,12 @@ struct vring_virtqueue {
>  			/* DMA address and size information */
>  			dma_addr_t queue_dma_addr;
>  			size_t queue_size_in_bytes;
> +
> +			/* The parameters for creating vrings are reserved for
> +			 * creating new vrings when enabling reset queue.
> +			 */
> +			u32 vring_align;
> +			bool may_reduce_num;
>  		} split;
>  
>  		/* Available for packed ring */
> @@ -217,6 +223,12 @@ struct vring_virtqueue {
>  #endif
>  };
>  
> +static void vring_free(struct virtqueue *vq);
> +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> +					 struct virtio_device *vdev);
> +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> +					  struct virtio_device *vdev,
> +					  struct vring vring);
>  
>  /*
>   * Helpers.
> @@ -1012,6 +1024,8 @@ static struct virtqueue *vring_create_virtqueue_split(
>  		return NULL;
>  	}
>  
> +	to_vvq(vq)->split.vring_align = vring_align;
> +	to_vvq(vq)->split.may_reduce_num = may_reduce_num;
>  	to_vvq(vq)->split.queue_dma_addr = vring.dma_addr;
>  	to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes;
>  	to_vvq(vq)->we_own_ring = true;
> @@ -1019,6 +1033,59 @@ static struct virtqueue *vring_create_virtqueue_split(
>  	return vq;
>  }
>  
> +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num)
> +{
> +	struct vring_virtqueue *vq = to_vvq(_vq);
> +	struct virtio_device *vdev = _vq->vdev;
> +	struct vring_split vring;
> +	int err;
> +
> +	if (num > _vq->num_max)
> +		return -E2BIG;
> +
> +	switch (vq->vq.reset) {
> +	case VIRTIO_VQ_RESET_STEP_NONE:
> +		return -ENOENT;
> +
> +	case VIRTIO_VQ_RESET_STEP_VRING_ATTACH:
> +	case VIRTIO_VQ_RESET_STEP_DEVICE:
> +		if (vq->split.vring.num == num || !num)
> +			break;
> +
> +		vring_free(_vq);
> +
> +		fallthrough;
> +
> +	case VIRTIO_VQ_RESET_STEP_VRING_RELEASE:
> +		if (!num)
> +			num = vq->split.vring.num;
> +
> +		err = vring_create_vring_split(&vring, vdev,
> +					       vq->split.vring_align,
> +					       vq->weak_barriers,
> +					       vq->split.may_reduce_num, num);
> +		if (err)
> +			return -ENOMEM;
> +
> +		err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
> +		if (err) {
> +			vring_free_queue(vdev, vring.queue_size_in_bytes,
> +					 vring.queue,
> +					 vring.dma_addr);
> +			return -ENOMEM;
> +		}
> +
> +		vq->split.queue_dma_addr = vring.dma_addr;
> +		vq->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> +	}
> +
> +	__vring_virtqueue_init_split(vq, vdev);
> +	vq->we_own_ring = true;
> +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH;
> +
> +	return 0;
> +}
> +

I kind of dislike this state machine.

Hacks like special-casing num = 0 to mean "reset" are especially
confusing.

And as Jason points out, when we want a resize then yes this currently
implies reset but that is an implementation detail.

There should be a way to just make these cases separate functions
and then use them to compose consistent external APIs.

If we additionally want to track state for debugging then bool flags
seem more appropriate for this, though from experience that is
not always worth the extra code.



>  /*
>   * Packed ring specific functions - *_packed().
> @@ -2317,6 +2384,8 @@ static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
>  static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
>  					 struct virtio_device *vdev)
>  {
> +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_NONE;
> +
>  	vq->packed_ring = false;
>  	vq->we_own_ring = false;
>  	vq->broken = false;
> -- 
> 2.31.0
Xuan Zhuo March 10, 2022, 7:17 a.m. UTC | #5
On Thu, 10 Mar 2022 02:00:39 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote:
> > virtio ring supports reset.
> >
> > Queue reset is divided into several stages.
> >
> > 1. notify device queue reset
> > 2. vring release
> > 3. attach new vring
> > 4. notify device queue re-enable
> >
> > After the first step is completed, the vring reset operation can be
> > performed. If the newly set vring num does not change, then just reset
> > the vq related value.
> >
> > Otherwise, the vring will be released and the vring will be reallocated.
> > And the vring will be attached to the vq. If this process fails, the
> > function will exit, and the state of the vq will be the vring release
> > state. You can call this function again to reallocate the vring.
> >
> > In addition, vring_align, may_reduce_num are necessary for reallocating
> > vring, so they are retained when creating vq.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >  drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++
> >  1 file changed, 69 insertions(+)
> >
> > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > index e0422c04c903..148fb1fd3d5a 100644
> > --- a/drivers/virtio/virtio_ring.c
> > +++ b/drivers/virtio/virtio_ring.c
> > @@ -158,6 +158,12 @@ struct vring_virtqueue {
> >  			/* DMA address and size information */
> >  			dma_addr_t queue_dma_addr;
> >  			size_t queue_size_in_bytes;
> > +
> > +			/* The parameters for creating vrings are reserved for
> > +			 * creating new vrings when enabling reset queue.
> > +			 */
> > +			u32 vring_align;
> > +			bool may_reduce_num;
> >  		} split;
> >
> >  		/* Available for packed ring */
> > @@ -217,6 +223,12 @@ struct vring_virtqueue {
> >  #endif
> >  };
> >
> > +static void vring_free(struct virtqueue *vq);
> > +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> > +					 struct virtio_device *vdev);
> > +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> > +					  struct virtio_device *vdev,
> > +					  struct vring vring);
> >
> >  /*
> >   * Helpers.
> > @@ -1012,6 +1024,8 @@ static struct virtqueue *vring_create_virtqueue_split(
> >  		return NULL;
> >  	}
> >
> > +	to_vvq(vq)->split.vring_align = vring_align;
> > +	to_vvq(vq)->split.may_reduce_num = may_reduce_num;
> >  	to_vvq(vq)->split.queue_dma_addr = vring.dma_addr;
> >  	to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> >  	to_vvq(vq)->we_own_ring = true;
> > @@ -1019,6 +1033,59 @@ static struct virtqueue *vring_create_virtqueue_split(
> >  	return vq;
> >  }
> >
> > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num)
> > +{
> > +	struct vring_virtqueue *vq = to_vvq(_vq);
> > +	struct virtio_device *vdev = _vq->vdev;
> > +	struct vring_split vring;
> > +	int err;
> > +
> > +	if (num > _vq->num_max)
> > +		return -E2BIG;
> > +
> > +	switch (vq->vq.reset) {
> > +	case VIRTIO_VQ_RESET_STEP_NONE:
> > +		return -ENOENT;
> > +
> > +	case VIRTIO_VQ_RESET_STEP_VRING_ATTACH:
> > +	case VIRTIO_VQ_RESET_STEP_DEVICE:
> > +		if (vq->split.vring.num == num || !num)
> > +			break;
> > +
> > +		vring_free(_vq);
> > +
> > +		fallthrough;
> > +
> > +	case VIRTIO_VQ_RESET_STEP_VRING_RELEASE:
> > +		if (!num)
> > +			num = vq->split.vring.num;
> > +
> > +		err = vring_create_vring_split(&vring, vdev,
> > +					       vq->split.vring_align,
> > +					       vq->weak_barriers,
> > +					       vq->split.may_reduce_num, num);
> > +		if (err)
> > +			return -ENOMEM;
> > +
> > +		err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
> > +		if (err) {
> > +			vring_free_queue(vdev, vring.queue_size_in_bytes,
> > +					 vring.queue,
> > +					 vring.dma_addr);
> > +			return -ENOMEM;
> > +		}
> > +
> > +		vq->split.queue_dma_addr = vring.dma_addr;
> > +		vq->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > +	}
> > +
> > +	__vring_virtqueue_init_split(vq, vdev);
> > +	vq->we_own_ring = true;
> > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH;
> > +
> > +	return 0;
> > +}
> > +
>
> I kind of dislike this state machine.
>
> Hacks like special-casing num = 0 to mean "reset" are especially
> confusing.

I'm removing it. I'll state in the function description that this function is
currently only called after the vq has been reset, and I will no longer check
this based on state.

>
> And as Jason points out, when we want a resize then yes this currently
> implies reset but that is an implementation detail.
>
> There should be a way to just make these cases separate functions
> and then use them to compose consistent external APIs.

Yes, virtqueue_resize_split() is fine for ethtool -G.

But in the case of AF_XDP, we just execute a reset to free the buffers. I think
the name virtqueue_reset_vring_split() can cover both cases. Or should we use
two APIs to handle the two scenarios?

Or can anyone think of a better name? ^_^

Thanks.

>
> If we additionally want to track state for debugging then bool flags
> seem more appropriate for this, though from experience that is
> not always worth the extra code.
>
>
>
> >  /*
> >   * Packed ring specific functions - *_packed().
> > @@ -2317,6 +2384,8 @@ static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> >  static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> >  					 struct virtio_device *vdev)
> >  {
> > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_NONE;
> > +
> >  	vq->packed_ring = false;
> >  	vq->we_own_ring = false;
> >  	vq->broken = false;
> > --
> > 2.31.0
>
Michael S. Tsirkin March 10, 2022, 8:07 a.m. UTC | #6
On Thu, Mar 10, 2022 at 03:17:03PM +0800, Xuan Zhuo wrote:
> On Thu, 10 Mar 2022 02:00:39 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote:
> > > virtio ring supports reset.
> > >
> > > Queue reset is divided into several stages.
> > >
> > > 1. notify device queue reset
> > > 2. vring release
> > > 3. attach new vring
> > > 4. notify device queue re-enable
> > >
> > > After the first step is completed, the vring reset operation can be
> > > performed. If the newly set vring num does not change, then just reset
> > > the vq related value.
> > >
> > > Otherwise, the vring will be released and the vring will be reallocated.
> > > And the vring will be attached to the vq. If this process fails, the
> > > function will exit, and the state of the vq will be the vring release
> > > state. You can call this function again to reallocate the vring.
> > >
> > > In addition, vring_align, may_reduce_num are necessary for reallocating
> > > vring, so they are retained when creating vq.
> > >
> > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > ---
> > >  drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++
> > >  1 file changed, 69 insertions(+)
> > >
> > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > index e0422c04c903..148fb1fd3d5a 100644
> > > --- a/drivers/virtio/virtio_ring.c
> > > +++ b/drivers/virtio/virtio_ring.c
> > > @@ -158,6 +158,12 @@ struct vring_virtqueue {
> > >  			/* DMA address and size information */
> > >  			dma_addr_t queue_dma_addr;
> > >  			size_t queue_size_in_bytes;
> > > +
> > > +			/* The parameters for creating vrings are reserved for
> > > +			 * creating new vrings when enabling reset queue.
> > > +			 */
> > > +			u32 vring_align;
> > > +			bool may_reduce_num;
> > >  		} split;
> > >
> > >  		/* Available for packed ring */
> > > @@ -217,6 +223,12 @@ struct vring_virtqueue {
> > >  #endif
> > >  };
> > >
> > > +static void vring_free(struct virtqueue *vq);
> > > +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> > > +					 struct virtio_device *vdev);
> > > +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> > > +					  struct virtio_device *vdev,
> > > +					  struct vring vring);
> > >
> > >  /*
> > >   * Helpers.
> > > @@ -1012,6 +1024,8 @@ static struct virtqueue *vring_create_virtqueue_split(
> > >  		return NULL;
> > >  	}
> > >
> > > +	to_vvq(vq)->split.vring_align = vring_align;
> > > +	to_vvq(vq)->split.may_reduce_num = may_reduce_num;
> > >  	to_vvq(vq)->split.queue_dma_addr = vring.dma_addr;
> > >  	to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > >  	to_vvq(vq)->we_own_ring = true;
> > > @@ -1019,6 +1033,59 @@ static struct virtqueue *vring_create_virtqueue_split(
> > >  	return vq;
> > >  }
> > >
> > > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num)
> > > +{
> > > +	struct vring_virtqueue *vq = to_vvq(_vq);
> > > +	struct virtio_device *vdev = _vq->vdev;
> > > +	struct vring_split vring;
> > > +	int err;
> > > +
> > > +	if (num > _vq->num_max)
> > > +		return -E2BIG;
> > > +
> > > +	switch (vq->vq.reset) {
> > > +	case VIRTIO_VQ_RESET_STEP_NONE:
> > > +		return -ENOENT;
> > > +
> > > +	case VIRTIO_VQ_RESET_STEP_VRING_ATTACH:
> > > +	case VIRTIO_VQ_RESET_STEP_DEVICE:
> > > +		if (vq->split.vring.num == num || !num)
> > > +			break;
> > > +
> > > +		vring_free(_vq);
> > > +
> > > +		fallthrough;
> > > +
> > > +	case VIRTIO_VQ_RESET_STEP_VRING_RELEASE:
> > > +		if (!num)
> > > +			num = vq->split.vring.num;
> > > +
> > > +		err = vring_create_vring_split(&vring, vdev,
> > > +					       vq->split.vring_align,
> > > +					       vq->weak_barriers,
> > > +					       vq->split.may_reduce_num, num);
> > > +		if (err)
> > > +			return -ENOMEM;
> > > +
> > > +		err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
> > > +		if (err) {
> > > +			vring_free_queue(vdev, vring.queue_size_in_bytes,
> > > +					 vring.queue,
> > > +					 vring.dma_addr);
> > > +			return -ENOMEM;
> > > +		}
> > > +
> > > +		vq->split.queue_dma_addr = vring.dma_addr;
> > > +		vq->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > > +	}
> > > +
> > > +	__vring_virtqueue_init_split(vq, vdev);
> > > +	vq->we_own_ring = true;
> > > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH;
> > > +
> > > +	return 0;
> > > +}
> > > +
> >
> > I kind of dislike this state machine.
> >
> > Hacks like special-casing num = 0 to mean "reset" are especially
> > confusing.
> 
> I'm removing it. I'll say in the function description that this function is
> currently only called when vq has been reset. I'm no longer checking it based on
> state.
> 
> >
> > And as Jason points out, when we want a resize then yes this currently
> > implies reset but that is an implementation detail.
> >
> > There should be a way to just make these cases separate functions
> > and then use them to compose consistent external APIs.
> 
> Yes, virtqueue_resize_split() is fine for ethtool -G.
> 
> But in the case of AF_XDP, just execute reset to free the buffer. The name
> virtqueue_reset_vring_split() I think can cover both cases. Or we use two apis
> to handle both scenarios?
> 
> Or can anyone think of a better name. ^_^
> 
> Thanks.


I'd say resize should be called resize and reset should be called reset.

The big issue is a sane API for resize. Ideally it would resubmit
buffers which did not get used. The question is what to do
about buffers which don't fit (if the ring has been downsized).
Maybe a callback that will handle them?
And then what? Queue them up and re-add them later? Drop them?
If we drop, we should drop from the head, not the tail ...


> >
> > If we additionally want to track state for debugging then bool flags
> > seem more appropriate for this, though from experience that is
> > not always worth the extra code.
> >
> >
> >
> > >  /*
> > >   * Packed ring specific functions - *_packed().
> > > @@ -2317,6 +2384,8 @@ static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> > >  static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> > >  					 struct virtio_device *vdev)
> > >  {
> > > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_NONE;
> > > +
> > >  	vq->packed_ring = false;
> > >  	vq->we_own_ring = false;
> > >  	vq->broken = false;
> > > --
> > > 2.31.0
> >
Xuan Zhuo March 10, 2022, 8:14 a.m. UTC | #7
On Thu, 10 Mar 2022 03:07:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Thu, Mar 10, 2022 at 03:17:03PM +0800, Xuan Zhuo wrote:
> > On Thu, 10 Mar 2022 02:00:39 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote:
> > > > virtio ring supports reset.
> > > >
> > > > Queue reset is divided into several stages.
> > > >
> > > > 1. notify device queue reset
> > > > 2. vring release
> > > > 3. attach new vring
> > > > 4. notify device queue re-enable
> > > >
> > > > After the first step is completed, the vring reset operation can be
> > > > performed. If the newly set vring num does not change, then just reset
> > > > the vq related value.
> > > >
> > > > Otherwise, the vring will be released and the vring will be reallocated.
> > > > And the vring will be attached to the vq. If this process fails, the
> > > > function will exit, and the state of the vq will be the vring release
> > > > state. You can call this function again to reallocate the vring.
> > > >
> > > > In addition, vring_align, may_reduce_num are necessary for reallocating
> > > > vring, so they are retained when creating vq.
> > > >
> > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > ---
> > > >  drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++
> > > >  1 file changed, 69 insertions(+)
> > > >
> > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > index e0422c04c903..148fb1fd3d5a 100644
> > > > --- a/drivers/virtio/virtio_ring.c
> > > > +++ b/drivers/virtio/virtio_ring.c
> > > > @@ -158,6 +158,12 @@ struct vring_virtqueue {
> > > >  			/* DMA address and size information */
> > > >  			dma_addr_t queue_dma_addr;
> > > >  			size_t queue_size_in_bytes;
> > > > +
> > > > +			/* The parameters for creating vrings are reserved for
> > > > +			 * creating new vrings when enabling reset queue.
> > > > +			 */
> > > > +			u32 vring_align;
> > > > +			bool may_reduce_num;
> > > >  		} split;
> > > >
> > > >  		/* Available for packed ring */
> > > > @@ -217,6 +223,12 @@ struct vring_virtqueue {
> > > >  #endif
> > > >  };
> > > >
> > > > +static void vring_free(struct virtqueue *vq);
> > > > +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> > > > +					 struct virtio_device *vdev);
> > > > +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> > > > +					  struct virtio_device *vdev,
> > > > +					  struct vring vring);
> > > >
> > > >  /*
> > > >   * Helpers.
> > > > @@ -1012,6 +1024,8 @@ static struct virtqueue *vring_create_virtqueue_split(
> > > >  		return NULL;
> > > >  	}
> > > >
> > > > +	to_vvq(vq)->split.vring_align = vring_align;
> > > > +	to_vvq(vq)->split.may_reduce_num = may_reduce_num;
> > > >  	to_vvq(vq)->split.queue_dma_addr = vring.dma_addr;
> > > >  	to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > > >  	to_vvq(vq)->we_own_ring = true;
> > > > @@ -1019,6 +1033,59 @@ static struct virtqueue *vring_create_virtqueue_split(
> > > >  	return vq;
> > > >  }
> > > >
> > > > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num)
> > > > +{
> > > > +	struct vring_virtqueue *vq = to_vvq(_vq);
> > > > +	struct virtio_device *vdev = _vq->vdev;
> > > > +	struct vring_split vring;
> > > > +	int err;
> > > > +
> > > > +	if (num > _vq->num_max)
> > > > +		return -E2BIG;
> > > > +
> > > > +	switch (vq->vq.reset) {
> > > > +	case VIRTIO_VQ_RESET_STEP_NONE:
> > > > +		return -ENOENT;
> > > > +
> > > > +	case VIRTIO_VQ_RESET_STEP_VRING_ATTACH:
> > > > +	case VIRTIO_VQ_RESET_STEP_DEVICE:
> > > > +		if (vq->split.vring.num == num || !num)
> > > > +			break;
> > > > +
> > > > +		vring_free(_vq);
> > > > +
> > > > +		fallthrough;
> > > > +
> > > > +	case VIRTIO_VQ_RESET_STEP_VRING_RELEASE:
> > > > +		if (!num)
> > > > +			num = vq->split.vring.num;
> > > > +
> > > > +		err = vring_create_vring_split(&vring, vdev,
> > > > +					       vq->split.vring_align,
> > > > +					       vq->weak_barriers,
> > > > +					       vq->split.may_reduce_num, num);
> > > > +		if (err)
> > > > +			return -ENOMEM;
> > > > +
> > > > +		err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
> > > > +		if (err) {
> > > > +			vring_free_queue(vdev, vring.queue_size_in_bytes,
> > > > +					 vring.queue,
> > > > +					 vring.dma_addr);
> > > > +			return -ENOMEM;
> > > > +		}
> > > > +
> > > > +		vq->split.queue_dma_addr = vring.dma_addr;
> > > > +		vq->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > > > +	}
> > > > +
> > > > +	__vring_virtqueue_init_split(vq, vdev);
> > > > +	vq->we_own_ring = true;
> > > > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH;
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > >
> > > I kind of dislike this state machine.
> > >
> > > Hacks like special-casing num = 0 to mean "reset" are especially
> > > confusing.
> >
> > I'm removing it. I'll say in the function description that this function is
> > currently only called when vq has been reset. I'm no longer checking it based on
> > state.
> >
> > >
> > > And as Jason points out, when we want a resize then yes this currently
> > > implies reset but that is an implementation detail.
> > >
> > > There should be a way to just make these cases separate functions
> > > and then use them to compose consistent external APIs.
> >
> > Yes, virtqueue_resize_split() is fine for ethtool -G.
> >
> > But in the case of AF_XDP, just execute reset to free the buffer. The name
> > virtqueue_reset_vring_split() I think can cover both cases. Or we use two apis
> > to handle both scenarios?
> >
> > Or can anyone think of a better name. ^_^
> >
> > Thanks.
>
>
> I'd say resize should be called resize and reset should be called reset.


OK, I'll change the name to resize here.

But I have a question: when I implement AF_XDP support in virtio-net, the
requirement there is to release all submitted buffers. Should I then add a
new API such as virtqueue_reset_vring()?

>
> The big issue is a sane API for resize. Ideally it would resubmit
> buffers which did not get used. Question is what to do
> about buffers which don't fit (if ring has been downsized)?
> Maybe a callback that will handle them?
> And then what? Queue them up and readd later? Drop?
> If we drop we should drop from the head not the tail ...

It's a good idea, let's implement it later.

Thanks.

>
>
> > >
> > > If we additionally want to track state for debugging then bool flags
> > > seem more appropriate for this, though from experience that is
> > > not always worth the extra code.
> > >
> > >
> > >
> > > >  /*
> > > >   * Packed ring specific functions - *_packed().
> > > > @@ -2317,6 +2384,8 @@ static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> > > >  static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> > > >  					 struct virtio_device *vdev)
> > > >  {
> > > > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_NONE;
> > > > +
> > > >  	vq->packed_ring = false;
> > > >  	vq->we_own_ring = false;
> > > >  	vq->broken = false;
> > > > --
> > > > 2.31.0
> > >
>
Michael S. Tsirkin March 10, 2022, 12:17 p.m. UTC | #8
On Thu, Mar 10, 2022 at 04:14:16PM +0800, Xuan Zhuo wrote:
> On Thu, 10 Mar 2022 03:07:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Thu, Mar 10, 2022 at 03:17:03PM +0800, Xuan Zhuo wrote:
> > > On Thu, 10 Mar 2022 02:00:39 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote:
> > > > > virtio ring supports reset.
> > > > >
> > > > > Queue reset is divided into several stages.
> > > > >
> > > > > 1. notify device queue reset
> > > > > 2. vring release
> > > > > 3. attach new vring
> > > > > 4. notify device queue re-enable
> > > > >
> > > > > After the first step is completed, the vring reset operation can be
> > > > > performed. If the newly set vring num does not change, then just reset
> > > > > the vq related value.
> > > > >
> > > > > Otherwise, the vring will be released and the vring will be reallocated.
> > > > > And the vring will be attached to the vq. If this process fails, the
> > > > > function will exit, and the state of the vq will be the vring release
> > > > > state. You can call this function again to reallocate the vring.
> > > > >
> > > > > In addition, vring_align, may_reduce_num are necessary for reallocating
> > > > > vring, so they are retained when creating vq.
> > > > >
> > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > ---
> > > > >  drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++
> > > > >  1 file changed, 69 insertions(+)
> > > > >
> > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > index e0422c04c903..148fb1fd3d5a 100644
> > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > @@ -158,6 +158,12 @@ struct vring_virtqueue {
> > > > >  			/* DMA address and size information */
> > > > >  			dma_addr_t queue_dma_addr;
> > > > >  			size_t queue_size_in_bytes;
> > > > > +
> > > > > +			/* The parameters for creating vrings are reserved for
> > > > > +			 * creating new vrings when enabling reset queue.
> > > > > +			 */
> > > > > +			u32 vring_align;
> > > > > +			bool may_reduce_num;
> > > > >  		} split;
> > > > >
> > > > >  		/* Available for packed ring */
> > > > > @@ -217,6 +223,12 @@ struct vring_virtqueue {
> > > > >  #endif
> > > > >  };
> > > > >
> > > > > +static void vring_free(struct virtqueue *vq);
> > > > > +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> > > > > +					 struct virtio_device *vdev);
> > > > > +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> > > > > +					  struct virtio_device *vdev,
> > > > > +					  struct vring vring);
> > > > >
> > > > >  /*
> > > > >   * Helpers.
> > > > > @@ -1012,6 +1024,8 @@ static struct virtqueue *vring_create_virtqueue_split(
> > > > >  		return NULL;
> > > > >  	}
> > > > >
> > > > > +	to_vvq(vq)->split.vring_align = vring_align;
> > > > > +	to_vvq(vq)->split.may_reduce_num = may_reduce_num;
> > > > >  	to_vvq(vq)->split.queue_dma_addr = vring.dma_addr;
> > > > >  	to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > > > >  	to_vvq(vq)->we_own_ring = true;
> > > > > @@ -1019,6 +1033,59 @@ static struct virtqueue *vring_create_virtqueue_split(
> > > > >  	return vq;
> > > > >  }
> > > > >
> > > > > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num)
> > > > > +{
> > > > > +	struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > +	struct virtio_device *vdev = _vq->vdev;
> > > > > +	struct vring_split vring;
> > > > > +	int err;
> > > > > +
> > > > > +	if (num > _vq->num_max)
> > > > > +		return -E2BIG;
> > > > > +
> > > > > +	switch (vq->vq.reset) {
> > > > > +	case VIRTIO_VQ_RESET_STEP_NONE:
> > > > > +		return -ENOENT;
> > > > > +
> > > > > +	case VIRTIO_VQ_RESET_STEP_VRING_ATTACH:
> > > > > +	case VIRTIO_VQ_RESET_STEP_DEVICE:
> > > > > +		if (vq->split.vring.num == num || !num)
> > > > > +			break;
> > > > > +
> > > > > +		vring_free(_vq);
> > > > > +
> > > > > +		fallthrough;
> > > > > +
> > > > > +	case VIRTIO_VQ_RESET_STEP_VRING_RELEASE:
> > > > > +		if (!num)
> > > > > +			num = vq->split.vring.num;
> > > > > +
> > > > > +		err = vring_create_vring_split(&vring, vdev,
> > > > > +					       vq->split.vring_align,
> > > > > +					       vq->weak_barriers,
> > > > > +					       vq->split.may_reduce_num, num);
> > > > > +		if (err)
> > > > > +			return -ENOMEM;
> > > > > +
> > > > > +		err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
> > > > > +		if (err) {
> > > > > +			vring_free_queue(vdev, vring.queue_size_in_bytes,
> > > > > +					 vring.queue,
> > > > > +					 vring.dma_addr);
> > > > > +			return -ENOMEM;
> > > > > +		}
> > > > > +
> > > > > +		vq->split.queue_dma_addr = vring.dma_addr;
> > > > > +		vq->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > > > > +	}
> > > > > +
> > > > > +	__vring_virtqueue_init_split(vq, vdev);
> > > > > +	vq->we_own_ring = true;
> > > > > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH;
> > > > > +
> > > > > +	return 0;
> > > > > +}
> > > > > +
> > > >
> > > > I kind of dislike this state machine.
> > > >
> > > > Hacks like special-casing num = 0 to mean "reset" are especially
> > > > confusing.
> > >
> > > I'm removing it. I'll say in the function description that this function is
> > > currently only called when vq has been reset. I'm no longer checking it based on
> > > state.
> > >
> > > >
> > > > And as Jason points out, when we want a resize then yes this currently
> > > > implies reset but that is an implementation detail.
> > > >
> > > > There should be a way to just make these cases separate functions
> > > > and then use them to compose consistent external APIs.
> > >
> > > Yes, virtqueue_resize_split() is fine for ethtool -G.
> > >
> > > But in the case of AF_XDP, just execute reset to free the buffer. The name
> > > virtqueue_reset_vring_split() I think can cover both cases. Or we use two apis
> > > to handle both scenarios?
> > >
> > > Or can anyone think of a better name. ^_^
> > >
> > > Thanks.
> >
> >
> > I'd say resize should be called resize and reset should be called reset.
> 
> 
> OK, I'll change it to resize here.
> 
> But I want to know that when I implement virtio-net to support AF_XDP, its
> requirement is to release all submitted buffers. Then should I add a new api
> such as virtqueue_reset_vring()?

Sounds like a reasonable name.

> >
> > The big issue is a sane API for resize. Ideally it would resubmit
> > buffers which did not get used. Question is what to do
> > about buffers which don't fit (if ring has been downsized)?
> > Maybe a callback that will handle them?
> > And then what? Queue them up and readd later? Drop?
> > If we drop we should drop from the head not the tail ...
> 
> It's a good idea, let's implement it later.
> 
> Thanks.

Well ... I'm not sure how you are going to support resize
if you don't know what to do with the buffers that were
in the ring.

> >
> >
> > > >
> > > > If we additionally want to track state for debugging then bool flags
> > > > seem more appropriate for this, though from experience that is
> > > > not always worth the extra code.
> > > >
> > > >
> > > >
> > > > >  /*
> > > > >   * Packed ring specific functions - *_packed().
> > > > > @@ -2317,6 +2384,8 @@ static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> > > > >  static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> > > > >  					 struct virtio_device *vdev)
> > > > >  {
> > > > > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_NONE;
> > > > > +
> > > > >  	vq->packed_ring = false;
> > > > >  	vq->we_own_ring = false;
> > > > >  	vq->broken = false;
> > > > > --
> > > > > 2.31.0
> > > >
> >
Xuan Zhuo March 10, 2022, 12:33 p.m. UTC | #9
On Thu, 10 Mar 2022 07:17:09 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Thu, Mar 10, 2022 at 04:14:16PM +0800, Xuan Zhuo wrote:
> > On Thu, 10 Mar 2022 03:07:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > On Thu, Mar 10, 2022 at 03:17:03PM +0800, Xuan Zhuo wrote:
> > > > On Thu, 10 Mar 2022 02:00:39 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote:
> > > > > > virtio ring supports reset.
> > > > > >
> > > > > > Queue reset is divided into several stages.
> > > > > >
> > > > > > 1. notify device queue reset
> > > > > > 2. vring release
> > > > > > 3. attach new vring
> > > > > > 4. notify device queue re-enable
> > > > > >
> > > > > > After the first step is completed, the vring reset operation can be
> > > > > > performed. If the newly set vring num does not change, then just reset
> > > > > > the vq related value.
> > > > > >
> > > > > > Otherwise, the vring will be released and the vring will be reallocated.
> > > > > > And the vring will be attached to the vq. If this process fails, the
> > > > > > function will exit, and the state of the vq will be the vring release
> > > > > > state. You can call this function again to reallocate the vring.
> > > > > >
> > > > > > In addition, vring_align, may_reduce_num are necessary for reallocating
> > > > > > vring, so they are retained when creating vq.
> > > > > >
> > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > ---
> > > > > >  drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++
> > > > > >  1 file changed, 69 insertions(+)
> > > > > >
> > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > index e0422c04c903..148fb1fd3d5a 100644
> > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > @@ -158,6 +158,12 @@ struct vring_virtqueue {
> > > > > >  			/* DMA address and size information */
> > > > > >  			dma_addr_t queue_dma_addr;
> > > > > >  			size_t queue_size_in_bytes;
> > > > > > +
> > > > > > +			/* The parameters for creating vrings are reserved for
> > > > > > +			 * creating new vrings when enabling reset queue.
> > > > > > +			 */
> > > > > > +			u32 vring_align;
> > > > > > +			bool may_reduce_num;
> > > > > >  		} split;
> > > > > >
> > > > > >  		/* Available for packed ring */
> > > > > > @@ -217,6 +223,12 @@ struct vring_virtqueue {
> > > > > >  #endif
> > > > > >  };
> > > > > >
> > > > > > +static void vring_free(struct virtqueue *vq);
> > > > > > +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> > > > > > +					 struct virtio_device *vdev);
> > > > > > +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> > > > > > +					  struct virtio_device *vdev,
> > > > > > +					  struct vring vring);
> > > > > >
> > > > > >  /*
> > > > > >   * Helpers.
> > > > > > @@ -1012,6 +1024,8 @@ static struct virtqueue *vring_create_virtqueue_split(
> > > > > >  		return NULL;
> > > > > >  	}
> > > > > >
> > > > > > +	to_vvq(vq)->split.vring_align = vring_align;
> > > > > > +	to_vvq(vq)->split.may_reduce_num = may_reduce_num;
> > > > > >  	to_vvq(vq)->split.queue_dma_addr = vring.dma_addr;
> > > > > >  	to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > > > > >  	to_vvq(vq)->we_own_ring = true;
> > > > > > @@ -1019,6 +1033,59 @@ static struct virtqueue *vring_create_virtqueue_split(
> > > > > >  	return vq;
> > > > > >  }
> > > > > >
> > > > > > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num)
> > > > > > +{
> > > > > > +	struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > +	struct virtio_device *vdev = _vq->vdev;
> > > > > > +	struct vring_split vring;
> > > > > > +	int err;
> > > > > > +
> > > > > > +	if (num > _vq->num_max)
> > > > > > +		return -E2BIG;
> > > > > > +
> > > > > > +	switch (vq->vq.reset) {
> > > > > > +	case VIRTIO_VQ_RESET_STEP_NONE:
> > > > > > +		return -ENOENT;
> > > > > > +
> > > > > > +	case VIRTIO_VQ_RESET_STEP_VRING_ATTACH:
> > > > > > +	case VIRTIO_VQ_RESET_STEP_DEVICE:
> > > > > > +		if (vq->split.vring.num == num || !num)
> > > > > > +			break;
> > > > > > +
> > > > > > +		vring_free(_vq);
> > > > > > +
> > > > > > +		fallthrough;
> > > > > > +
> > > > > > +	case VIRTIO_VQ_RESET_STEP_VRING_RELEASE:
> > > > > > +		if (!num)
> > > > > > +			num = vq->split.vring.num;
> > > > > > +
> > > > > > +		err = vring_create_vring_split(&vring, vdev,
> > > > > > +					       vq->split.vring_align,
> > > > > > +					       vq->weak_barriers,
> > > > > > +					       vq->split.may_reduce_num, num);
> > > > > > +		if (err)
> > > > > > +			return -ENOMEM;
> > > > > > +
> > > > > > +		err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
> > > > > > +		if (err) {
> > > > > > +			vring_free_queue(vdev, vring.queue_size_in_bytes,
> > > > > > +					 vring.queue,
> > > > > > +					 vring.dma_addr);
> > > > > > +			return -ENOMEM;
> > > > > > +		}
> > > > > > +
> > > > > > +		vq->split.queue_dma_addr = vring.dma_addr;
> > > > > > +		vq->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > > > > > +	}
> > > > > > +
> > > > > > +	__vring_virtqueue_init_split(vq, vdev);
> > > > > > +	vq->we_own_ring = true;
> > > > > > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH;
> > > > > > +
> > > > > > +	return 0;
> > > > > > +}
> > > > > > +
> > > > >
> > > > > I kind of dislike this state machine.
> > > > >
> > > > > Hacks like special-casing num = 0 to mean "reset" are especially
> > > > > confusing.
> > > >
> > > > I'm removing it. I'll say in the function description that this function is
> > > > currently only called when vq has been reset. I'm no longer checking it based on
> > > > state.
> > > >
> > > > >
> > > > > And as Jason points out, when we want a resize then yes this currently
> > > > > implies reset but that is an implementation detail.
> > > > >
> > > > > There should be a way to just make these cases separate functions
> > > > > and then use them to compose consistent external APIs.
> > > >
> > > > Yes, virtqueue_resize_split() is fine for ethtool -G.
> > > >
> > > > But in the case of AF_XDP, just execute reset to free the buffer. The name
> > > > virtqueue_reset_vring_split() I think can cover both cases. Or we use two apis
> > > > to handle both scenarios?
> > > >
> > > > Or can anyone think of a better name. ^_^
> > > >
> > > > Thanks.
> > >
> > >
> > > I'd say resize should be called resize and reset should be called reset.
> >
> >
> > OK, I'll change it to resize here.
> >
> > But I want to know that when I implement virtio-net to support AF_XDP, its
> > requirement is to release all submitted buffers. Then should I add a new api
> > such as virtqueue_reset_vring()?
>
> Sounds like a reasonable name.
>
> > >
> > > The big issue is a sane API for resize. Ideally it would resubmit
> > > buffers which did not get used. Question is what to do
> > > about buffers which don't fit (if ring has been downsized)?
> > > Maybe a callback that will handle them?
> > > And then what? Queue them up and readd later? Drop?
> > > If we drop we should drop from the head not the tail ...
> >
> > It's a good idea, let's implement it later.
> >
> > Thanks.
>
> Well ... not sure how you are going to support resize
> if you don't know what to do with buffers that were
> in the ring.

The current solution is to call virtqueue_detach_unused_buf() to release the
buffers before resizing the ring.

Thanks.
Michael S. Tsirkin March 10, 2022, 1:04 p.m. UTC | #10
On Thu, Mar 10, 2022 at 08:33:30PM +0800, Xuan Zhuo wrote:
> On Thu, 10 Mar 2022 07:17:09 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > On Thu, Mar 10, 2022 at 04:14:16PM +0800, Xuan Zhuo wrote:
> > > On Thu, 10 Mar 2022 03:07:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > On Thu, Mar 10, 2022 at 03:17:03PM +0800, Xuan Zhuo wrote:
> > > > > On Thu, 10 Mar 2022 02:00:39 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote:
> > > > > > > virtio ring supports reset.
> > > > > > >
> > > > > > > Queue reset is divided into several stages.
> > > > > > >
> > > > > > > 1. notify device queue reset
> > > > > > > 2. vring release
> > > > > > > 3. attach new vring
> > > > > > > 4. notify device queue re-enable
> > > > > > >
> > > > > > > After the first step is completed, the vring reset operation can be
> > > > > > > performed. If the newly set vring num does not change, then just reset
> > > > > > > the vq related value.
> > > > > > >
> > > > > > > Otherwise, the vring will be released and the vring will be reallocated.
> > > > > > > And the vring will be attached to the vq. If this process fails, the
> > > > > > > function will exit, and the state of the vq will be the vring release
> > > > > > > state. You can call this function again to reallocate the vring.
> > > > > > >
> > > > > > > In addition, vring_align, may_reduce_num are necessary for reallocating
> > > > > > > vring, so they are retained when creating vq.
> > > > > > >
> > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > ---
> > > > > > >  drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++
> > > > > > >  1 file changed, 69 insertions(+)
> > > > > > >
> > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > index e0422c04c903..148fb1fd3d5a 100644
> > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > @@ -158,6 +158,12 @@ struct vring_virtqueue {
> > > > > > >  			/* DMA address and size information */
> > > > > > >  			dma_addr_t queue_dma_addr;
> > > > > > >  			size_t queue_size_in_bytes;
> > > > > > > +
> > > > > > > +			/* The parameters for creating vrings are reserved for
> > > > > > > +			 * creating new vrings when enabling reset queue.
> > > > > > > +			 */
> > > > > > > +			u32 vring_align;
> > > > > > > +			bool may_reduce_num;
> > > > > > >  		} split;
> > > > > > >
> > > > > > >  		/* Available for packed ring */
> > > > > > > @@ -217,6 +223,12 @@ struct vring_virtqueue {
> > > > > > >  #endif
> > > > > > >  };
> > > > > > >
> > > > > > > +static void vring_free(struct virtqueue *vq);
> > > > > > > +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> > > > > > > +					 struct virtio_device *vdev);
> > > > > > > +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> > > > > > > +					  struct virtio_device *vdev,
> > > > > > > +					  struct vring vring);
> > > > > > >
> > > > > > >  /*
> > > > > > >   * Helpers.
> > > > > > > @@ -1012,6 +1024,8 @@ static struct virtqueue *vring_create_virtqueue_split(
> > > > > > >  		return NULL;
> > > > > > >  	}
> > > > > > >
> > > > > > > +	to_vvq(vq)->split.vring_align = vring_align;
> > > > > > > +	to_vvq(vq)->split.may_reduce_num = may_reduce_num;
> > > > > > >  	to_vvq(vq)->split.queue_dma_addr = vring.dma_addr;
> > > > > > >  	to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > > > > > >  	to_vvq(vq)->we_own_ring = true;
> > > > > > > @@ -1019,6 +1033,59 @@ static struct virtqueue *vring_create_virtqueue_split(
> > > > > > >  	return vq;
> > > > > > >  }
> > > > > > >
> > > > > > > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num)
> > > > > > > +{
> > > > > > > +	struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > +	struct virtio_device *vdev = _vq->vdev;
> > > > > > > +	struct vring_split vring;
> > > > > > > +	int err;
> > > > > > > +
> > > > > > > +	if (num > _vq->num_max)
> > > > > > > +		return -E2BIG;
> > > > > > > +
> > > > > > > +	switch (vq->vq.reset) {
> > > > > > > +	case VIRTIO_VQ_RESET_STEP_NONE:
> > > > > > > +		return -ENOENT;
> > > > > > > +
> > > > > > > +	case VIRTIO_VQ_RESET_STEP_VRING_ATTACH:
> > > > > > > +	case VIRTIO_VQ_RESET_STEP_DEVICE:
> > > > > > > +		if (vq->split.vring.num == num || !num)
> > > > > > > +			break;
> > > > > > > +
> > > > > > > +		vring_free(_vq);
> > > > > > > +
> > > > > > > +		fallthrough;
> > > > > > > +
> > > > > > > +	case VIRTIO_VQ_RESET_STEP_VRING_RELEASE:
> > > > > > > +		if (!num)
> > > > > > > +			num = vq->split.vring.num;
> > > > > > > +
> > > > > > > +		err = vring_create_vring_split(&vring, vdev,
> > > > > > > +					       vq->split.vring_align,
> > > > > > > +					       vq->weak_barriers,
> > > > > > > +					       vq->split.may_reduce_num, num);
> > > > > > > +		if (err)
> > > > > > > +			return -ENOMEM;
> > > > > > > +
> > > > > > > +		err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
> > > > > > > +		if (err) {
> > > > > > > +			vring_free_queue(vdev, vring.queue_size_in_bytes,
> > > > > > > +					 vring.queue,
> > > > > > > +					 vring.dma_addr);
> > > > > > > +			return -ENOMEM;
> > > > > > > +		}
> > > > > > > +
> > > > > > > +		vq->split.queue_dma_addr = vring.dma_addr;
> > > > > > > +		vq->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > > > > > > +	}
> > > > > > > +
> > > > > > > +	__vring_virtqueue_init_split(vq, vdev);
> > > > > > > +	vq->we_own_ring = true;
> > > > > > > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH;
> > > > > > > +
> > > > > > > +	return 0;
> > > > > > > +}
> > > > > > > +
> > > > > >
> > > > > > I kind of dislike this state machine.
> > > > > >
> > > > > > Hacks like special-casing num = 0 to mean "reset" are especially
> > > > > > confusing.
> > > > >
> > > > > I'm removing it. I'll say in the function description that this function is
> > > > > currently only called when vq has been reset. I'm no longer checking it based on
> > > > > state.
> > > > >
> > > > > >
> > > > > > And as Jason points out, when we want a resize then yes this currently
> > > > > > implies reset but that is an implementation detail.
> > > > > >
> > > > > > There should be a way to just make these cases separate functions
> > > > > > and then use them to compose consistent external APIs.
> > > > >
> > > > > Yes, virtqueue_resize_split() is fine for ethtool -G.
> > > > >
> > > > > But in the case of AF_XDP, just execute reset to free the buffer. The name
> > > > > virtqueue_reset_vring_split() I think can cover both cases. Or we use two apis
> > > > > to handle both scenarios?
> > > > >
> > > > > Or can anyone think of a better name. ^_^
> > > > >
> > > > > Thanks.
> > > >
> > > >
> > > > I'd say resize should be called resize and reset should be called reset.
> > >
> > >
> > > OK, I'll change it to resize here.
> > >
> > > But I want to know that when I implement virtio-net to support AF_XDP, its
> > > requirement is to release all submitted buffers. Then should I add a new api
> > > such as virtqueue_reset_vring()?
> >
> > Sounds like a reasonable name.
> >
> > > >
> > > > The big issue is a sane API for resize. Ideally it would resubmit
> > > > buffers which did not get used. Question is what to do
> > > > about buffers which don't fit (if ring has been downsized)?
> > > > Maybe a callback that will handle them?
> > > > And then what? Queue them up and readd later? Drop?
> > > > If we drop we should drop from the head not the tail ...
> > >
> > > It's a good idea, let's implement it later.
> > >
> > > Thanks.
> >
> > Well ... not sure how you are going to support resize
> > if you don't know what to do with buffers that were
> > in the ring.
> 
> The current solution is to call virtqueue_detach_unused_buf() to release buffers
> before resize ring.
> 
> Thanks.

This requires basically a richer api:
- stop
- detach
- resize
- start

with a callback you would just have a resize, and the fact
it resets internally becomes an implementation detail.
Xuan Zhuo March 10, 2022, 2:09 p.m. UTC | #11
On Thu, 10 Mar 2022 08:04:27 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> On Thu, Mar 10, 2022 at 08:33:30PM +0800, Xuan Zhuo wrote:
> > On Thu, 10 Mar 2022 07:17:09 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > On Thu, Mar 10, 2022 at 04:14:16PM +0800, Xuan Zhuo wrote:
> > > > On Thu, 10 Mar 2022 03:07:22 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > On Thu, Mar 10, 2022 at 03:17:03PM +0800, Xuan Zhuo wrote:
> > > > > > On Thu, 10 Mar 2022 02:00:39 -0500, "Michael S. Tsirkin" <mst@redhat.com> wrote:
> > > > > > > On Tue, Mar 08, 2022 at 08:35:01PM +0800, Xuan Zhuo wrote:
> > > > > > > > virtio ring supports reset.
> > > > > > > >
> > > > > > > > Queue reset is divided into several stages.
> > > > > > > >
> > > > > > > > 1. notify device queue reset
> > > > > > > > 2. vring release
> > > > > > > > 3. attach new vring
> > > > > > > > 4. notify device queue re-enable
> > > > > > > >
> > > > > > > > After the first step is completed, the vring reset operation can be
> > > > > > > > performed. If the newly set vring num does not change, then just reset
> > > > > > > > the vq related value.
> > > > > > > >
> > > > > > > > Otherwise, the vring will be released and the vring will be reallocated.
> > > > > > > > And the vring will be attached to the vq. If this process fails, the
> > > > > > > > function will exit, and the state of the vq will be the vring release
> > > > > > > > state. You can call this function again to reallocate the vring.
> > > > > > > >
> > > > > > > > In addition, vring_align, may_reduce_num are necessary for reallocating
> > > > > > > > vring, so they are retained when creating vq.
> > > > > > > >
> > > > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > > > ---
> > > > > > > >  drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++
> > > > > > > >  1 file changed, 69 insertions(+)
> > > > > > > >
> > > > > > > > diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> > > > > > > > index e0422c04c903..148fb1fd3d5a 100644
> > > > > > > > --- a/drivers/virtio/virtio_ring.c
> > > > > > > > +++ b/drivers/virtio/virtio_ring.c
> > > > > > > > @@ -158,6 +158,12 @@ struct vring_virtqueue {
> > > > > > > >  			/* DMA address and size information */
> > > > > > > >  			dma_addr_t queue_dma_addr;
> > > > > > > >  			size_t queue_size_in_bytes;
> > > > > > > > +
> > > > > > > > +			/* The parameters for creating vrings are reserved for
> > > > > > > > +			 * creating new vrings when enabling reset queue.
> > > > > > > > +			 */
> > > > > > > > +			u32 vring_align;
> > > > > > > > +			bool may_reduce_num;
> > > > > > > >  		} split;
> > > > > > > >
> > > > > > > >  		/* Available for packed ring */
> > > > > > > > @@ -217,6 +223,12 @@ struct vring_virtqueue {
> > > > > > > >  #endif
> > > > > > > >  };
> > > > > > > >
> > > > > > > > +static void vring_free(struct virtqueue *vq);
> > > > > > > > +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
> > > > > > > > +					 struct virtio_device *vdev);
> > > > > > > > +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
> > > > > > > > +					  struct virtio_device *vdev,
> > > > > > > > +					  struct vring vring);
> > > > > > > >
> > > > > > > >  /*
> > > > > > > >   * Helpers.
> > > > > > > > @@ -1012,6 +1024,8 @@ static struct virtqueue *vring_create_virtqueue_split(
> > > > > > > >  		return NULL;
> > > > > > > >  	}
> > > > > > > >
> > > > > > > > +	to_vvq(vq)->split.vring_align = vring_align;
> > > > > > > > +	to_vvq(vq)->split.may_reduce_num = may_reduce_num;
> > > > > > > >  	to_vvq(vq)->split.queue_dma_addr = vring.dma_addr;
> > > > > > > >  	to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > > > > > > >  	to_vvq(vq)->we_own_ring = true;
> > > > > > > > @@ -1019,6 +1033,59 @@ static struct virtqueue *vring_create_virtqueue_split(
> > > > > > > >  	return vq;
> > > > > > > >  }
> > > > > > > >
> > > > > > > > +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num)
> > > > > > > > +{
> > > > > > > > +	struct vring_virtqueue *vq = to_vvq(_vq);
> > > > > > > > +	struct virtio_device *vdev = _vq->vdev;
> > > > > > > > +	struct vring_split vring;
> > > > > > > > +	int err;
> > > > > > > > +
> > > > > > > > +	if (num > _vq->num_max)
> > > > > > > > +		return -E2BIG;
> > > > > > > > +
> > > > > > > > +	switch (vq->vq.reset) {
> > > > > > > > +	case VIRTIO_VQ_RESET_STEP_NONE:
> > > > > > > > +		return -ENOENT;
> > > > > > > > +
> > > > > > > > +	case VIRTIO_VQ_RESET_STEP_VRING_ATTACH:
> > > > > > > > +	case VIRTIO_VQ_RESET_STEP_DEVICE:
> > > > > > > > +		if (vq->split.vring.num == num || !num)
> > > > > > > > +			break;
> > > > > > > > +
> > > > > > > > +		vring_free(_vq);
> > > > > > > > +
> > > > > > > > +		fallthrough;
> > > > > > > > +
> > > > > > > > +	case VIRTIO_VQ_RESET_STEP_VRING_RELEASE:
> > > > > > > > +		if (!num)
> > > > > > > > +			num = vq->split.vring.num;
> > > > > > > > +
> > > > > > > > +		err = vring_create_vring_split(&vring, vdev,
> > > > > > > > +					       vq->split.vring_align,
> > > > > > > > +					       vq->weak_barriers,
> > > > > > > > +					       vq->split.may_reduce_num, num);
> > > > > > > > +		if (err)
> > > > > > > > +			return -ENOMEM;
> > > > > > > > +
> > > > > > > > +		err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
> > > > > > > > +		if (err) {
> > > > > > > > +			vring_free_queue(vdev, vring.queue_size_in_bytes,
> > > > > > > > +					 vring.queue,
> > > > > > > > +					 vring.dma_addr);
> > > > > > > > +			return -ENOMEM;
> > > > > > > > +		}
> > > > > > > > +
> > > > > > > > +		vq->split.queue_dma_addr = vring.dma_addr;
> > > > > > > > +		vq->split.queue_size_in_bytes = vring.queue_size_in_bytes;
> > > > > > > > +	}
> > > > > > > > +
> > > > > > > > +	__vring_virtqueue_init_split(vq, vdev);
> > > > > > > > +	vq->we_own_ring = true;
> > > > > > > > +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH;
> > > > > > > > +
> > > > > > > > +	return 0;
> > > > > > > > +}
> > > > > > > > +
> > > > > > >
> > > > > > > I kind of dislike this state machine.
> > > > > > >
> > > > > > > Hacks like special-casing num = 0 to mean "reset" are especially
> > > > > > > confusing.
> > > > > >
> > > > > > I'm removing it. I'll say in the function description that this function is
> > > > > > currently only called when vq has been reset. I'm no longer checking it based on
> > > > > > state.
> > > > > >
> > > > > > >
> > > > > > > And as Jason points out, when we want a resize then yes this currently
> > > > > > > implies reset but that is an implementation detail.
> > > > > > >
> > > > > > > There should be a way to just make these cases separate functions
> > > > > > > and then use them to compose consistent external APIs.
> > > > > >
> > > > > > Yes, virtqueue_resize_split() is fine for ethtool -G.
> > > > > >
> > > > > > But in the case of AF_XDP, just execute reset to free the buffer. The name
> > > > > > virtqueue_reset_vring_split() I think can cover both cases. Or we use two apis
> > > > > > to handle both scenarios?
> > > > > >
> > > > > > Or can anyone think of a better name. ^_^
> > > > > >
> > > > > > Thanks.
> > > > >
> > > > >
> > > > > I'd say resize should be called resize and reset should be called reset.
> > > >
> > > >
> > > > OK, I'll change it to resize here.
> > > >
> > > > But I want to know that when I implement virtio-net to support AF_XDP, its
> > > > requirement is to release all submitted buffers. Then should I add a new api
> > > > such as virtqueue_reset_vring()?
> > >
> > > Sounds like a reasonable name.
> > >
> > > > >
> > > > > The big issue is a sane API for resize. Ideally it would resubmit
> > > > > buffers which did not get used. Question is what to do
> > > > > about buffers which don't fit (if ring has been downsized)?
> > > > > Maybe a callback that will handle them?
> > > > > And then what? Queue them up and readd later? Drop?
> > > > > If we drop we should drop from the head not the tail ...
> > > >
> > > > It's a good idea, let's implement it later.
> > > >
> > > > Thanks.
> > >
> > > Well ... not sure how you are going to support resize
> > > if you don't know what to do with buffers that were
> > > in the ring.
> >
> > The current solution is to call virtqueue_detach_unused_buf() to release buffers
> > before resize ring.
> >
> > Thanks.
>
> This requires basically a richer api:
> - stop
> - detach
> - resize
> - start

Yes, that's how it is currently implemented.

>
> with a callback you would just have a resize, and the fact
> it resets internally becomes an implementation detail.


I think I understand what you mean: we encapsulate the following code into a
function and expose it as an external interface.

int virtqueue_resize(vq, callback)
{
	err = virtqueue_reset(sq->vq);
	if (err) {
		netif_start_subqueue(vi->dev, qindex);
		goto err;
	}

	/* detach */
	while ((buf = virtqueue_detach_unused_buf(sq->vq)) != NULL) {
		callback(vq, buf);
	}

	err = virtqueue_resize(sq->vq, ring_num);
	if (err)
		goto err;

	err = virtqueue_enable_resetq(sq->vq);
	if (err)
		goto err;
}

Thanks.

>
> --
> MST
>
Jason Wang March 11, 2022, 5:01 a.m. UTC | #12
在 2022/3/10 下午12:46, Xuan Zhuo 写道:
> On Wed, 9 Mar 2022 15:55:44 +0800, Jason Wang <jasowang@redhat.com> wrote:
>> 在 2022/3/8 下午8:35, Xuan Zhuo 写道:
>>> virtio ring supports reset.
>>>
>>> Queue reset is divided into several stages.
>>>
>>> 1. notify device queue reset
>>> 2. vring release
>>> 3. attach new vring
>>> 4. notify device queue re-enable
>>>
>>> After the first step is completed, the vring reset operation can be
>>> performed. If the newly set vring num does not change, then just reset
>>> the vq related value.
>>>
>>> Otherwise, the vring will be released and the vring will be reallocated.
>>> And the vring will be attached to the vq. If this process fails, the
>>> function will exit, and the state of the vq will be the vring release
>>> state. You can call this function again to reallocate the vring.
>>>
>>> In addition, vring_align, may_reduce_num are necessary for reallocating
>>> vring, so they are retained when creating vq.
>>>
>>> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
>>> ---
>>>    drivers/virtio/virtio_ring.c | 69 ++++++++++++++++++++++++++++++++++++
>>>    1 file changed, 69 insertions(+)
>>>
>>> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
>>> index e0422c04c903..148fb1fd3d5a 100644
>>> --- a/drivers/virtio/virtio_ring.c
>>> +++ b/drivers/virtio/virtio_ring.c
>>> @@ -158,6 +158,12 @@ struct vring_virtqueue {
>>>    			/* DMA address and size information */
>>>    			dma_addr_t queue_dma_addr;
>>>    			size_t queue_size_in_bytes;
>>> +
>>> +			/* The parameters for creating vrings are reserved for
>>> +			 * creating new vrings when enabling reset queue.
>>> +			 */
>>> +			u32 vring_align;
>>> +			bool may_reduce_num;
>>>    		} split;
>>>
>>>    		/* Available for packed ring */
>>> @@ -217,6 +223,12 @@ struct vring_virtqueue {
>>>    #endif
>>>    };
>>>
>>> +static void vring_free(struct virtqueue *vq);
>>> +static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
>>> +					 struct virtio_device *vdev);
>>> +static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
>>> +					  struct virtio_device *vdev,
>>> +					  struct vring vring);
>>>
>>>    /*
>>>     * Helpers.
>>> @@ -1012,6 +1024,8 @@ static struct virtqueue *vring_create_virtqueue_split(
>>>    		return NULL;
>>>    	}
>>>
>>> +	to_vvq(vq)->split.vring_align = vring_align;
>>> +	to_vvq(vq)->split.may_reduce_num = may_reduce_num;
>>>    	to_vvq(vq)->split.queue_dma_addr = vring.dma_addr;
>>>    	to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes;
>>>    	to_vvq(vq)->we_own_ring = true;
>>> @@ -1019,6 +1033,59 @@ static struct virtqueue *vring_create_virtqueue_split(
>>>    	return vq;
>>>    }
>>>
>>> +static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num)
>>> +{
>>
>> So what this function does is to resize the virtqueue actually, I
>> suggest to rename it as virtqueue_resize_split().
> In addition to resize, when num is 0, the function is to reinitialize vq ring
> related variables. For example avail_idx_shadow.


We need to move that logic into virtio_reset_vq() (I think we agree that it
should have a better name).


> So I think 'reset' is more appropriate.


The name is confusing, at least to me, since we already have
virtio_reset_vq() and most of the logic here does the resize.

Thanks


>
> Thanks.
>
>>
>>> +	struct vring_virtqueue *vq = to_vvq(_vq);
>>> +	struct virtio_device *vdev = _vq->vdev;
>>> +	struct vring_split vring;
>>> +	int err;
>>> +
>>> +	if (num > _vq->num_max)
>>> +		return -E2BIG;
>>> +
>>> +	switch (vq->vq.reset) {
>>> +	case VIRTIO_VQ_RESET_STEP_NONE:
>>> +		return -ENOENT;
>>> +
>>> +	case VIRTIO_VQ_RESET_STEP_VRING_ATTACH:
>>> +	case VIRTIO_VQ_RESET_STEP_DEVICE:
>>> +		if (vq->split.vring.num == num || !num)
>>> +			break;
>>> +
>>> +		vring_free(_vq);
>>> +
>>> +		fallthrough;
>>> +
>>> +	case VIRTIO_VQ_RESET_STEP_VRING_RELEASE:
>>> +		if (!num)
>>> +			num = vq->split.vring.num;
>>> +
>>> +		err = vring_create_vring_split(&vring, vdev,
>>> +					       vq->split.vring_align,
>>> +					       vq->weak_barriers,
>>> +					       vq->split.may_reduce_num, num);
>>> +		if (err)
>>> +			return -ENOMEM;
>>
>> We'd better need a safe fallback here like:
>>
>> If we can't allocate new memory, we can keep using the current one.
>> Otherwise an ethtool -G fail may make the device not usable.
>>
>> This could be done by not freeing the old vring and virtqueue states
>> until new is allocated.
>>
>>
>>> +
>>> +		err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
>>> +		if (err) {
>>> +			vring_free_queue(vdev, vring.queue_size_in_bytes,
>>> +					 vring.queue,
>>> +					 vring.dma_addr);
>>> +			return -ENOMEM;
>>> +		}
>>> +
>>> +		vq->split.queue_dma_addr = vring.dma_addr;
>>> +		vq->split.queue_size_in_bytes = vring.queue_size_in_bytes;
>>> +	}
>>> +
>>> +	__vring_virtqueue_init_split(vq, vdev);
>>> +	vq->we_own_ring = true;
>>
>> This seems wrong, we have the transport (rproc/mlxtbf) that allocate the
>> vring by themselves. I think we need to fail the resize for we_own_ring
>> == false.
>>
>> Thanks
>>
>>
>>
>>> +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>>
>>>    /*
>>>     * Packed ring specific functions - *_packed().
>>> @@ -2317,6 +2384,8 @@ static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
>>>    static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
>>>    					 struct virtio_device *vdev)
>>>    {
>>> +	vq->vq.reset = VIRTIO_VQ_RESET_STEP_NONE;
>>> +
>>>    	vq->packed_ring = false;
>>>    	vq->we_own_ring = false;
>>>    	vq->broken = false;
diff mbox series

Patch

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index e0422c04c903..148fb1fd3d5a 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -158,6 +158,12 @@  struct vring_virtqueue {
 			/* DMA address and size information */
 			dma_addr_t queue_dma_addr;
 			size_t queue_size_in_bytes;
+
+			/* The parameters for creating vrings are reserved for
+			 * creating new vrings when enabling reset queue.
+			 */
+			u32 vring_align;
+			bool may_reduce_num;
 		} split;
 
 		/* Available for packed ring */
@@ -217,6 +223,12 @@  struct vring_virtqueue {
 #endif
 };
 
+static void vring_free(struct virtqueue *vq);
+static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
+					 struct virtio_device *vdev);
+static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
+					  struct virtio_device *vdev,
+					  struct vring vring);
 
 /*
  * Helpers.
@@ -1012,6 +1024,8 @@  static struct virtqueue *vring_create_virtqueue_split(
 		return NULL;
 	}
 
+	to_vvq(vq)->split.vring_align = vring_align;
+	to_vvq(vq)->split.may_reduce_num = may_reduce_num;
 	to_vvq(vq)->split.queue_dma_addr = vring.dma_addr;
 	to_vvq(vq)->split.queue_size_in_bytes = vring.queue_size_in_bytes;
 	to_vvq(vq)->we_own_ring = true;
@@ -1019,6 +1033,59 @@  static struct virtqueue *vring_create_virtqueue_split(
 	return vq;
 }
 
+static int virtqueue_reset_vring_split(struct virtqueue *_vq, u32 num)
+{
+	struct vring_virtqueue *vq = to_vvq(_vq);
+	struct virtio_device *vdev = _vq->vdev;
+	struct vring_split vring;
+	int err;
+
+	if (num > _vq->num_max)
+		return -E2BIG;
+
+	switch (vq->vq.reset) {
+	case VIRTIO_VQ_RESET_STEP_NONE:
+		return -ENOENT;
+
+	case VIRTIO_VQ_RESET_STEP_VRING_ATTACH:
+	case VIRTIO_VQ_RESET_STEP_DEVICE:
+		if (vq->split.vring.num == num || !num)
+			break;
+
+		vring_free(_vq);
+
+		fallthrough;
+
+	case VIRTIO_VQ_RESET_STEP_VRING_RELEASE:
+		if (!num)
+			num = vq->split.vring.num;
+
+		err = vring_create_vring_split(&vring, vdev,
+					       vq->split.vring_align,
+					       vq->weak_barriers,
+					       vq->split.may_reduce_num, num);
+		if (err)
+			return -ENOMEM;
+
+		err = __vring_virtqueue_attach_split(vq, vdev, vring.vring);
+		if (err) {
+			vring_free_queue(vdev, vring.queue_size_in_bytes,
+					 vring.queue,
+					 vring.dma_addr);
+			return -ENOMEM;
+		}
+
+		vq->split.queue_dma_addr = vring.dma_addr;
+		vq->split.queue_size_in_bytes = vring.queue_size_in_bytes;
+	}
+
+	__vring_virtqueue_init_split(vq, vdev);
+	vq->we_own_ring = true;
+	vq->vq.reset = VIRTIO_VQ_RESET_STEP_VRING_ATTACH;
+
+	return 0;
+}
+
 
 /*
  * Packed ring specific functions - *_packed().
@@ -2317,6 +2384,8 @@  static int __vring_virtqueue_attach_split(struct vring_virtqueue *vq,
 static void __vring_virtqueue_init_split(struct vring_virtqueue *vq,
 					 struct virtio_device *vdev)
 {
+	vq->vq.reset = VIRTIO_VQ_RESET_STEP_NONE;
+
 	vq->packed_ring = false;
 	vq->we_own_ring = false;
 	vq->broken = false;