diff mbox series

[v9,31/32] virtio_net: support rx/tx queue resize

Message ID 20220406034346.74409-32-xuanzhuo@linux.alibaba.com (mailing list archive)
State Not Applicable
Delegated to: Netdev Maintainers
Headers show
Series virtio pci support VIRTIO_F_RING_RESET (refactor vring) | expand

Checks

Context Check Description
netdev/tree_selection success Guessing tree name failed - patch did not apply

Commit Message

Xuan Zhuo April 6, 2022, 3:43 a.m. UTC
This patch implements the resize function of the rx, tx queues.
Based on this function, it is possible to modify the ring num of the
queue.

An error may occur during the resize process: the resize may fail, or
the vq may no longer be usable. Either way, we must call napi_enable(),
because napi_disable() behaves like a lock and must always be followed
by a matching napi_enable().

Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
---
 drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)

Comments

Jason Wang April 13, 2022, 8 a.m. UTC | #1
在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> This patch implements the resize function of the rx, tx queues.
> Based on this function, it is possible to modify the ring num of the
> queue.
>
> There may be an exception during the resize process, the resize may
> fail, or the vq can no longer be used. Either way, we must execute
> napi_enable(). Because napi_disable is similar to a lock, napi_enable
> must be called after calling napi_disable.
>
> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> ---
>   drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++++++++++++++
>   1 file changed, 81 insertions(+)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index b8bf00525177..ba6859f305f7 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -251,6 +251,9 @@ struct padded_vnet_hdr {
>   	char padding[4];
>   };
>   
> +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
> +
>   static bool is_xdp_frame(void *ptr)
>   {
>   	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
> @@ -1369,6 +1372,15 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
>   {
>   	napi_enable(napi);
>   
> +	/* Check if vq is in reset state. The normal reset/resize process will
> +	 * be protected by napi. However, the protection of napi is only enabled
> +	 * during the operation, and the protection of napi will end after the
> +	 * operation is completed. If re-enable fails during the process, vq
> +	 * will remain unavailable with reset state.
> +	 */
> +	if (vq->reset)
> +		return;


I don't see when we could hit this condition.


> +
>   	/* If all buffers were filled by other side before we napi_enabled, we
>   	 * won't get another interrupt, so process any outstanding packets now.
>   	 * Call local_bh_enable after to trigger softIRQ processing.
> @@ -1413,6 +1425,15 @@ static void refill_work(struct work_struct *work)
>   		struct receive_queue *rq = &vi->rq[i];
>   
>   		napi_disable(&rq->napi);
> +
> +		/* Check if vq is in reset state. See more in
> +		 * virtnet_napi_enable()
> +		 */
> +		if (rq->vq->reset) {
> +			virtnet_napi_enable(rq->vq, &rq->napi);
> +			continue;
> +		}


Can we do something similar in virtnet_close() by canceling the work?


> +
>   		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
>   		virtnet_napi_enable(rq->vq, &rq->napi);
>   
> @@ -1523,6 +1544,10 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
>   	if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
>   		return;
>   
> +	/* Check if vq is in reset state. See more in virtnet_napi_enable() */
> +	if (sq->vq->reset)
> +		return;


We've disabled TX napi, any chance we can still hit this?


> +
>   	if (__netif_tx_trylock(txq)) {
>   		do {
>   			virtqueue_disable_cb(sq->vq);
> @@ -1769,6 +1794,62 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>   	return NETDEV_TX_OK;
>   }
>   
> +static int virtnet_rx_resize(struct virtnet_info *vi,
> +			     struct receive_queue *rq, u32 ring_num)
> +{
> +	int err;
> +
> +	napi_disable(&rq->napi);
> +
> +	err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
> +	if (err)
> +		goto err;
> +
> +	if (!try_fill_recv(vi, rq, GFP_KERNEL))
> +		schedule_delayed_work(&vi->refill, 0);
> +
> +	virtnet_napi_enable(rq->vq, &rq->napi);
> +	return 0;
> +
> +err:
> +	netdev_err(vi->dev,
> +		   "reset rx reset vq fail: rx queue index: %td err: %d\n",
> +		   rq - vi->rq, err);
> +	virtnet_napi_enable(rq->vq, &rq->napi);
> +	return err;
> +}
> +
> +static int virtnet_tx_resize(struct virtnet_info *vi,
> +			     struct send_queue *sq, u32 ring_num)
> +{
> +	struct netdev_queue *txq;
> +	int err, qindex;
> +
> +	qindex = sq - vi->sq;
> +
> +	virtnet_napi_tx_disable(&sq->napi);
> +
> +	txq = netdev_get_tx_queue(vi->dev, qindex);
> +	__netif_tx_lock_bh(txq);
> +	netif_stop_subqueue(vi->dev, qindex);
> +	__netif_tx_unlock_bh(txq);
> +
> +	err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
> +	if (err)
> +		goto err;
> +
> +	netif_start_subqueue(vi->dev, qindex);
> +	virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> +	return 0;
> +
> +err:


I guess we can still start the queue in this case? (Since we don't 
change the queue if resize fails).


> +	netdev_err(vi->dev,
> +		   "reset tx reset vq fail: tx queue index: %td err: %d\n",
> +		   sq - vi->sq, err);
> +	virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> +	return err;
> +}
> +
>   /*
>    * Send command via the control virtqueue and check status.  Commands
>    * supported by the hypervisor, as indicated by feature bits, should
Xuan Zhuo April 13, 2022, 8:35 a.m. UTC | #2
On Wed, 13 Apr 2022 16:00:18 +0800, Jason Wang <jasowang@redhat.com> wrote:
>
> 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > This patch implements the resize function of the rx, tx queues.
> > Based on this function, it is possible to modify the ring num of the
> > queue.
> >
> > There may be an exception during the resize process, the resize may
> > fail, or the vq can no longer be used. Either way, we must execute
> > napi_enable(). Because napi_disable is similar to a lock, napi_enable
> > must be called after calling napi_disable.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >   drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 81 insertions(+)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index b8bf00525177..ba6859f305f7 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -251,6 +251,9 @@ struct padded_vnet_hdr {
> >   	char padding[4];
> >   };
> >
> > +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> > +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
> > +
> >   static bool is_xdp_frame(void *ptr)
> >   {
> >   	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
> > @@ -1369,6 +1372,15 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
> >   {
> >   	napi_enable(napi);
> >
> > +	/* Check if vq is in reset state. The normal reset/resize process will
> > +	 * be protected by napi. However, the protection of napi is only enabled
> > +	 * during the operation, and the protection of napi will end after the
> > +	 * operation is completed. If re-enable fails during the process, vq
> > +	 * will remain unavailable with reset state.
> > +	 */
> > +	if (vq->reset)
> > +		return;
>
>
> I don't get when could we hit this condition.


In patch 23, the code to implement re-enable vq is as follows:

+static int vp_modern_enable_reset_vq(struct virtqueue *vq)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+	struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+	struct virtio_pci_vq_info *info;
+	unsigned long flags, index;
+	int err;
+
+	if (!vq->reset)
+		return -EBUSY;
+
+	index = vq->index;
+	info = vp_dev->vqs[index];
+
+	/* check queue reset status */
+	if (vp_modern_get_queue_reset(mdev, index) != 1)
+		return -EBUSY;
+
+	err = vp_active_vq(vq, info->msix_vector);
+	if (err)
+		return err;
+
+	if (vq->callback) {
+		spin_lock_irqsave(&vp_dev->lock, flags);
+		list_add(&info->node, &vp_dev->virtqueues);
+		spin_unlock_irqrestore(&vp_dev->lock, flags);
+	} else {
+		INIT_LIST_HEAD(&info->node);
+	}
+
+	vp_modern_set_queue_enable(&vp_dev->mdev, index, true);
+
+	if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
+		enable_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));
+
+	vq->reset = false;
+
+	return 0;
+}


There are three situations where an error will be returned. These are the
situations I want to handle.

But having rethought the question, I feel you're right: although the
hardware setup may fail and we can then no longer sync with the hardware,
using it as a normal vq doesn't cause any problems.

>
>
> > +
> >   	/* If all buffers were filled by other side before we napi_enabled, we
> >   	 * won't get another interrupt, so process any outstanding packets now.
> >   	 * Call local_bh_enable after to trigger softIRQ processing.
> > @@ -1413,6 +1425,15 @@ static void refill_work(struct work_struct *work)
> >   		struct receive_queue *rq = &vi->rq[i];
> >
> >   		napi_disable(&rq->napi);
> > +
> > +		/* Check if vq is in reset state. See more in
> > +		 * virtnet_napi_enable()
> > +		 */
> > +		if (rq->vq->reset) {
> > +			virtnet_napi_enable(rq->vq, &rq->napi);
> > +			continue;
> > +		}
>
>
> Can we do something similar in virtnet_close() by canceling the work?

I think there is no need to cancel the work here, because napi_disable() will
wait for the napi_enable() of the resize. So if a vq whose re-enable failed is
used as a normal vq, this logic can be removed.


>
>
> > +
> >   		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> >   		virtnet_napi_enable(rq->vq, &rq->napi);
> >
> > @@ -1523,6 +1544,10 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
> >   	if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
> >   		return;
> >
> > +	/* Check if vq is in reset state. See more in virtnet_napi_enable() */
> > +	if (sq->vq->reset)
> > +		return;
>
>
> We've disabled TX napi, any chance we can still hit this?

Same as above.

>
>
> > +
> >   	if (__netif_tx_trylock(txq)) {
> >   		do {
> >   			virtqueue_disable_cb(sq->vq);
> > @@ -1769,6 +1794,62 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> >   	return NETDEV_TX_OK;
> >   }
> >
> > +static int virtnet_rx_resize(struct virtnet_info *vi,
> > +			     struct receive_queue *rq, u32 ring_num)
> > +{
> > +	int err;
> > +
> > +	napi_disable(&rq->napi);
> > +
> > +	err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
> > +	if (err)
> > +		goto err;
> > +
> > +	if (!try_fill_recv(vi, rq, GFP_KERNEL))
> > +		schedule_delayed_work(&vi->refill, 0);
> > +
> > +	virtnet_napi_enable(rq->vq, &rq->napi);
> > +	return 0;
> > +
> > +err:
> > +	netdev_err(vi->dev,
> > +		   "reset rx reset vq fail: rx queue index: %td err: %d\n",
> > +		   rq - vi->rq, err);
> > +	virtnet_napi_enable(rq->vq, &rq->napi);
> > +	return err;
> > +}
> > +
> > +static int virtnet_tx_resize(struct virtnet_info *vi,
> > +			     struct send_queue *sq, u32 ring_num)
> > +{
> > +	struct netdev_queue *txq;
> > +	int err, qindex;
> > +
> > +	qindex = sq - vi->sq;
> > +
> > +	virtnet_napi_tx_disable(&sq->napi);
> > +
> > +	txq = netdev_get_tx_queue(vi->dev, qindex);
> > +	__netif_tx_lock_bh(txq);
> > +	netif_stop_subqueue(vi->dev, qindex);
> > +	__netif_tx_unlock_bh(txq);
> > +
> > +	err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
> > +	if (err)
> > +		goto err;
> > +
> > +	netif_start_subqueue(vi->dev, qindex);
> > +	virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > +	return 0;
> > +
> > +err:
>
>
> I guess we can still start the queue in this case? (Since we don't
> change the queue if resize fails).

Yes, you are right.

Thanks.

>
>
> > +	netdev_err(vi->dev,
> > +		   "reset tx reset vq fail: tx queue index: %td err: %d\n",
> > +		   sq - vi->sq, err);
> > +	virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > +	return err;
> > +}
> > +
> >   /*
> >    * Send command via the control virtqueue and check status.  Commands
> >    * supported by the hypervisor, as indicated by feature bits, should
>
Jason Wang April 14, 2022, 9:30 a.m. UTC | #3
On Wed, Apr 13, 2022 at 4:47 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Wed, 13 Apr 2022 16:00:18 +0800, Jason Wang <jasowang@redhat.com> wrote:
> >
> > 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > > This patch implements the resize function of the rx, tx queues.
> > > Based on this function, it is possible to modify the ring num of the
> > > queue.
> > >
> > > There may be an exception during the resize process, the resize may
> > > fail, or the vq can no longer be used. Either way, we must execute
> > > napi_enable(). Because napi_disable is similar to a lock, napi_enable
> > > must be called after calling napi_disable.
> > >
> > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > ---
> > >   drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++++++++++++++
> > >   1 file changed, 81 insertions(+)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index b8bf00525177..ba6859f305f7 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -251,6 +251,9 @@ struct padded_vnet_hdr {
> > >     char padding[4];
> > >   };
> > >
> > > +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > +
> > >   static bool is_xdp_frame(void *ptr)
> > >   {
> > >     return (unsigned long)ptr & VIRTIO_XDP_FLAG;
> > > @@ -1369,6 +1372,15 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
> > >   {
> > >     napi_enable(napi);
> > >
> > > +   /* Check if vq is in reset state. The normal reset/resize process will
> > > +    * be protected by napi. However, the protection of napi is only enabled
> > > +    * during the operation, and the protection of napi will end after the
> > > +    * operation is completed. If re-enable fails during the process, vq
> > > +    * will remain unavailable with reset state.
> > > +    */
> > > +   if (vq->reset)
> > > +           return;
> >
> >
> > I don't get when could we hit this condition.
>
>
> In patch 23, the code to implement re-enable vq is as follows:
>
> +static int vp_modern_enable_reset_vq(struct virtqueue *vq)
> +{
> +       struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
> +       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
> +       struct virtio_pci_vq_info *info;
> +       unsigned long flags, index;
> +       int err;
> +
> +       if (!vq->reset)
> +               return -EBUSY;
> +
> +       index = vq->index;
> +       info = vp_dev->vqs[index];
> +
> +       /* check queue reset status */
> +       if (vp_modern_get_queue_reset(mdev, index) != 1)
> +               return -EBUSY;
> +
> +       err = vp_active_vq(vq, info->msix_vector);
> +       if (err)
> +               return err;
> +
> +       if (vq->callback) {
> +               spin_lock_irqsave(&vp_dev->lock, flags);
> +               list_add(&info->node, &vp_dev->virtqueues);
> +               spin_unlock_irqrestore(&vp_dev->lock, flags);
> +       } else {
> +               INIT_LIST_HEAD(&info->node);
> +       }
> +
> +       vp_modern_set_queue_enable(&vp_dev->mdev, index, true);
> +
> +       if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
> +               enable_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));
> +
> +       vq->reset = false;
> +
> +       return 0;
> +}
>
>
> There are three situations where an error will be returned. These are the
> situations I want to handle.

Right, but it looks harmless if we just schedule the NAPI without the check.

>
> But I'm rethinking the question, and I feel like you're right, although the
> hardware setup may fail. We can no longer sync with the hardware. But using it
> as a normal vq doesn't have any problems.

Note that we should make sure a buggy (malicious) device can't crash
the code by changing the queue_reset value at will.

>
> >
> >
> > > +
> > >     /* If all buffers were filled by other side before we napi_enabled, we
> > >      * won't get another interrupt, so process any outstanding packets now.
> > >      * Call local_bh_enable after to trigger softIRQ processing.
> > > @@ -1413,6 +1425,15 @@ static void refill_work(struct work_struct *work)
> > >             struct receive_queue *rq = &vi->rq[i];
> > >
> > >             napi_disable(&rq->napi);
> > > +
> > > +           /* Check if vq is in reset state. See more in
> > > +            * virtnet_napi_enable()
> > > +            */
> > > +           if (rq->vq->reset) {
> > > +                   virtnet_napi_enable(rq->vq, &rq->napi);
> > > +                   continue;
> > > +           }
> >
> >
> > Can we do something similar in virtnet_close() by canceling the work?
>
> I think there is no need to cancel the work here, because napi_disable will wait
> for the napi_enable of the resize. So if the re-enable failed vq is used as a normal
> vq, this logic can be removed.

Actually I meant the part of virtnet_rx_resize().

If we don't synchronize with the refill work, it might enable NAPI unexpectedly?

Thanks

>
>
> >
> >
> > > +
> > >             still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> > >             virtnet_napi_enable(rq->vq, &rq->napi);
> > >
> > > @@ -1523,6 +1544,10 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
> > >     if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
> > >             return;
> > >
> > > +   /* Check if vq is in reset state. See more in virtnet_napi_enable() */
> > > +   if (sq->vq->reset)
> > > +           return;
> >
> >
> > We've disabled TX napi, any chance we can still hit this?
>
> Same as above.
>
> >
> >
> > > +
> > >     if (__netif_tx_trylock(txq)) {
> > >             do {
> > >                     virtqueue_disable_cb(sq->vq);
> > > @@ -1769,6 +1794,62 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> > >     return NETDEV_TX_OK;
> > >   }
> > >
> > > +static int virtnet_rx_resize(struct virtnet_info *vi,
> > > +                        struct receive_queue *rq, u32 ring_num)
> > > +{
> > > +   int err;
> > > +
> > > +   napi_disable(&rq->napi);
> > > +
> > > +   err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
> > > +   if (err)
> > > +           goto err;
> > > +
> > > +   if (!try_fill_recv(vi, rq, GFP_KERNEL))
> > > +           schedule_delayed_work(&vi->refill, 0);
> > > +
> > > +   virtnet_napi_enable(rq->vq, &rq->napi);
> > > +   return 0;
> > > +
> > > +err:
> > > +   netdev_err(vi->dev,
> > > +              "reset rx reset vq fail: rx queue index: %td err: %d\n",
> > > +              rq - vi->rq, err);
> > > +   virtnet_napi_enable(rq->vq, &rq->napi);
> > > +   return err;
> > > +}
> > > +
> > > +static int virtnet_tx_resize(struct virtnet_info *vi,
> > > +                        struct send_queue *sq, u32 ring_num)
> > > +{
> > > +   struct netdev_queue *txq;
> > > +   int err, qindex;
> > > +
> > > +   qindex = sq - vi->sq;
> > > +
> > > +   virtnet_napi_tx_disable(&sq->napi);
> > > +
> > > +   txq = netdev_get_tx_queue(vi->dev, qindex);
> > > +   __netif_tx_lock_bh(txq);
> > > +   netif_stop_subqueue(vi->dev, qindex);
> > > +   __netif_tx_unlock_bh(txq);
> > > +
> > > +   err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
> > > +   if (err)
> > > +           goto err;
> > > +
> > > +   netif_start_subqueue(vi->dev, qindex);
> > > +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > > +   return 0;
> > > +
> > > +err:
> >
> >
> > I guess we can still start the queue in this case? (Since we don't
> > change the queue if resize fails).
>
> Yes, you are right.
>
> Thanks.
>
> >
> >
> > > +   netdev_err(vi->dev,
> > > +              "reset tx reset vq fail: tx queue index: %td err: %d\n",
> > > +              sq - vi->sq, err);
> > > +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > > +   return err;
> > > +}
> > > +
> > >   /*
> > >    * Send command via the control virtqueue and check status.  Commands
> > >    * supported by the hypervisor, as indicated by feature bits, should
> >
>
Xuan Zhuo April 15, 2022, 2:18 a.m. UTC | #4
On Thu, 14 Apr 2022 17:30:02 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Wed, Apr 13, 2022 at 4:47 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Wed, 13 Apr 2022 16:00:18 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > > > This patch implements the resize function of the rx, tx queues.
> > > > Based on this function, it is possible to modify the ring num of the
> > > > queue.
> > > >
> > > > There may be an exception during the resize process, the resize may
> > > > fail, or the vq can no longer be used. Either way, we must execute
> > > > napi_enable(). Because napi_disable is similar to a lock, napi_enable
> > > > must be called after calling napi_disable.
> > > >
> > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > ---
> > > >   drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++++++++++++++
> > > >   1 file changed, 81 insertions(+)
> > > >
> > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > index b8bf00525177..ba6859f305f7 100644
> > > > --- a/drivers/net/virtio_net.c
> > > > +++ b/drivers/net/virtio_net.c
> > > > @@ -251,6 +251,9 @@ struct padded_vnet_hdr {
> > > >     char padding[4];
> > > >   };
> > > >
> > > > +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > > +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > > +
> > > >   static bool is_xdp_frame(void *ptr)
> > > >   {
> > > >     return (unsigned long)ptr & VIRTIO_XDP_FLAG;
> > > > @@ -1369,6 +1372,15 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
> > > >   {
> > > >     napi_enable(napi);
> > > >
> > > > +   /* Check if vq is in reset state. The normal reset/resize process will
> > > > +    * be protected by napi. However, the protection of napi is only enabled
> > > > +    * during the operation, and the protection of napi will end after the
> > > > +    * operation is completed. If re-enable fails during the process, vq
> > > > +    * will remain unavailable with reset state.
> > > > +    */
> > > > +   if (vq->reset)
> > > > +           return;
> > >
> > >
> > > I don't get when could we hit this condition.
> >
> >
> > In patch 23, the code to implement re-enable vq is as follows:
> >
> > +static int vp_modern_enable_reset_vq(struct virtqueue *vq)
> > +{
> > +       struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
> > +       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
> > +       struct virtio_pci_vq_info *info;
> > +       unsigned long flags, index;
> > +       int err;
> > +
> > +       if (!vq->reset)
> > +               return -EBUSY;
> > +
> > +       index = vq->index;
> > +       info = vp_dev->vqs[index];
> > +
> > +       /* check queue reset status */
> > +       if (vp_modern_get_queue_reset(mdev, index) != 1)
> > +               return -EBUSY;
> > +
> > +       err = vp_active_vq(vq, info->msix_vector);
> > +       if (err)
> > +               return err;
> > +
> > +       if (vq->callback) {
> > +               spin_lock_irqsave(&vp_dev->lock, flags);
> > +               list_add(&info->node, &vp_dev->virtqueues);
> > +               spin_unlock_irqrestore(&vp_dev->lock, flags);
> > +       } else {
> > +               INIT_LIST_HEAD(&info->node);
> > +       }
> > +
> > +       vp_modern_set_queue_enable(&vp_dev->mdev, index, true);
> > +
> > +       if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
> > +               enable_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));
> > +
> > +       vq->reset = false;
> > +
> > +       return 0;
> > +}
> >
> >
> > There are three situations where an error will be returned. These are the
> > situations I want to handle.
>
> Right, but it looks harmless if we just schedule the NAPI without the check.

Yes.

> >
> > But I'm rethinking the question, and I feel like you're right, although the
> > hardware setup may fail. We can no longer sync with the hardware. But using it
> > as a normal vq doesn't have any problems.
>
> Note that we should make sure the buggy(malicous) device won't crash
> the codes by changing the queue_reset value at its will.

I will keep an eye on this situation.

>
> >
> > >
> > >
> > > > +
> > > >     /* If all buffers were filled by other side before we napi_enabled, we
> > > >      * won't get another interrupt, so process any outstanding packets now.
> > > >      * Call local_bh_enable after to trigger softIRQ processing.
> > > > @@ -1413,6 +1425,15 @@ static void refill_work(struct work_struct *work)
> > > >             struct receive_queue *rq = &vi->rq[i];
> > > >
> > > >             napi_disable(&rq->napi);
> > > > +
> > > > +           /* Check if vq is in reset state. See more in
> > > > +            * virtnet_napi_enable()
> > > > +            */
> > > > +           if (rq->vq->reset) {
> > > > +                   virtnet_napi_enable(rq->vq, &rq->napi);
> > > > +                   continue;
> > > > +           }
> > >
> > >
> > > Can we do something similar in virtnet_close() by canceling the work?
> >
> > I think there is no need to cancel the work here, because napi_disable will wait
> > for the napi_enable of the resize. So if the re-enable failed vq is used as a normal
> > vq, this logic can be removed.
>
> Actually I meant the part of virtnet_rx_resize().
>
> If we don't synchronize with the refill work, it might enable NAPI unexpectedly?

I don't think this situation will be encountered, because napi_disable is
mutually exclusive, so there will be no unexpected napi enable.

Is there something I misunderstood?

Thanks.

>
> Thanks
>
> >
> >
> > >
> > >
> > > > +
> > > >             still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> > > >             virtnet_napi_enable(rq->vq, &rq->napi);
> > > >
> > > > @@ -1523,6 +1544,10 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
> > > >     if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
> > > >             return;
> > > >
> > > > +   /* Check if vq is in reset state. See more in virtnet_napi_enable() */
> > > > +   if (sq->vq->reset)
> > > > +           return;
> > >
> > >
> > > We've disabled TX napi, any chance we can still hit this?
> >
> > Same as above.
> >
> > >
> > >
> > > > +
> > > >     if (__netif_tx_trylock(txq)) {
> > > >             do {
> > > >                     virtqueue_disable_cb(sq->vq);
> > > > @@ -1769,6 +1794,62 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> > > >     return NETDEV_TX_OK;
> > > >   }
> > > >
> > > > +static int virtnet_rx_resize(struct virtnet_info *vi,
> > > > +                        struct receive_queue *rq, u32 ring_num)
> > > > +{
> > > > +   int err;
> > > > +
> > > > +   napi_disable(&rq->napi);
> > > > +
> > > > +   err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
> > > > +   if (err)
> > > > +           goto err;
> > > > +
> > > > +   if (!try_fill_recv(vi, rq, GFP_KERNEL))
> > > > +           schedule_delayed_work(&vi->refill, 0);
> > > > +
> > > > +   virtnet_napi_enable(rq->vq, &rq->napi);
> > > > +   return 0;
> > > > +
> > > > +err:
> > > > +   netdev_err(vi->dev,
> > > > +              "reset rx reset vq fail: rx queue index: %td err: %d\n",
> > > > +              rq - vi->rq, err);
> > > > +   virtnet_napi_enable(rq->vq, &rq->napi);
> > > > +   return err;
> > > > +}
> > > > +
> > > > +static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > +                        struct send_queue *sq, u32 ring_num)
> > > > +{
> > > > +   struct netdev_queue *txq;
> > > > +   int err, qindex;
> > > > +
> > > > +   qindex = sq - vi->sq;
> > > > +
> > > > +   virtnet_napi_tx_disable(&sq->napi);
> > > > +
> > > > +   txq = netdev_get_tx_queue(vi->dev, qindex);
> > > > +   __netif_tx_lock_bh(txq);
> > > > +   netif_stop_subqueue(vi->dev, qindex);
> > > > +   __netif_tx_unlock_bh(txq);
> > > > +
> > > > +   err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
> > > > +   if (err)
> > > > +           goto err;
> > > > +
> > > > +   netif_start_subqueue(vi->dev, qindex);
> > > > +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > > > +   return 0;
> > > > +
> > > > +err:
> > >
> > >
> > > I guess we can still start the queue in this case? (Since we don't
> > > change the queue if resize fails).
> >
> > Yes, you are right.
> >
> > Thanks.
> >
> > >
> > >
> > > > +   netdev_err(vi->dev,
> > > > +              "reset tx reset vq fail: tx queue index: %td err: %d\n",
> > > > +              sq - vi->sq, err);
> > > > +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > > > +   return err;
> > > > +}
> > > > +
> > > >   /*
> > > >    * Send command via the control virtqueue and check status.  Commands
> > > >    * supported by the hypervisor, as indicated by feature bits, should
> > >
> >
>
Jason Wang April 15, 2022, 5:53 a.m. UTC | #5
On Fri, Apr 15, 2022 at 10:23 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Thu, 14 Apr 2022 17:30:02 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > On Wed, Apr 13, 2022 at 4:47 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > >
> > > On Wed, 13 Apr 2022 16:00:18 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > >
> > > > 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > > > > This patch implements the resize function of the rx, tx queues.
> > > > > Based on this function, it is possible to modify the ring num of the
> > > > > queue.
> > > > >
> > > > > There may be an exception during the resize process, the resize may
> > > > > fail, or the vq can no longer be used. Either way, we must execute
> > > > > napi_enable(). Because napi_disable is similar to a lock, napi_enable
> > > > > must be called after calling napi_disable.
> > > > >
> > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > ---
> > > > >   drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++++++++++++++
> > > > >   1 file changed, 81 insertions(+)
> > > > >
> > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > index b8bf00525177..ba6859f305f7 100644
> > > > > --- a/drivers/net/virtio_net.c
> > > > > +++ b/drivers/net/virtio_net.c
> > > > > @@ -251,6 +251,9 @@ struct padded_vnet_hdr {
> > > > >     char padding[4];
> > > > >   };
> > > > >
> > > > > +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > > > +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > > > +
> > > > >   static bool is_xdp_frame(void *ptr)
> > > > >   {
> > > > >     return (unsigned long)ptr & VIRTIO_XDP_FLAG;
> > > > > @@ -1369,6 +1372,15 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
> > > > >   {
> > > > >     napi_enable(napi);
> > > > >
> > > > > +   /* Check if vq is in reset state. The normal reset/resize process will
> > > > > +    * be protected by napi. However, the protection of napi is only enabled
> > > > > +    * during the operation, and the protection of napi will end after the
> > > > > +    * operation is completed. If re-enable fails during the process, vq
> > > > > +    * will remain unavailable with reset state.
> > > > > +    */
> > > > > +   if (vq->reset)
> > > > > +           return;
> > > >
> > > >
> > > > I don't get when could we hit this condition.
> > >
> > >
> > > In patch 23, the code to implement re-enable vq is as follows:
> > >
> > > +static int vp_modern_enable_reset_vq(struct virtqueue *vq)
> > > +{
> > > +       struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
> > > +       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
> > > +       struct virtio_pci_vq_info *info;
> > > +       unsigned long flags, index;
> > > +       int err;
> > > +
> > > +       if (!vq->reset)
> > > +               return -EBUSY;
> > > +
> > > +       index = vq->index;
> > > +       info = vp_dev->vqs[index];
> > > +
> > > +       /* check queue reset status */
> > > +       if (vp_modern_get_queue_reset(mdev, index) != 1)
> > > +               return -EBUSY;
> > > +
> > > +       err = vp_active_vq(vq, info->msix_vector);
> > > +       if (err)
> > > +               return err;
> > > +
> > > +       if (vq->callback) {
> > > +               spin_lock_irqsave(&vp_dev->lock, flags);
> > > +               list_add(&info->node, &vp_dev->virtqueues);
> > > +               spin_unlock_irqrestore(&vp_dev->lock, flags);
> > > +       } else {
> > > +               INIT_LIST_HEAD(&info->node);
> > > +       }
> > > +
> > > +       vp_modern_set_queue_enable(&vp_dev->mdev, index, true);
> > > +
> > > +       if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
> > > +               enable_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));
> > > +
> > > +       vq->reset = false;
> > > +
> > > +       return 0;
> > > +}
> > >
> > >
> > > There are three situations where an error will be returned. These are the
> > > situations I want to handle.
> >
> > Right, but it looks harmless if we just schedule the NAPI without the check.
>
> Yes.
>
> > >
> > > But I'm rethinking the question, and I feel like you're right, although the
> > > hardware setup may fail. We can no longer sync with the hardware. But using it
> > > as a normal vq doesn't have any problems.
> >
> > Note that we should make sure the buggy(malicous) device won't crash
> > the codes by changing the queue_reset value at its will.
>
> I will keep an eye on this situation.
>
> >
> > >
> > > >
> > > >
> > > > > +
> > > > >     /* If all buffers were filled by other side before we napi_enabled, we
> > > > >      * won't get another interrupt, so process any outstanding packets now.
> > > > >      * Call local_bh_enable after to trigger softIRQ processing.
> > > > > @@ -1413,6 +1425,15 @@ static void refill_work(struct work_struct *work)
> > > > >             struct receive_queue *rq = &vi->rq[i];
> > > > >
> > > > >             napi_disable(&rq->napi);
> > > > > +
> > > > > +           /* Check if vq is in reset state. See more in
> > > > > +            * virtnet_napi_enable()
> > > > > +            */
> > > > > +           if (rq->vq->reset) {
> > > > > +                   virtnet_napi_enable(rq->vq, &rq->napi);
> > > > > +                   continue;
> > > > > +           }
> > > >
> > > >
> > > > Can we do something similar in virtnet_close() by canceling the work?
> > >
> > > I think there is no need to cancel the work here, because napi_disable will wait
> > > for the napi_enable of the resize. So if the re-enable failed vq is used as a normal
> > > vq, this logic can be removed.
> >
> > Actually I meant the part of virtnet_rx_resize().
> >
> > If we don't synchronize with the refill work, it might enable NAPI unexpectedly?
>
> I don't think this situation will be encountered, because napi_disable is
> mutually exclusive, so there will be no unexpected napi enable.
>
> Is there something I misunderstood?

So in virtnet_rx_resize() we do:

napi_disable()
...
resize()
...
napi_enable()

How can we guarantee that the work is not run after the napi_disable()?

Thanks

>
> Thanks.
>
> >
> > Thanks
> >
> > >
> > >
> > > >
> > > >
> > > > > +
> > > > >             still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> > > > >             virtnet_napi_enable(rq->vq, &rq->napi);
> > > > >
> > > > > @@ -1523,6 +1544,10 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
> > > > >     if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
> > > > >             return;
> > > > >
> > > > > +   /* Check if vq is in reset state. See more in virtnet_napi_enable() */
> > > > > +   if (sq->vq->reset)
> > > > > +           return;
> > > >
> > > >
> > > > We've disabled TX napi, any chance we can still hit this?
> > >
> > > Same as above.
> > >
> > > >
> > > >
> > > > > +
> > > > >     if (__netif_tx_trylock(txq)) {
> > > > >             do {
> > > > >                     virtqueue_disable_cb(sq->vq);
> > > > > @@ -1769,6 +1794,62 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> > > > >     return NETDEV_TX_OK;
> > > > >   }
> > > > >
> > > > > +static int virtnet_rx_resize(struct virtnet_info *vi,
> > > > > +                        struct receive_queue *rq, u32 ring_num)
> > > > > +{
> > > > > +   int err;
> > > > > +
> > > > > +   napi_disable(&rq->napi);
> > > > > +
> > > > > +   err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
> > > > > +   if (err)
> > > > > +           goto err;
> > > > > +
> > > > > +   if (!try_fill_recv(vi, rq, GFP_KERNEL))
> > > > > +           schedule_delayed_work(&vi->refill, 0);
> > > > > +
> > > > > +   virtnet_napi_enable(rq->vq, &rq->napi);
> > > > > +   return 0;
> > > > > +
> > > > > +err:
> > > > > +   netdev_err(vi->dev,
> > > > > +              "reset rx reset vq fail: rx queue index: %td err: %d\n",
> > > > > +              rq - vi->rq, err);
> > > > > +   virtnet_napi_enable(rq->vq, &rq->napi);
> > > > > +   return err;
> > > > > +}
> > > > > +
> > > > > +static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > > +                        struct send_queue *sq, u32 ring_num)
> > > > > +{
> > > > > +   struct netdev_queue *txq;
> > > > > +   int err, qindex;
> > > > > +
> > > > > +   qindex = sq - vi->sq;
> > > > > +
> > > > > +   virtnet_napi_tx_disable(&sq->napi);
> > > > > +
> > > > > +   txq = netdev_get_tx_queue(vi->dev, qindex);
> > > > > +   __netif_tx_lock_bh(txq);
> > > > > +   netif_stop_subqueue(vi->dev, qindex);
> > > > > +   __netif_tx_unlock_bh(txq);
> > > > > +
> > > > > +   err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
> > > > > +   if (err)
> > > > > +           goto err;
> > > > > +
> > > > > +   netif_start_subqueue(vi->dev, qindex);
> > > > > +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > > > > +   return 0;
> > > > > +
> > > > > +err:
> > > >
> > > >
> > > > I guess we can still start the queue in this case? (Since we don't
> > > > change the queue if resize fails).
> > >
> > > Yes, you are right.
> > >
> > > Thanks.
> > >
> > > >
> > > >
> > > > > +   netdev_err(vi->dev,
> > > > > +              "reset tx reset vq fail: tx queue index: %td err: %d\n",
> > > > > +              sq - vi->sq, err);
> > > > > +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > > > > +   return err;
> > > > > +}
> > > > > +
> > > > >   /*
> > > > >    * Send command via the control virtqueue and check status.  Commands
> > > > >    * supported by the hypervisor, as indicated by feature bits, should
> > > >
> > >
> >
>
Xuan Zhuo April 15, 2022, 9:17 a.m. UTC | #6
On Fri, 15 Apr 2022 13:53:54 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Fri, Apr 15, 2022 at 10:23 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Thu, 14 Apr 2022 17:30:02 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > On Wed, Apr 13, 2022 at 4:47 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> > > >
> > > > On Wed, 13 Apr 2022 16:00:18 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > > > >
> > > > > 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > > > > > This patch implements the resize function of the rx, tx queues.
> > > > > > Based on this function, it is possible to modify the ring num of the
> > > > > > queue.
> > > > > >
> > > > > > There may be an exception during the resize process, the resize may
> > > > > > fail, or the vq can no longer be used. Either way, we must execute
> > > > > > napi_enable(). Because napi_disable is similar to a lock, napi_enable
> > > > > > must be called after calling napi_disable.
> > > > > >
> > > > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > > > ---
> > > > > >   drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++++++++++++++
> > > > > >   1 file changed, 81 insertions(+)
> > > > > >
> > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > > > index b8bf00525177..ba6859f305f7 100644
> > > > > > --- a/drivers/net/virtio_net.c
> > > > > > +++ b/drivers/net/virtio_net.c
> > > > > > @@ -251,6 +251,9 @@ struct padded_vnet_hdr {
> > > > > >     char padding[4];
> > > > > >   };
> > > > > >
> > > > > > +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > > > > +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > > > > +
> > > > > >   static bool is_xdp_frame(void *ptr)
> > > > > >   {
> > > > > >     return (unsigned long)ptr & VIRTIO_XDP_FLAG;
> > > > > > @@ -1369,6 +1372,15 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
> > > > > >   {
> > > > > >     napi_enable(napi);
> > > > > >
> > > > > > +   /* Check if vq is in reset state. The normal reset/resize process will
> > > > > > +    * be protected by napi. However, the protection of napi is only enabled
> > > > > > +    * during the operation, and the protection of napi will end after the
> > > > > > +    * operation is completed. If re-enable fails during the process, vq
> > > > > > +    * will remain unavailable with reset state.
> > > > > > +    */
> > > > > > +   if (vq->reset)
> > > > > > +           return;
> > > > >
> > > > >
> > > > > I don't get when could we hit this condition.
> > > >
> > > >
> > > > In patch 23, the code to implement re-enable vq is as follows:
> > > >
> > > > +static int vp_modern_enable_reset_vq(struct virtqueue *vq)
> > > > +{
> > > > +       struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
> > > > +       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
> > > > +       struct virtio_pci_vq_info *info;
> > > > +       unsigned long flags, index;
> > > > +       int err;
> > > > +
> > > > +       if (!vq->reset)
> > > > +               return -EBUSY;
> > > > +
> > > > +       index = vq->index;
> > > > +       info = vp_dev->vqs[index];
> > > > +
> > > > +       /* check queue reset status */
> > > > +       if (vp_modern_get_queue_reset(mdev, index) != 1)
> > > > +               return -EBUSY;
> > > > +
> > > > +       err = vp_active_vq(vq, info->msix_vector);
> > > > +       if (err)
> > > > +               return err;
> > > > +
> > > > +       if (vq->callback) {
> > > > +               spin_lock_irqsave(&vp_dev->lock, flags);
> > > > +               list_add(&info->node, &vp_dev->virtqueues);
> > > > +               spin_unlock_irqrestore(&vp_dev->lock, flags);
> > > > +       } else {
> > > > +               INIT_LIST_HEAD(&info->node);
> > > > +       }
> > > > +
> > > > +       vp_modern_set_queue_enable(&vp_dev->mdev, index, true);
> > > > +
> > > > +       if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
> > > > +               enable_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));
> > > > +
> > > > +       vq->reset = false;
> > > > +
> > > > +       return 0;
> > > > +}
> > > >
> > > >
> > > > There are three situations where an error will be returned. These are the
> > > > situations I want to handle.
> > >
> > > Right, but it looks harmless if we just schedule the NAPI without the check.
> >
> > Yes.
> >
> > > >
> > > > But I'm rethinking the question, and I feel like you're right, although the
> > > > hardware setup may fail. We can no longer sync with the hardware. But using it
> > > > as a normal vq doesn't have any problems.
> > >
> > > Note that we should make sure the buggy(malicous) device won't crash
> > > the codes by changing the queue_reset value at its will.
> >
> > I will keep an eye on this situation.
> >
> > >
> > > >
> > > > >
> > > > >
> > > > > > +
> > > > > >     /* If all buffers were filled by other side before we napi_enabled, we
> > > > > >      * won't get another interrupt, so process any outstanding packets now.
> > > > > >      * Call local_bh_enable after to trigger softIRQ processing.
> > > > > > @@ -1413,6 +1425,15 @@ static void refill_work(struct work_struct *work)
> > > > > >             struct receive_queue *rq = &vi->rq[i];
> > > > > >
> > > > > >             napi_disable(&rq->napi);
> > > > > > +
> > > > > > +           /* Check if vq is in reset state. See more in
> > > > > > +            * virtnet_napi_enable()
> > > > > > +            */
> > > > > > +           if (rq->vq->reset) {
> > > > > > +                   virtnet_napi_enable(rq->vq, &rq->napi);
> > > > > > +                   continue;
> > > > > > +           }
> > > > >
> > > > >
> > > > > Can we do something similar in virtnet_close() by canceling the work?
> > > >
> > > > I think there is no need to cancel the work here, because napi_disable will wait
> > > > for the napi_enable of the resize. So if the re-enable failed vq is used as a normal
> > > > vq, this logic can be removed.
> > >
> > > Actually I meant the part of virtnet_rx_resize().
> > >
> > > If we don't synchronize with the refill work, it might enable NAPI unexpectedly?
> >
> > I don't think this situation will be encountered, because napi_disable is
> > mutually exclusive, so there will be no unexpected napi enable.
> >
> > Is there something I misunderstood?
>
> So in virtnet_rx_resize() we do:
>
> napi_disable()
> ...
> resize()
> ...
> napi_enalbe()
>
> How can we guarantee that the work is not run after the napi_disable()?


I think you're talking about a situation like this:

virtnet_rx_resize          refill work
-----------------------------------------------------------
 napi_disable()
 ...                       napi_disable()
 resize()                      ...
                           napi_enable()
 ...
 napi_enable()


But in fact:

virtnet_rx_resize          refill work
-----------------------------------------------------------
 napi_disable()
 ...                       napi_disable() <----[0]
 resize()                       |
 ...                            |
 napi_enable()                  |
                           napi_disable() <---- [1] here success
                           napi_enable()

Because virtnet_rx_resize() has already executed napi_disable(), the napi_disable()
at [0] will wait and only complete at [1].

I'm not sure if my understanding is correct.

Thanks.

>
> Thanks
>
> >
> > Thanks.
> >
> > >
> > > Thanks
> > >
> > > >
> > > >
> > > > >
> > > > >
> > > > > > +
> > > > > >             still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> > > > > >             virtnet_napi_enable(rq->vq, &rq->napi);
> > > > > >
> > > > > > @@ -1523,6 +1544,10 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
> > > > > >     if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
> > > > > >             return;
> > > > > >
> > > > > > +   /* Check if vq is in reset state. See more in virtnet_napi_enable() */
> > > > > > +   if (sq->vq->reset)
> > > > > > +           return;
> > > > >
> > > > >
> > > > > We've disabled TX napi, any chance we can still hit this?
> > > >
> > > > Same as above.
> > > >
> > > > >
> > > > >
> > > > > > +
> > > > > >     if (__netif_tx_trylock(txq)) {
> > > > > >             do {
> > > > > >                     virtqueue_disable_cb(sq->vq);
> > > > > > @@ -1769,6 +1794,62 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> > > > > >     return NETDEV_TX_OK;
> > > > > >   }
> > > > > >
> > > > > > +static int virtnet_rx_resize(struct virtnet_info *vi,
> > > > > > +                        struct receive_queue *rq, u32 ring_num)
> > > > > > +{
> > > > > > +   int err;
> > > > > > +
> > > > > > +   napi_disable(&rq->napi);
> > > > > > +
> > > > > > +   err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
> > > > > > +   if (err)
> > > > > > +           goto err;
> > > > > > +
> > > > > > +   if (!try_fill_recv(vi, rq, GFP_KERNEL))
> > > > > > +           schedule_delayed_work(&vi->refill, 0);
> > > > > > +
> > > > > > +   virtnet_napi_enable(rq->vq, &rq->napi);
> > > > > > +   return 0;
> > > > > > +
> > > > > > +err:
> > > > > > +   netdev_err(vi->dev,
> > > > > > +              "reset rx reset vq fail: rx queue index: %td err: %d\n",
> > > > > > +              rq - vi->rq, err);
> > > > > > +   virtnet_napi_enable(rq->vq, &rq->napi);
> > > > > > +   return err;
> > > > > > +}
> > > > > > +
> > > > > > +static int virtnet_tx_resize(struct virtnet_info *vi,
> > > > > > +                        struct send_queue *sq, u32 ring_num)
> > > > > > +{
> > > > > > +   struct netdev_queue *txq;
> > > > > > +   int err, qindex;
> > > > > > +
> > > > > > +   qindex = sq - vi->sq;
> > > > > > +
> > > > > > +   virtnet_napi_tx_disable(&sq->napi);
> > > > > > +
> > > > > > +   txq = netdev_get_tx_queue(vi->dev, qindex);
> > > > > > +   __netif_tx_lock_bh(txq);
> > > > > > +   netif_stop_subqueue(vi->dev, qindex);
> > > > > > +   __netif_tx_unlock_bh(txq);
> > > > > > +
> > > > > > +   err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
> > > > > > +   if (err)
> > > > > > +           goto err;
> > > > > > +
> > > > > > +   netif_start_subqueue(vi->dev, qindex);
> > > > > > +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > > > > > +   return 0;
> > > > > > +
> > > > > > +err:
> > > > >
> > > > >
> > > > > I guess we can still start the queue in this case? (Since we don't
> > > > > change the queue if resize fails).
> > > >
> > > > Yes, you are right.
> > > >
> > > > Thanks.
> > > >
> > > > >
> > > > >
> > > > > > +   netdev_err(vi->dev,
> > > > > > +              "reset tx reset vq fail: tx queue index: %td err: %d\n",
> > > > > > +              sq - vi->sq, err);
> > > > > > +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > > > > > +   return err;
> > > > > > +}
> > > > > > +
> > > > > >   /*
> > > > > >    * Send command via the control virtqueue and check status.  Commands
> > > > > >    * supported by the hypervisor, as indicated by feature bits, should
> > > > >
> > > >
> > >
> >
>
Xuan Zhuo April 18, 2022, 3:21 a.m. UTC | #7
On Wed, 13 Apr 2022 16:00:18 +0800, Jason Wang <jasowang@redhat.com> wrote:
>
> 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > This patch implements the resize function of the rx, tx queues.
> > Based on this function, it is possible to modify the ring num of the
> > queue.
> >
> > There may be an exception during the resize process, the resize may
> > fail, or the vq can no longer be used. Either way, we must execute
> > napi_enable(). Because napi_disable is similar to a lock, napi_enable
> > must be called after calling napi_disable.
> >
> > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > ---
> >   drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++++++++++++++
> >   1 file changed, 81 insertions(+)
> >
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index b8bf00525177..ba6859f305f7 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -251,6 +251,9 @@ struct padded_vnet_hdr {
> >   	char padding[4];
> >   };
> >
> > +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> > +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
> > +
> >   static bool is_xdp_frame(void *ptr)
> >   {
> >   	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
> > @@ -1369,6 +1372,15 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
> >   {
> >   	napi_enable(napi);
> >
> > +	/* Check if vq is in reset state. The normal reset/resize process will
> > +	 * be protected by napi. However, the protection of napi is only enabled
> > +	 * during the operation, and the protection of napi will end after the
> > +	 * operation is completed. If re-enable fails during the process, vq
> > +	 * will remain unavailable with reset state.
> > +	 */
> > +	if (vq->reset)
> > +		return;
>
>
> I don't get when could we hit this condition.
>
>
> > +
> >   	/* If all buffers were filled by other side before we napi_enabled, we
> >   	 * won't get another interrupt, so process any outstanding packets now.
> >   	 * Call local_bh_enable after to trigger softIRQ processing.
> > @@ -1413,6 +1425,15 @@ static void refill_work(struct work_struct *work)
> >   		struct receive_queue *rq = &vi->rq[i];
> >
> >   		napi_disable(&rq->napi);
> > +
> > +		/* Check if vq is in reset state. See more in
> > +		 * virtnet_napi_enable()
> > +		 */
> > +		if (rq->vq->reset) {
> > +			virtnet_napi_enable(rq->vq, &rq->napi);
> > +			continue;
> > +		}
>
>
> Can we do something similar in virtnet_close() by canceling the work?
>
>
> > +
> >   		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> >   		virtnet_napi_enable(rq->vq, &rq->napi);
> >
> > @@ -1523,6 +1544,10 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
> >   	if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
> >   		return;
> >
> > +	/* Check if vq is in reset state. See more in virtnet_napi_enable() */
> > +	if (sq->vq->reset)
> > +		return;
>
>
> We've disabled TX napi, any chance we can still hit this?


static int virtnet_poll(struct napi_struct *napi, int budget)
{
	struct receive_queue *rq =
		container_of(napi, struct receive_queue, napi);
	struct virtnet_info *vi = rq->vq->vdev->priv;
	struct send_queue *sq;
	unsigned int received;
	unsigned int xdp_xmit = 0;

	virtnet_poll_cleantx(rq);
...
}

This is called by rx poll. Although it is the logic of tx, it is not driven by
tx napi, but is called in rx poll.

Thanks.


>
>
> > +
> >   	if (__netif_tx_trylock(txq)) {
> >   		do {
> >   			virtqueue_disable_cb(sq->vq);
> > @@ -1769,6 +1794,62 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> >   	return NETDEV_TX_OK;
> >   }
> >
> > +static int virtnet_rx_resize(struct virtnet_info *vi,
> > +			     struct receive_queue *rq, u32 ring_num)
> > +{
> > +	int err;
> > +
> > +	napi_disable(&rq->napi);
> > +
> > +	err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
> > +	if (err)
> > +		goto err;
> > +
> > +	if (!try_fill_recv(vi, rq, GFP_KERNEL))
> > +		schedule_delayed_work(&vi->refill, 0);
> > +
> > +	virtnet_napi_enable(rq->vq, &rq->napi);
> > +	return 0;
> > +
> > +err:
> > +	netdev_err(vi->dev,
> > +		   "reset rx reset vq fail: rx queue index: %td err: %d\n",
> > +		   rq - vi->rq, err);
> > +	virtnet_napi_enable(rq->vq, &rq->napi);
> > +	return err;
> > +}
> > +
> > +static int virtnet_tx_resize(struct virtnet_info *vi,
> > +			     struct send_queue *sq, u32 ring_num)
> > +{
> > +	struct netdev_queue *txq;
> > +	int err, qindex;
> > +
> > +	qindex = sq - vi->sq;
> > +
> > +	virtnet_napi_tx_disable(&sq->napi);
> > +
> > +	txq = netdev_get_tx_queue(vi->dev, qindex);
> > +	__netif_tx_lock_bh(txq);
> > +	netif_stop_subqueue(vi->dev, qindex);
> > +	__netif_tx_unlock_bh(txq);
> > +
> > +	err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
> > +	if (err)
> > +		goto err;
> > +
> > +	netif_start_subqueue(vi->dev, qindex);
> > +	virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > +	return 0;
> > +
> > +err:
>
>
> I guess we can still start the queue in this case? (Since we don't
> change the queue if resize fails).
>
>
> > +	netdev_err(vi->dev,
> > +		   "reset tx reset vq fail: tx queue index: %td err: %d\n",
> > +		   sq - vi->sq, err);
> > +	virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > +	return err;
> > +}
> > +
> >   /*
> >    * Send command via the control virtqueue and check status.  Commands
> >    * supported by the hypervisor, as indicated by feature bits, should
>
Jason Wang April 18, 2022, 7:49 a.m. UTC | #8
On Mon, Apr 18, 2022 at 11:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>
> On Wed, 13 Apr 2022 16:00:18 +0800, Jason Wang <jasowang@redhat.com> wrote:
> >
> > 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > > This patch implements the resize function of the rx, tx queues.
> > > Based on this function, it is possible to modify the ring num of the
> > > queue.
> > >
> > > There may be an exception during the resize process, the resize may
> > > fail, or the vq can no longer be used. Either way, we must execute
> > > napi_enable(). Because napi_disable is similar to a lock, napi_enable
> > > must be called after calling napi_disable.
> > >
> > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > ---
> > >   drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++++++++++++++
> > >   1 file changed, 81 insertions(+)
> > >
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index b8bf00525177..ba6859f305f7 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -251,6 +251,9 @@ struct padded_vnet_hdr {
> > >     char padding[4];
> > >   };
> > >
> > > +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > +
> > >   static bool is_xdp_frame(void *ptr)
> > >   {
> > >     return (unsigned long)ptr & VIRTIO_XDP_FLAG;
> > > @@ -1369,6 +1372,15 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
> > >   {
> > >     napi_enable(napi);
> > >
> > > +   /* Check if vq is in reset state. The normal reset/resize process will
> > > +    * be protected by napi. However, the protection of napi is only enabled
> > > +    * during the operation, and the protection of napi will end after the
> > > +    * operation is completed. If re-enable fails during the process, vq
> > > +    * will remain unavailable with reset state.
> > > +    */
> > > +   if (vq->reset)
> > > +           return;
> >
> >
> > I don't get when could we hit this condition.
> >
> >
> > > +
> > >     /* If all buffers were filled by other side before we napi_enabled, we
> > >      * won't get another interrupt, so process any outstanding packets now.
> > >      * Call local_bh_enable after to trigger softIRQ processing.
> > > @@ -1413,6 +1425,15 @@ static void refill_work(struct work_struct *work)
> > >             struct receive_queue *rq = &vi->rq[i];
> > >
> > >             napi_disable(&rq->napi);
> > > +
> > > +           /* Check if vq is in reset state. See more in
> > > +            * virtnet_napi_enable()
> > > +            */
> > > +           if (rq->vq->reset) {
> > > +                   virtnet_napi_enable(rq->vq, &rq->napi);
> > > +                   continue;
> > > +           }
> >
> >
> > Can we do something similar in virtnet_close() by canceling the work?
> >
> >
> > > +
> > >             still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> > >             virtnet_napi_enable(rq->vq, &rq->napi);
> > >
> > > @@ -1523,6 +1544,10 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
> > >     if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
> > >             return;
> > >
> > > +   /* Check if vq is in reset state. See more in virtnet_napi_enable() */
> > > +   if (sq->vq->reset)
> > > +           return;
> >
> >
> > We've disabled TX napi, any chance we can still hit this?
>
>
> static int virtnet_poll(struct napi_struct *napi, int budget)
> {
>         struct receive_queue *rq =
>                 container_of(napi, struct receive_queue, napi);
>         struct virtnet_info *vi = rq->vq->vdev->priv;
>         struct send_queue *sq;
>         unsigned int received;
>         unsigned int xdp_xmit = 0;
>
>         virtnet_poll_cleantx(rq);
> ...
> }
>
> This is called by rx poll. Although it is the logic of tx, it is not driven by
> tx napi, but is called in rx poll.

Ok, but we need to guarantee the memory ordering in this case. Disabling RX
napi could be a solution for this.

Thanks

>
> Thanks.
>
>
> >
> >
> > > +
> > >     if (__netif_tx_trylock(txq)) {
> > >             do {
> > >                     virtqueue_disable_cb(sq->vq);
> > > @@ -1769,6 +1794,62 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
> > >     return NETDEV_TX_OK;
> > >   }
> > >
> > > +static int virtnet_rx_resize(struct virtnet_info *vi,
> > > +                        struct receive_queue *rq, u32 ring_num)
> > > +{
> > > +   int err;
> > > +
> > > +   napi_disable(&rq->napi);
> > > +
> > > +   err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
> > > +   if (err)
> > > +           goto err;
> > > +
> > > +   if (!try_fill_recv(vi, rq, GFP_KERNEL))
> > > +           schedule_delayed_work(&vi->refill, 0);
> > > +
> > > +   virtnet_napi_enable(rq->vq, &rq->napi);
> > > +   return 0;
> > > +
> > > +err:
> > > +   netdev_err(vi->dev,
> > > +              "reset rx reset vq fail: rx queue index: %td err: %d\n",
> > > +              rq - vi->rq, err);
> > > +   virtnet_napi_enable(rq->vq, &rq->napi);
> > > +   return err;
> > > +}
> > > +
> > > +static int virtnet_tx_resize(struct virtnet_info *vi,
> > > +                        struct send_queue *sq, u32 ring_num)
> > > +{
> > > +   struct netdev_queue *txq;
> > > +   int err, qindex;
> > > +
> > > +   qindex = sq - vi->sq;
> > > +
> > > +   virtnet_napi_tx_disable(&sq->napi);
> > > +
> > > +   txq = netdev_get_tx_queue(vi->dev, qindex);
> > > +   __netif_tx_lock_bh(txq);
> > > +   netif_stop_subqueue(vi->dev, qindex);
> > > +   __netif_tx_unlock_bh(txq);
> > > +
> > > +   err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
> > > +   if (err)
> > > +           goto err;
> > > +
> > > +   netif_start_subqueue(vi->dev, qindex);
> > > +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > > +   return 0;
> > > +
> > > +err:
> >
> >
> > I guess we can still start the queue in this case? (Since we don't
> > change the queue if resize fails).
> >
> >
> > > +   netdev_err(vi->dev,
> > > +              "reset tx reset vq fail: tx queue index: %td err: %d\n",
> > > +              sq - vi->sq, err);
> > > +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
> > > +   return err;
> > > +}
> > > +
> > >   /*
> > >    * Send command via the control virtqueue and check status.  Commands
> > >    * supported by the hypervisor, as indicated by feature bits, should
> >
>
Jason Wang April 18, 2022, 7:57 a.m. UTC | #9
在 2022/4/15 17:17, Xuan Zhuo 写道:
> On Fri, 15 Apr 2022 13:53:54 +0800, Jason Wang <jasowang@redhat.com> wrote:
>> On Fri, Apr 15, 2022 at 10:23 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>>> On Thu, 14 Apr 2022 17:30:02 +0800, Jason Wang <jasowang@redhat.com> wrote:
>>>> On Wed, Apr 13, 2022 at 4:47 PM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
>>>>> On Wed, 13 Apr 2022 16:00:18 +0800, Jason Wang <jasowang@redhat.com> wrote:
>>>>>> 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
>>>>>>> This patch implements the resize function of the rx, tx queues.
>>>>>>> Based on this function, it is possible to modify the ring num of the
>>>>>>> queue.
>>>>>>>
>>>>>>> There may be an exception during the resize process, the resize may
>>>>>>> fail, or the vq can no longer be used. Either way, we must execute
>>>>>>> napi_enable(). Because napi_disable is similar to a lock, napi_enable
>>>>>>> must be called after calling napi_disable.
>>>>>>>
>>>>>>> Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
>>>>>>> ---
>>>>>>>    drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++++++++++++++
>>>>>>>    1 file changed, 81 insertions(+)
>>>>>>>
>>>>>>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>>>>>>> index b8bf00525177..ba6859f305f7 100644
>>>>>>> --- a/drivers/net/virtio_net.c
>>>>>>> +++ b/drivers/net/virtio_net.c
>>>>>>> @@ -251,6 +251,9 @@ struct padded_vnet_hdr {
>>>>>>>      char padding[4];
>>>>>>>    };
>>>>>>>
>>>>>>> +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
>>>>>>> +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
>>>>>>> +
>>>>>>>    static bool is_xdp_frame(void *ptr)
>>>>>>>    {
>>>>>>>      return (unsigned long)ptr & VIRTIO_XDP_FLAG;
>>>>>>> @@ -1369,6 +1372,15 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
>>>>>>>    {
>>>>>>>      napi_enable(napi);
>>>>>>>
>>>>>>> +   /* Check if vq is in reset state. The normal reset/resize process will
>>>>>>> +    * be protected by napi. However, the protection of napi is only enabled
>>>>>>> +    * during the operation, and the protection of napi will end after the
>>>>>>> +    * operation is completed. If re-enable fails during the process, vq
>>>>>>> +    * will remain unavailable with reset state.
>>>>>>> +    */
>>>>>>> +   if (vq->reset)
>>>>>>> +           return;
>>>>>>
>>>>>> I don't get when could we hit this condition.
>>>>>
>>>>> In patch 23, the code to implement re-enable vq is as follows:
>>>>>
>>>>> +static int vp_modern_enable_reset_vq(struct virtqueue *vq)
>>>>> +{
>>>>> +       struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
>>>>> +       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
>>>>> +       struct virtio_pci_vq_info *info;
>>>>> +       unsigned long flags, index;
>>>>> +       int err;
>>>>> +
>>>>> +       if (!vq->reset)
>>>>> +               return -EBUSY;
>>>>> +
>>>>> +       index = vq->index;
>>>>> +       info = vp_dev->vqs[index];
>>>>> +
>>>>> +       /* check queue reset status */
>>>>> +       if (vp_modern_get_queue_reset(mdev, index) != 1)
>>>>> +               return -EBUSY;
>>>>> +
>>>>> +       err = vp_active_vq(vq, info->msix_vector);
>>>>> +       if (err)
>>>>> +               return err;
>>>>> +
>>>>> +       if (vq->callback) {
>>>>> +               spin_lock_irqsave(&vp_dev->lock, flags);
>>>>> +               list_add(&info->node, &vp_dev->virtqueues);
>>>>> +               spin_unlock_irqrestore(&vp_dev->lock, flags);
>>>>> +       } else {
>>>>> +               INIT_LIST_HEAD(&info->node);
>>>>> +       }
>>>>> +
>>>>> +       vp_modern_set_queue_enable(&vp_dev->mdev, index, true);
>>>>> +
>>>>> +       if (vp_dev->per_vq_vectors && info->msix_vector != VIRTIO_MSI_NO_VECTOR)
>>>>> +               enable_irq(pci_irq_vector(vp_dev->pci_dev, info->msix_vector));
>>>>> +
>>>>> +       vq->reset = false;
>>>>> +
>>>>> +       return 0;
>>>>> +}
>>>>>
>>>>>
>>>>> There are three situations where an error will be returned. These are the
>>>>> situations I want to handle.
>>>> Right, but it looks harmless if we just schedule the NAPI without the check.
>>> Yes.
>>>
>>>>> But I'm rethinking the question, and I feel like you're right, although the
>>>>> hardware setup may fail. We can no longer sync with the hardware. But using it
>>>>> as a normal vq doesn't have any problems.
>>>> Note that we should make sure a buggy (malicious) device can't crash
>>>> the code by changing the queue_reset value at will.
>>> I will keep an eye on this situation.
>>>
>>>>>>
>>>>>>> +
>>>>>>>      /* If all buffers were filled by other side before we napi_enabled, we
>>>>>>>       * won't get another interrupt, so process any outstanding packets now.
>>>>>>>       * Call local_bh_enable after to trigger softIRQ processing.
>>>>>>> @@ -1413,6 +1425,15 @@ static void refill_work(struct work_struct *work)
>>>>>>>              struct receive_queue *rq = &vi->rq[i];
>>>>>>>
>>>>>>>              napi_disable(&rq->napi);
>>>>>>> +
>>>>>>> +           /* Check if vq is in reset state. See more in
>>>>>>> +            * virtnet_napi_enable()
>>>>>>> +            */
>>>>>>> +           if (rq->vq->reset) {
>>>>>>> +                   virtnet_napi_enable(rq->vq, &rq->napi);
>>>>>>> +                   continue;
>>>>>>> +           }
>>>>>>
>>>>>> Can we do something similar in virtnet_close() by canceling the work?
>>>>> I think there is no need to cancel the work here, because napi_disable will wait
>>>>> for the napi_enable of the resize. So if the re-enable failed vq is used as a normal
>>>>> vq, this logic can be removed.
>>>> Actually I meant the part of virtnet_rx_resize().
>>>>
>>>> If we don't synchronize with the refill work, it might enable NAPI unexpectedly?
>>> I don't think this situation will be encountered, because napi_disable is
>>> mutually exclusive, so there will be no unexpected napi enable.
>>>
>>> Is there something I misunderstood?
>> So in virtnet_rx_resize() we do:
>>
>> napi_disable()
>> ...
>> resize()
>> ...
>> napi_enable()
>>
>> How can we guarantee that the work is not run after the napi_disable()?
>
> I think you're talking about a situation like this:
>
> virtnet_rx_resize          refill work
> -----------------------------------------------------------
>   napi_disable()
>   ...                       napi_disable()
>   resize()                      ...
>                             napi_enable()
>   ...
>   napi_enable()
>
>
> But in fact:
>
> virtnet_rx_resize          refill work
> -----------------------------------------------------------
>   napi_disable()
>   ...                       napi_disable() <----[0]
>   resize()                       |
>   ...                            |
>   napi_enable()                  |
>                             napi_disable() <---- [1] here success
>                             napi_enable()
>
> Because virtnet_rx_resize() has already executed napi_disable(), napi_disable()
> at [0] will wait, and it only completes at [1].
>
> I'm not sure if my understanding is correct.


I think you're right here.

Thanks


>
> Thanks.
>
>> Thanks
>>
>>> Thanks.
>>>
>>>> Thanks
>>>>
>>>>>
>>>>>>
>>>>>>> +
>>>>>>>              still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
>>>>>>>              virtnet_napi_enable(rq->vq, &rq->napi);
>>>>>>>
>>>>>>> @@ -1523,6 +1544,10 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
>>>>>>>      if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
>>>>>>>              return;
>>>>>>>
>>>>>>> +   /* Check if vq is in reset state. See more in virtnet_napi_enable() */
>>>>>>> +   if (sq->vq->reset)
>>>>>>> +           return;
>>>>>>
>>>>>> We've disabled TX napi, any chance we can still hit this?
>>>>> Same as above.
>>>>>
>>>>>>
>>>>>>> +
>>>>>>>      if (__netif_tx_trylock(txq)) {
>>>>>>>              do {
>>>>>>>                      virtqueue_disable_cb(sq->vq);
>>>>>>> @@ -1769,6 +1794,62 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
>>>>>>>      return NETDEV_TX_OK;
>>>>>>>    }
>>>>>>>
>>>>>>> +static int virtnet_rx_resize(struct virtnet_info *vi,
>>>>>>> +                        struct receive_queue *rq, u32 ring_num)
>>>>>>> +{
>>>>>>> +   int err;
>>>>>>> +
>>>>>>> +   napi_disable(&rq->napi);
>>>>>>> +
>>>>>>> +   err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
>>>>>>> +   if (err)
>>>>>>> +           goto err;
>>>>>>> +
>>>>>>> +   if (!try_fill_recv(vi, rq, GFP_KERNEL))
>>>>>>> +           schedule_delayed_work(&vi->refill, 0);
>>>>>>> +
>>>>>>> +   virtnet_napi_enable(rq->vq, &rq->napi);
>>>>>>> +   return 0;
>>>>>>> +
>>>>>>> +err:
>>>>>>> +   netdev_err(vi->dev,
>>>>>>> +              "reset rx reset vq fail: rx queue index: %td err: %d\n",
>>>>>>> +              rq - vi->rq, err);
>>>>>>> +   virtnet_napi_enable(rq->vq, &rq->napi);
>>>>>>> +   return err;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static int virtnet_tx_resize(struct virtnet_info *vi,
>>>>>>> +                        struct send_queue *sq, u32 ring_num)
>>>>>>> +{
>>>>>>> +   struct netdev_queue *txq;
>>>>>>> +   int err, qindex;
>>>>>>> +
>>>>>>> +   qindex = sq - vi->sq;
>>>>>>> +
>>>>>>> +   virtnet_napi_tx_disable(&sq->napi);
>>>>>>> +
>>>>>>> +   txq = netdev_get_tx_queue(vi->dev, qindex);
>>>>>>> +   __netif_tx_lock_bh(txq);
>>>>>>> +   netif_stop_subqueue(vi->dev, qindex);
>>>>>>> +   __netif_tx_unlock_bh(txq);
>>>>>>> +
>>>>>>> +   err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
>>>>>>> +   if (err)
>>>>>>> +           goto err;
>>>>>>> +
>>>>>>> +   netif_start_subqueue(vi->dev, qindex);
>>>>>>> +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
>>>>>>> +   return 0;
>>>>>>> +
>>>>>>> +err:
>>>>>>
>>>>>> I guess we can still start the queue in this case? (Since we don't
>>>>>> change the queue if resize fails).
>>>>> Yes, you are right.
>>>>>
>>>>> Thanks.
>>>>>
>>>>>>
>>>>>>> +   netdev_err(vi->dev,
>>>>>>> +              "reset tx reset vq fail: tx queue index: %td err: %d\n",
>>>>>>> +              sq - vi->sq, err);
>>>>>>> +   virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
>>>>>>> +   return err;
>>>>>>> +}
>>>>>>> +
>>>>>>>    /*
>>>>>>>     * Send command via the control virtqueue and check status.  Commands
>>>>>>>     * supported by the hypervisor, as indicated by feature bits, should
Xuan Zhuo April 18, 2022, 8:48 a.m. UTC | #10
On Mon, 18 Apr 2022 15:49:29 +0800, Jason Wang <jasowang@redhat.com> wrote:
> On Mon, Apr 18, 2022 at 11:24 AM Xuan Zhuo <xuanzhuo@linux.alibaba.com> wrote:
> >
> > On Wed, 13 Apr 2022 16:00:18 +0800, Jason Wang <jasowang@redhat.com> wrote:
> > >
> > > 在 2022/4/6 上午11:43, Xuan Zhuo 写道:
> > > > This patch implements the resize function of the rx, tx queues.
> > > > Based on this function, it is possible to modify the ring num of the
> > > > queue.
> > > >
> > > > There may be an exception during the resize process, the resize may
> > > > fail, or the vq can no longer be used. Either way, we must execute
> > > > napi_enable(). Because napi_disable is similar to a lock, napi_enable
> > > > must be called after calling napi_disable.
> > > >
> > > > Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
> > > > ---
> > > >   drivers/net/virtio_net.c | 81 ++++++++++++++++++++++++++++++++++++++++
> > > >   1 file changed, 81 insertions(+)
> > > >
> > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > > index b8bf00525177..ba6859f305f7 100644
> > > > --- a/drivers/net/virtio_net.c
> > > > +++ b/drivers/net/virtio_net.c
> > > > @@ -251,6 +251,9 @@ struct padded_vnet_hdr {
> > > >     char padding[4];
> > > >   };
> > > >
> > > > +static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > > +static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
> > > > +
> > > >   static bool is_xdp_frame(void *ptr)
> > > >   {
> > > >     return (unsigned long)ptr & VIRTIO_XDP_FLAG;
> > > > @@ -1369,6 +1372,15 @@ static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
> > > >   {
> > > >     napi_enable(napi);
> > > >
> > > > +   /* Check if vq is in reset state. The normal reset/resize process will
> > > > +    * be protected by napi. However, the protection of napi is only enabled
> > > > +    * during the operation, and the protection of napi will end after the
> > > > +    * operation is completed. If re-enable fails during the process, vq
> > > > +    * will remain unavailable with reset state.
> > > > +    */
> > > > +   if (vq->reset)
> > > > +           return;
> > >
> > >
> > > I don't get when could we hit this condition.
> > >
> > >
> > > > +
> > > >     /* If all buffers were filled by other side before we napi_enabled, we
> > > >      * won't get another interrupt, so process any outstanding packets now.
> > > >      * Call local_bh_enable after to trigger softIRQ processing.
> > > > @@ -1413,6 +1425,15 @@ static void refill_work(struct work_struct *work)
> > > >             struct receive_queue *rq = &vi->rq[i];
> > > >
> > > >             napi_disable(&rq->napi);
> > > > +
> > > > +           /* Check if vq is in reset state. See more in
> > > > +            * virtnet_napi_enable()
> > > > +            */
> > > > +           if (rq->vq->reset) {
> > > > +                   virtnet_napi_enable(rq->vq, &rq->napi);
> > > > +                   continue;
> > > > +           }
> > >
> > >
> > > Can we do something similar in virtnet_close() by canceling the work?
> > >
> > >
> > > > +
> > > >             still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> > > >             virtnet_napi_enable(rq->vq, &rq->napi);
> > > >
> > > > @@ -1523,6 +1544,10 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
> > > >     if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
> > > >             return;
> > > >
> > > > +   /* Check if vq is in reset state. See more in virtnet_napi_enable() */
> > > > +   if (sq->vq->reset)
> > > > +           return;
> > >
> > >
> > > We've disabled TX napi, any chance we can still hit this?
> >
> >
> > static int virtnet_poll(struct napi_struct *napi, int budget)
> > {
> >         struct receive_queue *rq =
> >                 container_of(napi, struct receive_queue, napi);
> >         struct virtnet_info *vi = rq->vq->vdev->priv;
> >         struct send_queue *sq;
> >         unsigned int received;
> >         unsigned int xdp_xmit = 0;
> >
> >         virtnet_poll_cleantx(rq);
> > ...
> > }
> >
> > This is called by rx poll. Although it is the logic of tx, it is not driven by
> > tx napi, but is called in rx poll.
>
> Ok, but we need to guarantee the memory ordering in this case. Disabling RX
> napi could be a solution for this.

Yes, I have realized this too. I have two solutions: disable rx napi, or the
following.

Thanks.


diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 9bf1b6530b38..7764d1dcb831 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -135,6 +135,7 @@ struct send_queue {
 	struct virtnet_sq_stats stats;

 	struct napi_struct napi;
+	bool reset;
 };

 /* Internal representation of a receive virtqueue */
@@ -1583,6 +1587,11 @@ static void virtnet_poll_cleantx(struct receive_queue *rq)
 		return;

 	if (__netif_tx_trylock(txq)) {
+		if (sq->reset) {
+			__netif_tx_unlock(txq);
+			return;
+		}
+
 		do {
 			virtqueue_disable_cb(sq->vq);
 			free_old_xmit_skbs(sq, true);
@@ -1828,6 +1837,56 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }

+static int virtnet_tx_resize(struct virtnet_info *vi,
+			     struct send_queue *sq, u32 ring_num)
+{
+	struct netdev_queue *txq;
+	int err, qindex;
+
+	qindex = sq - vi->sq;
+
+	virtnet_napi_tx_disable(&sq->napi);
+
+	txq = netdev_get_tx_queue(vi->dev, qindex);
+
+	__netif_tx_lock_bh(txq);
+	netif_stop_subqueue(vi->dev, qindex);
+	sq->reset = true;
+	__netif_tx_unlock_bh(txq);
+
+	err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
+	if (err)
+		netdev_err(vi->dev, "resize tx fail: tx queue index: %d err: %d\n", qindex, err);
+
+	__netif_tx_lock_bh(txq);
+	sq->reset = false;
+	netif_start_subqueue(vi->dev, qindex);
+	__netif_tx_unlock_bh(txq);
+
+	virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
+	return err;
+}
+
diff mbox series

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b8bf00525177..ba6859f305f7 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -251,6 +251,9 @@  struct padded_vnet_hdr {
 	char padding[4];
 };
 
+static void virtnet_sq_free_unused_buf(struct virtqueue *vq, void *buf);
+static void virtnet_rq_free_unused_buf(struct virtqueue *vq, void *buf);
+
 static bool is_xdp_frame(void *ptr)
 {
 	return (unsigned long)ptr & VIRTIO_XDP_FLAG;
@@ -1369,6 +1372,15 @@  static void virtnet_napi_enable(struct virtqueue *vq, struct napi_struct *napi)
 {
 	napi_enable(napi);
 
+	/* Check if vq is in reset state. The normal reset/resize process will
+	 * be protected by napi. However, the protection of napi is only enabled
+	 * during the operation, and the protection of napi will end after the
+	 * operation is completed. If re-enable fails during the process, vq
+	 * will remain unavailable with reset state.
+	 */
+	if (vq->reset)
+		return;
+
 	/* If all buffers were filled by other side before we napi_enabled, we
 	 * won't get another interrupt, so process any outstanding packets now.
 	 * Call local_bh_enable after to trigger softIRQ processing.
@@ -1413,6 +1425,15 @@  static void refill_work(struct work_struct *work)
 		struct receive_queue *rq = &vi->rq[i];
 
 		napi_disable(&rq->napi);
+
+		/* Check if vq is in reset state. See more in
+		 * virtnet_napi_enable()
+		 */
+		if (rq->vq->reset) {
+			virtnet_napi_enable(rq->vq, &rq->napi);
+			continue;
+		}
+
 		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
 		virtnet_napi_enable(rq->vq, &rq->napi);
 
@@ -1523,6 +1544,10 @@  static void virtnet_poll_cleantx(struct receive_queue *rq)
 	if (!sq->napi.weight || is_xdp_raw_buffer_queue(vi, index))
 		return;
 
+	/* Check if vq is in reset state. See more in virtnet_napi_enable() */
+	if (sq->vq->reset)
+		return;
+
 	if (__netif_tx_trylock(txq)) {
 		do {
 			virtqueue_disable_cb(sq->vq);
@@ -1769,6 +1794,62 @@  static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
+static int virtnet_rx_resize(struct virtnet_info *vi,
+			     struct receive_queue *rq, u32 ring_num)
+{
+	int err;
+
+	napi_disable(&rq->napi);
+
+	err = virtqueue_resize(rq->vq, ring_num, virtnet_rq_free_unused_buf);
+	if (err)
+		goto err;
+
+	if (!try_fill_recv(vi, rq, GFP_KERNEL))
+		schedule_delayed_work(&vi->refill, 0);
+
+	virtnet_napi_enable(rq->vq, &rq->napi);
+	return 0;
+
+err:
+	netdev_err(vi->dev,
+		   "reset rx reset vq fail: rx queue index: %td err: %d\n",
+		   rq - vi->rq, err);
+	virtnet_napi_enable(rq->vq, &rq->napi);
+	return err;
+}
+
+static int virtnet_tx_resize(struct virtnet_info *vi,
+			     struct send_queue *sq, u32 ring_num)
+{
+	struct netdev_queue *txq;
+	int err, qindex;
+
+	qindex = sq - vi->sq;
+
+	virtnet_napi_tx_disable(&sq->napi);
+
+	txq = netdev_get_tx_queue(vi->dev, qindex);
+	__netif_tx_lock_bh(txq);
+	netif_stop_subqueue(vi->dev, qindex);
+	__netif_tx_unlock_bh(txq);
+
+	err = virtqueue_resize(sq->vq, ring_num, virtnet_sq_free_unused_buf);
+	if (err)
+		goto err;
+
+	netif_start_subqueue(vi->dev, qindex);
+	virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
+	return 0;
+
+err:
+	netdev_err(vi->dev,
+		   "reset tx reset vq fail: tx queue index: %td err: %d\n",
+		   sq - vi->sq, err);
+	virtnet_napi_tx_enable(vi, sq->vq, &sq->napi);
+	return err;
+}
+
 /*
  * Send command via the control virtqueue and check status.  Commands
  * supported by the hypervisor, as indicated by feature bits, should