Message ID | 4d3a1478-b6fc-47a3-8d77-7eca6a973a06@gmail.com (mailing list archive) |
---|---|
State | Changes Requested |
Delegated to: | Netdev Maintainers |
Series | virtio-net: hold netdev_lock when pausing rx |
Context | Check | Description |
---|---|---|
netdev/tree_selection | success | Guessing tree name failed - patch did not apply |
On Thu, Apr 10, 2025 at 02:05:57PM +0700, Bui Quang Minh wrote:
> When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
> napi_disable() on the receive queue's napi. The delayed refill_work
> also calls napi_disable() on the receive queue's napi. When
> napi_disable() is called on an already disabled napi, it will sleep in
> napi_disable_locked while still holding the netdev_lock. As a result,
> the later napi_enable gets stuck too, as it cannot acquire the
> netdev_lock. This leads to refill_work and the pause-then-resume tx
> getting stuck altogether.
>
> This scenario can be reproduced by binding an XDP socket to a
> virtio-net interface without setting up the fill ring. As a result,
> try_fill_recv will fail until the fill ring is set up, and refill_work
> is scheduled.
>
> This commit makes the pausing rx path hold the netdev_lock until
> resuming, preventing any napi_disable() from being called on a
> temporarily disabled napi.
>
> Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()")
> Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
> ---
>  drivers/net/virtio_net.c | 74 +++++++++++++++++++++++++---------------
>  1 file changed, 47 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 7e4617216a4b..74bd1065c586 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2786,9 +2786,13 @@ static void skb_recv_done(struct virtqueue *rvq)
>  }
>
>  static void virtnet_napi_do_enable(struct virtqueue *vq,
> -                                   struct napi_struct *napi)
> +                                   struct napi_struct *napi,
> +                                   bool netdev_locked)
>  {
> -        napi_enable(napi);
> +        if (netdev_locked)
> +                napi_enable_locked(napi);
> +        else
> +                napi_enable(napi);
>
>          /* If all buffers were filled by other side before we napi_enabled, we
>           * won't get another interrupt, so process any outstanding packets now.
> @@ -2799,16 +2803,16 @@ static void virtnet_napi_do_enable(struct virtqueue *vq,

Your patch is line-wrapped, unfortunately. Here and elsewhere.

[...]

> @@ -2875,9 +2885,11 @@ static void refill_work(struct work_struct *work)
>                   * instance lock)
>                   * - check netif_running() and return early to avoid a race
>                   */
> -                napi_disable(&rq->napi);
> +                netdev_lock(vi->dev);
> +                napi_disable_locked(&rq->napi);
>                  still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);

This does mean netdev_lock is held potentially for a long while,
while try_fill_recv and processing inside virtnet_napi_do_enable
finish. Better ideas?

> -                virtnet_napi_do_enable(rq->vq, &rq->napi);
> +                virtnet_napi_do_enable(rq->vq, &rq->napi, true);
> +                netdev_unlock(vi->dev);
>
>                  /* In theory, this can happen: if we don't get any buffers in
>                   * we will *never* try to fill again.

[...]
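For readers following the report, the interleaving described in the commit message looks roughly like this (an illustrative timeline of the unpatched code paths, not part of the patch):

```c
/*
 * Illustrative timeline of the deadlock in the unpatched driver:
 *
 *   rx pause path (xdp/xsk/resize)      refill_work
 *   ------------------------------      -----------
 *   virtnet_napi_disable(rq)
 *     napi_disable()                    napi_disable()
 *       napi now disabled                 netdev_lock(dev)
 *                                         napi_disable_locked()
 *                                           sleeps: napi is already disabled,
 *                                           netdev_lock is still held
 *   ...
 *   virtnet_napi_enable(rq)
 *     napi_enable()
 *       blocks on netdev_lock(dev)  ->  neither side can make progress
 */
```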
On 4/10/25 14:58, Michael S. Tsirkin wrote:
> On Thu, Apr 10, 2025 at 02:05:57PM +0700, Bui Quang Minh wrote:
>> When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
>> napi_disable() on the receive queue's napi. The delayed refill_work
>> also calls napi_disable() on the receive queue's napi. When
>> napi_disable() is called on an already disabled napi, it will sleep in
>> napi_disable_locked while still holding the netdev_lock. As a result,
>> the later napi_enable gets stuck too, as it cannot acquire the
>> netdev_lock. This leads to refill_work and the pause-then-resume tx
>> getting stuck altogether.
>
> [...]
>
>> @@ -2875,9 +2885,11 @@ static void refill_work(struct work_struct *work)
>>                   * instance lock)
>>                   * - check netif_running() and return early to avoid a race
>>                   */
>> -                napi_disable(&rq->napi);
>> +                netdev_lock(vi->dev);
>> +                napi_disable_locked(&rq->napi);
>>                  still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
>
> This does mean netdev_lock is held potentially for a long while,
> while try_fill_recv and processing inside virtnet_napi_do_enable
> finish. Better ideas?

I prefer the first patch in this thread where we disable delayed refill
and cancel all inflight refill_work before pausing rx.

Thanks,
Quang Minh.
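A rough sketch of that alternative direction (hypothetical code, marked with a `_sketch` suffix; not the patch under review and not necessarily identical to the earlier patch in the thread): keep the unlocked NAPI helpers, but make sure refill_work cannot run while rx is paused. It leans on the `disable_delayed_refill()`/`enable_delayed_refill()` helpers and the `vi->refill` delayed work that already exist in drivers/net/virtio_net.c; the exact placement in the pause/resume paths (per queue vs. once for all queues) would need care.

```c
static void virtnet_rx_pause_sketch(struct virtnet_info *vi,
                                    struct receive_queue *rq)
{
        if (netif_running(vi->dev)) {
                /* Stop new refill work and wait for any in flight. */
                disable_delayed_refill(vi);
                cancel_delayed_work_sync(&vi->refill);

                virtnet_napi_disable(rq);
                virtnet_cancel_dim(vi, &rq->dim);
        }
}

static void virtnet_rx_resume_sketch(struct virtnet_info *vi,
                                     struct receive_queue *rq)
{
        bool running = netif_running(vi->dev);

        /* Allow refill work again before we may need to schedule it. */
        enable_delayed_refill(vi);
        if (!try_fill_recv(vi, rq, GFP_KERNEL))
                schedule_delayed_work(&vi->refill, 0);

        if (running)
                virtnet_napi_enable(rq);
}
```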
When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call
napi_disable() on the receive queue's napi. The delayed refill_work
also calls napi_disable() on the receive queue's napi. When
napi_disable() is called on an already disabled napi, it will sleep in
napi_disable_locked while still holding the netdev_lock. As a result,
the later napi_enable gets stuck too, as it cannot acquire the
netdev_lock. This leads to refill_work and the pause-then-resume tx
getting stuck altogether.

This scenario can be reproduced by binding an XDP socket to a
virtio-net interface without setting up the fill ring. As a result,
try_fill_recv will fail until the fill ring is set up, and refill_work
is scheduled.

This commit makes the pausing rx path hold the netdev_lock until
resuming, preventing any napi_disable() from being called on a
temporarily disabled napi.

Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()")
Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
---
 drivers/net/virtio_net.c | 74 +++++++++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 27 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 7e4617216a4b..74bd1065c586 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2786,9 +2786,13 @@ static void skb_recv_done(struct virtqueue *rvq)
 }
 
 static void virtnet_napi_do_enable(struct virtqueue *vq,
-                                   struct napi_struct *napi)
+                                   struct napi_struct *napi,
+                                   bool netdev_locked)
 {
-        napi_enable(napi);
+        if (netdev_locked)
+                napi_enable_locked(napi);
+        else
+                napi_enable(napi);
 
         /* If all buffers were filled by other side before we napi_enabled, we
          * won't get another interrupt, so process any outstanding packets now.
@@ -2799,16 +2803,16 @@ static void virtnet_napi_do_enable(struct virtqueue *vq,
         local_bh_enable();
 }
 
-static void virtnet_napi_enable(struct receive_queue *rq)
+static void virtnet_napi_enable(struct receive_queue *rq, bool netdev_locked)
 {
         struct virtnet_info *vi = rq->vq->vdev->priv;
         int qidx = vq2rxq(rq->vq);
 
-        virtnet_napi_do_enable(rq->vq, &rq->napi);
+        virtnet_napi_do_enable(rq->vq, &rq->napi, netdev_locked);
         netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
 }
 
-static void virtnet_napi_tx_enable(struct send_queue *sq)
+static void virtnet_napi_tx_enable(struct send_queue *sq, bool netdev_locked)
 {
         struct virtnet_info *vi = sq->vq->vdev->priv;
         struct napi_struct *napi = &sq->napi;
@@ -2825,11 +2829,11 @@ static void virtnet_napi_tx_enable(struct send_queue *sq)
                 return;
         }
 
-        virtnet_napi_do_enable(sq->vq, napi);
+        virtnet_napi_do_enable(sq->vq, napi, netdev_locked);
         netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
 }
 
-static void virtnet_napi_tx_disable(struct send_queue *sq)
+static void virtnet_napi_tx_disable(struct send_queue *sq, bool netdev_locked)
 {
         struct virtnet_info *vi = sq->vq->vdev->priv;
         struct napi_struct *napi = &sq->napi;
@@ -2837,18 +2841,24 @@ static void virtnet_napi_tx_disable(struct send_queue *sq)
 
         if (napi->weight) {
                 netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
-                napi_disable(napi);
+                if (netdev_locked)
+                        napi_disable_locked(napi);
+                else
+                        napi_disable(napi);
         }
 }
 
-static void virtnet_napi_disable(struct receive_queue *rq)
+static void virtnet_napi_disable(struct receive_queue *rq, bool netdev_locked)
 {
         struct virtnet_info *vi = rq->vq->vdev->priv;
         struct napi_struct *napi = &rq->napi;
         int qidx = vq2rxq(rq->vq);
 
         netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
-        napi_disable(napi);
+        if (netdev_locked)
+                napi_disable_locked(napi);
+        else
+                napi_disable(napi);
 }
 
 static void refill_work(struct work_struct *work)
@@ -2875,9 +2885,11 @@ static void refill_work(struct work_struct *work)
                  * instance lock)
                  * - check netif_running() and return early to avoid a race
                  */
-                napi_disable(&rq->napi);
+                netdev_lock(vi->dev);
+                napi_disable_locked(&rq->napi);
                 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
-                virtnet_napi_do_enable(rq->vq, &rq->napi);
+                virtnet_napi_do_enable(rq->vq, &rq->napi, true);
+                netdev_unlock(vi->dev);
 
                 /* In theory, this can happen: if we don't get any buffers in
                  * we will *never* try to fill again.
@@ -3074,8 +3086,8 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
 
 static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
 {
-        virtnet_napi_tx_disable(&vi->sq[qp_index]);
-        virtnet_napi_disable(&vi->rq[qp_index]);
+        virtnet_napi_tx_disable(&vi->sq[qp_index], false);
+        virtnet_napi_disable(&vi->rq[qp_index], false);
         xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
 }
 
@@ -3094,8 +3106,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
         if (err < 0)
                 goto err_xdp_reg_mem_model;
 
-        virtnet_napi_enable(&vi->rq[qp_index]);
-        virtnet_napi_tx_enable(&vi->sq[qp_index]);
+        virtnet_napi_enable(&vi->rq[qp_index], false);
+        virtnet_napi_tx_enable(&vi->sq[qp_index], false);
 
         return 0;
 
@@ -3347,7 +3359,8 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq)
         bool running = netif_running(vi->dev);
 
         if (running) {
-                virtnet_napi_disable(rq);
+                netdev_lock(vi->dev);
+                virtnet_napi_disable(rq, true);
                 virtnet_cancel_dim(vi, &rq->dim);
         }
 }
@@ -3359,8 +3372,10 @@ static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq)
         if (!try_fill_recv(vi, rq, GFP_KERNEL))
                 schedule_delayed_work(&vi->refill, 0);
 
-        if (running)
-                virtnet_napi_enable(rq);
+        if (running) {
+                virtnet_napi_enable(rq, true);
+                netdev_unlock(vi->dev);
+        }
 }
 
 static int virtnet_rx_resize(struct virtnet_info *vi,
@@ -3389,7 +3404,7 @@ static void virtnet_tx_pause(struct virtnet_info *vi, struct send_queue *sq)
         qindex = sq - vi->sq;
 
         if (running)
-                virtnet_napi_tx_disable(sq);
+                virtnet_napi_tx_disable(sq, false);
 
         txq = netdev_get_tx_queue(vi->dev, qindex);
 
@@ -3423,7 +3438,7 @@ static void virtnet_tx_resume(struct virtnet_info *vi, struct send_queue *sq)
         __netif_tx_unlock_bh(txq);
 
         if (running)
-                virtnet_napi_tx_enable(sq);
+                virtnet_napi_tx_enable(sq, false);
 }
 
 static int virtnet_tx_resize(struct virtnet_info *vi, struct send_queue *sq,
@@ -5961,9 +5976,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 
         /* Make sure NAPI is not using any XDP TX queues for RX. */
         if (netif_running(dev)) {
+                netdev_lock(dev);
                 for (i = 0; i < vi->max_queue_pairs; i++) {
-                        virtnet_napi_disable(&vi->rq[i]);
-                        virtnet_napi_tx_disable(&vi->sq[i]);
+                        virtnet_napi_disable(&vi->rq[i], true);
+                        virtnet_napi_tx_disable(&vi->sq[i], true);
                 }
         }
 
@@ -6000,11 +6016,14 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
                 if (old_prog)
                         bpf_prog_put(old_prog);
                 if (netif_running(dev)) {
-                        virtnet_napi_enable(&vi->rq[i]);
-                        virtnet_napi_tx_enable(&vi->sq[i]);
+                        virtnet_napi_enable(&vi->rq[i], true);
+                        virtnet_napi_tx_enable(&vi->sq[i], true);
                 }
         }
 
+        if (netif_running(dev))
+                netdev_unlock(dev);
+
         return 0;
 
 err:
@@ -6016,9 +6035,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog,
 
         if (netif_running(dev)) {
                 for (i = 0; i < vi->max_queue_pairs; i++) {
-                        virtnet_napi_enable(&vi->rq[i]);
-                        virtnet_napi_tx_enable(&vi->sq[i]);
+                        virtnet_napi_enable(&vi->rq[i], true);
+                        virtnet_napi_tx_enable(&vi->sq[i], true);
                 }
+                netdev_unlock(dev);
         }
         if (prog)
                 bpf_prog_sub(prog, vi->max_queue_pairs - 1);
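For reference, the locking pattern the patch builds on (the netdev instance lock and the *_locked NAPI helpers introduced around commit 413f0271f396) has this general shape; a minimal sketch with a hypothetical function name, assuming a kernel that provides netdev_lock()/netdev_unlock() and napi_{enable,disable}_locked():

```c
#include <linux/netdevice.h>

/* Minimal sketch: reconfigure a queue with the instance lock held, so no
 * other locked path can disable or enable this NAPI in the middle. The
 * _locked helpers expect the caller to hold netdev_lock(); the plain
 * napi_enable()/napi_disable() take and drop the lock internally.
 */
static void example_reconfigure_queue(struct net_device *dev,
                                      struct napi_struct *napi)
{
        netdev_lock(dev);
        napi_disable_locked(napi);

        /* ... queue reconfiguration goes here, under the instance lock ... */

        napi_enable_locked(napi);
        netdev_unlock(dev);
}
```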