Message ID | 20250415074341.12461-2-minhquangbui99@gmail.com (mailing list archive) |
---|---|
State | Superseded |
Delegated to: | Netdev Maintainers |
Headers | show |
Series | virtio-net: disable delayed refill when pausing rx | expand |
On Tue, Apr 15, 2025 at 02:43:39PM +0700, Bui Quang Minh wrote: > When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call > napi_disable() on the receive queue's napi. In delayed refill_work, it > also calls napi_disable() on the receive queue's napi. When > napi_disable() is called on an already disabled napi, it will sleep in > napi_disable_locked while still holding the netdev_lock. As a result, > later napi_enable gets stuck too as it cannot acquire the netdev_lock. > This leads to refill_work and the pause-then-resume tx are stuck > altogether. > > This scenario can be reproducible by binding a XDP socket to virtio-net > interface without setting up the fill ring. As a result, try_fill_recv > will fail until the fill ring is set up and refill_work is scheduled. > > This commit adds virtnet_rx_(pause/resume)_all helpers and fixes up the > virtnet_rx_resume to disable future and cancel all inflights delayed > refill_work before calling napi_disable() to pause the rx. > > Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()") > Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com> Acked-by: Michael S. Tsirkin <mst@redhat.com> > --- > drivers/net/virtio_net.c | 69 +++++++++++++++++++++++++++++++++------- > 1 file changed, 57 insertions(+), 12 deletions(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index 7e4617216a4b..848fab51dfa1 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -3342,7 +3342,8 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) > return NETDEV_TX_OK; > } > > -static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) > +static void __virtnet_rx_pause(struct virtnet_info *vi, > + struct receive_queue *rq) > { > bool running = netif_running(vi->dev); > > @@ -3352,17 +3353,63 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) > } > } > > -static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) > +static void virtnet_rx_pause_all(struct virtnet_info *vi) > +{ > + int i; > + > + /* > + * Make sure refill_work does not run concurrently to > + * avoid napi_disable race which leads to deadlock. > + */ > + disable_delayed_refill(vi); > + cancel_delayed_work_sync(&vi->refill); > + for (i = 0; i < vi->max_queue_pairs; i++) > + __virtnet_rx_pause(vi, &vi->rq[i]); > +} > + > +static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) > +{ > + /* > + * Make sure refill_work does not run concurrently to > + * avoid napi_disable race which leads to deadlock. > + */ > + disable_delayed_refill(vi); > + cancel_delayed_work_sync(&vi->refill); > + __virtnet_rx_pause(vi, rq); > +} > + > +static void __virtnet_rx_resume(struct virtnet_info *vi, > + struct receive_queue *rq, > + bool refill) > { > bool running = netif_running(vi->dev); > > - if (!try_fill_recv(vi, rq, GFP_KERNEL)) > + if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) > schedule_delayed_work(&vi->refill, 0); > > if (running) > virtnet_napi_enable(rq); > } > > +static void virtnet_rx_resume_all(struct virtnet_info *vi) > +{ > + int i; > + > + enable_delayed_refill(vi); > + for (i = 0; i < vi->max_queue_pairs; i++) { > + if (i < vi->curr_queue_pairs) > + __virtnet_rx_resume(vi, &vi->rq[i], true); > + else > + __virtnet_rx_resume(vi, &vi->rq[i], false); > + } > +} > + > +static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) > +{ > + enable_delayed_refill(vi); > + __virtnet_rx_resume(vi, rq, true); > +} > + > static int virtnet_rx_resize(struct virtnet_info *vi, > struct receive_queue *rq, u32 ring_num) > { > @@ -5959,12 +6006,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, > if (prog) > bpf_prog_add(prog, vi->max_queue_pairs - 1); > > + virtnet_rx_pause_all(vi); > + > /* Make sure NAPI is not using any XDP TX queues for RX. */ > if (netif_running(dev)) { > - for (i = 0; i < vi->max_queue_pairs; i++) { > - virtnet_napi_disable(&vi->rq[i]); > + for (i = 0; i < vi->max_queue_pairs; i++) > virtnet_napi_tx_disable(&vi->sq[i]); > - } > } > > if (!prog) { > @@ -5996,13 +6043,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, > vi->xdp_enabled = false; > } > > + virtnet_rx_resume_all(vi); > for (i = 0; i < vi->max_queue_pairs; i++) { > if (old_prog) > bpf_prog_put(old_prog); > - if (netif_running(dev)) { > - virtnet_napi_enable(&vi->rq[i]); > + if (netif_running(dev)) > virtnet_napi_tx_enable(&vi->sq[i]); > - } > } > > return 0; > @@ -6014,11 +6060,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, > rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); > } > > + virtnet_rx_resume_all(vi); > if (netif_running(dev)) { > - for (i = 0; i < vi->max_queue_pairs; i++) { > - virtnet_napi_enable(&vi->rq[i]); > + for (i = 0; i < vi->max_queue_pairs; i++) > virtnet_napi_tx_enable(&vi->sq[i]); > - } > } > if (prog) > bpf_prog_sub(prog, vi->max_queue_pairs - 1); > -- > 2.43.0
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 7e4617216a4b..848fab51dfa1 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3342,7 +3342,8 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } -static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) +static void __virtnet_rx_pause(struct virtnet_info *vi, + struct receive_queue *rq) { bool running = netif_running(vi->dev); @@ -3352,17 +3353,63 @@ static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) } } -static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) +static void virtnet_rx_pause_all(struct virtnet_info *vi) +{ + int i; + + /* + * Make sure refill_work does not run concurrently to + * avoid napi_disable race which leads to deadlock. + */ + disable_delayed_refill(vi); + cancel_delayed_work_sync(&vi->refill); + for (i = 0; i < vi->max_queue_pairs; i++) + __virtnet_rx_pause(vi, &vi->rq[i]); +} + +static void virtnet_rx_pause(struct virtnet_info *vi, struct receive_queue *rq) +{ + /* + * Make sure refill_work does not run concurrently to + * avoid napi_disable race which leads to deadlock. + */ + disable_delayed_refill(vi); + cancel_delayed_work_sync(&vi->refill); + __virtnet_rx_pause(vi, rq); +} + +static void __virtnet_rx_resume(struct virtnet_info *vi, + struct receive_queue *rq, + bool refill) { bool running = netif_running(vi->dev); - if (!try_fill_recv(vi, rq, GFP_KERNEL)) + if (refill && !try_fill_recv(vi, rq, GFP_KERNEL)) schedule_delayed_work(&vi->refill, 0); if (running) virtnet_napi_enable(rq); } +static void virtnet_rx_resume_all(struct virtnet_info *vi) +{ + int i; + + enable_delayed_refill(vi); + for (i = 0; i < vi->max_queue_pairs; i++) { + if (i < vi->curr_queue_pairs) + __virtnet_rx_resume(vi, &vi->rq[i], true); + else + __virtnet_rx_resume(vi, &vi->rq[i], false); + } +} + +static void virtnet_rx_resume(struct virtnet_info *vi, struct receive_queue *rq) +{ + enable_delayed_refill(vi); + __virtnet_rx_resume(vi, rq, true); +} + static int virtnet_rx_resize(struct virtnet_info *vi, struct receive_queue *rq, u32 ring_num) { @@ -5959,12 +6006,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, if (prog) bpf_prog_add(prog, vi->max_queue_pairs - 1); + virtnet_rx_pause_all(vi); + /* Make sure NAPI is not using any XDP TX queues for RX. */ if (netif_running(dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) { - virtnet_napi_disable(&vi->rq[i]); + for (i = 0; i < vi->max_queue_pairs; i++) virtnet_napi_tx_disable(&vi->sq[i]); - } } if (!prog) { @@ -5996,13 +6043,12 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, vi->xdp_enabled = false; } + virtnet_rx_resume_all(vi); for (i = 0; i < vi->max_queue_pairs; i++) { if (old_prog) bpf_prog_put(old_prog); - if (netif_running(dev)) { - virtnet_napi_enable(&vi->rq[i]); + if (netif_running(dev)) virtnet_napi_tx_enable(&vi->sq[i]); - } } return 0; @@ -6014,11 +6060,10 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, rcu_assign_pointer(vi->rq[i].xdp_prog, old_prog); } + virtnet_rx_resume_all(vi); if (netif_running(dev)) { - for (i = 0; i < vi->max_queue_pairs; i++) { - virtnet_napi_enable(&vi->rq[i]); + for (i = 0; i < vi->max_queue_pairs; i++) virtnet_napi_tx_enable(&vi->sq[i]); - } } if (prog) bpf_prog_sub(prog, vi->max_queue_pairs - 1);
When pausing rx (e.g. set up xdp, xsk pool, rx resize), we call napi_disable() on the receive queue's napi. In delayed refill_work, it also calls napi_disable() on the receive queue's napi. When napi_disable() is called on an already disabled napi, it will sleep in napi_disable_locked while still holding the netdev_lock. As a result, later napi_enable gets stuck too as it cannot acquire the netdev_lock. This leads to refill_work and the pause-then-resume tx are stuck altogether. This scenario can be reproducible by binding a XDP socket to virtio-net interface without setting up the fill ring. As a result, try_fill_recv will fail until the fill ring is set up and refill_work is scheduled. This commit adds virtnet_rx_(pause/resume)_all helpers and fixes up the virtnet_rx_resume to disable future and cancel all inflights delayed refill_work before calling napi_disable() to pause the rx. Fixes: 413f0271f396 ("net: protect NAPI enablement with netdev_lock()") Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com> --- drivers/net/virtio_net.c | 69 +++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 12 deletions(-)