diff mbox series

[net-next,v5,3/4] virtio-net: Map NAPIs to queues

Message ID 20250227185017.206785-4-jdamato@fastly.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series virtio-net: Link queues to NAPIs | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 10 of 10 maintainers
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 0 this patch: 0
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 83 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2025-02-28--03-00 (tests: 894)

Commit Message

Joe Damato Feb. 27, 2025, 6:50 p.m. UTC
Use netif_queue_set_napi to map NAPIs to queue IDs so that the mapping
can be accessed by user apps. Note that the netif_queue_set_napi
currently requires RTNL, so care must be taken to ensure RTNL is held on
paths where this API might be reached.

The paths in the driver where this API can be reached appear to be:

  - ndo_open, ndo_close, which hold RTNL so no driver change is needed.
  - rx_pause, rx_resume, tx_pause, tx_resume are reached either via
    an ethtool ioctl or via XSK - neither path requires a driver change.
  - power management paths (which call open and close), which have been
    updated to hold/release RTNL.
  - refill_work, which has been updated to hold RTNL.

$ ethtool -i ens4 | grep driver
driver: virtio_net

$ sudo ethtool -L ens4 combined 4

$ ./tools/net/ynl/pyynl/cli.py \
       --spec Documentation/netlink/specs/netdev.yaml \
       --dump queue-get --json='{"ifindex": 2}'
[{'id': 0, 'ifindex': 2, 'napi-id': 8289, 'type': 'rx'},
 {'id': 1, 'ifindex': 2, 'napi-id': 8290, 'type': 'rx'},
 {'id': 2, 'ifindex': 2, 'napi-id': 8291, 'type': 'rx'},
 {'id': 3, 'ifindex': 2, 'napi-id': 8292, 'type': 'rx'},
 {'id': 0, 'ifindex': 2, 'type': 'tx'},
 {'id': 1, 'ifindex': 2, 'type': 'tx'},
 {'id': 2, 'ifindex': 2, 'type': 'tx'},
 {'id': 3, 'ifindex': 2, 'type': 'tx'}]

Note that virtio_net has TX-only NAPIs which do not have NAPI IDs, so
the lack of 'napi-id' in the above output is expected.

Signed-off-by: Joe Damato <jdamato@fastly.com>
---
 drivers/net/virtio_net.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

Comments

Jason Wang Feb. 28, 2025, 8:14 a.m. UTC | #1
On Fri, Feb 28, 2025 at 2:50 AM Joe Damato <jdamato@fastly.com> wrote:
>
> Use netif_queue_set_napi to map NAPIs to queue IDs so that the mapping
> can be accessed by user apps. Note that the netif_queue_set_napi
> currently requires RTNL, so care must be taken to ensure RTNL is held on
> paths where this API might be reached.
>
> The paths in the driver where this API can be reached appear to be:
>
>   - ndo_open, ndo_close, which hold RTNL so no driver change is needed.
>   - rx_pause, rx_resume, tx_pause, tx_resume are reached either via
>     an ethtool ioctl or via XSK - neither path requires a driver change.
>   - power management paths (which call open and close), which have been
>     updated to hold/release RTNL.
>   - refill_work, which has been updated to hold RTNL.
>
> $ ethtool -i ens4 | grep driver
> driver: virtio_net
>
> $ sudo ethtool -L ens4 combined 4
>
> $ ./tools/net/ynl/pyynl/cli.py \
>        --spec Documentation/netlink/specs/netdev.yaml \
>        --dump queue-get --json='{"ifindex": 2}'
> [{'id': 0, 'ifindex': 2, 'napi-id': 8289, 'type': 'rx'},
>  {'id': 1, 'ifindex': 2, 'napi-id': 8290, 'type': 'rx'},
>  {'id': 2, 'ifindex': 2, 'napi-id': 8291, 'type': 'rx'},
>  {'id': 3, 'ifindex': 2, 'napi-id': 8292, 'type': 'rx'},
>  {'id': 0, 'ifindex': 2, 'type': 'tx'},
>  {'id': 1, 'ifindex': 2, 'type': 'tx'},
>  {'id': 2, 'ifindex': 2, 'type': 'tx'},
>  {'id': 3, 'ifindex': 2, 'type': 'tx'}]
>
> Note that virtio_net has TX-only NAPIs which do not have NAPI IDs, so
> the lack of 'napi-id' in the above output is expected.
>
> Signed-off-by: Joe Damato <jdamato@fastly.com>
> ---

Acked-by: Jason Wang <jasowang@redhat.com>

Thanks
Jakub Kicinski March 1, 2025, 2:27 a.m. UTC | #2
On Thu, 27 Feb 2025 18:50:13 +0000 Joe Damato wrote:
> @@ -2870,9 +2883,15 @@ static void refill_work(struct work_struct *work)
>  	for (i = 0; i < vi->curr_queue_pairs; i++) {
>  		struct receive_queue *rq = &vi->rq[i];
>  
> +		rtnl_lock();
>  		virtnet_napi_disable(rq);
> +		rtnl_unlock();
> +
>  		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> +
> +		rtnl_lock();
>  		virtnet_napi_enable(rq);
> +		rtnl_unlock();

Looks to me like refill_work is cancelled _sync while holding rtnl_lock
from the close path. I think this could deadlock?
Joe Damato March 3, 2025, 4:46 p.m. UTC | #3
On Fri, Feb 28, 2025 at 06:27:59PM -0800, Jakub Kicinski wrote:
> On Thu, 27 Feb 2025 18:50:13 +0000 Joe Damato wrote:
> > @@ -2870,9 +2883,15 @@ static void refill_work(struct work_struct *work)
> >  	for (i = 0; i < vi->curr_queue_pairs; i++) {
> >  		struct receive_queue *rq = &vi->rq[i];
> >  
> > +		rtnl_lock();
> >  		virtnet_napi_disable(rq);
> > +		rtnl_unlock();
> > +
> >  		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> > +
> > +		rtnl_lock();
> >  		virtnet_napi_enable(rq);
> > +		rtnl_unlock();
> 
> Looks to me like refill_work is cancelled _sync while holding rtnl_lock
> from the close path. I think this could deadlock?

Good catch, thank you!

It looks like this is also the case in the failure path on
virtnet_open.

Jason: do you have any suggestions?

It looks like in both open and close disable_delayed_refill is
called first, before the cancel_delayed_work_sync.

Would something like this solve the problem?

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 76dcd65ec0f2..457115300f05 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2880,6 +2880,13 @@ static void refill_work(struct work_struct *work)
        bool still_empty;
        int i;

+       spin_lock(&vi->refill_lock);
+       if (!vi->refill_enabled) {
+               spin_unlock(&vi->refill_lock);
+               return;
+       }
+       spin_unlock(&vi->refill_lock);
+
        for (i = 0; i < vi->curr_queue_pairs; i++) {
                struct receive_queue *rq = &vi->rq[i];
Joe Damato March 3, 2025, 5 p.m. UTC | #4
On Mon, Mar 03, 2025 at 11:46:10AM -0500, Joe Damato wrote:
> On Fri, Feb 28, 2025 at 06:27:59PM -0800, Jakub Kicinski wrote:
> > On Thu, 27 Feb 2025 18:50:13 +0000 Joe Damato wrote:
> > > @@ -2870,9 +2883,15 @@ static void refill_work(struct work_struct *work)
> > >  	for (i = 0; i < vi->curr_queue_pairs; i++) {
> > >  		struct receive_queue *rq = &vi->rq[i];
> > >  
> > > +		rtnl_lock();
> > >  		virtnet_napi_disable(rq);
> > > +		rtnl_unlock();
> > > +
> > >  		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> > > +
> > > +		rtnl_lock();
> > >  		virtnet_napi_enable(rq);
> > > +		rtnl_unlock();
> > 
> > Looks to me like refill_work is cancelled _sync while holding rtnl_lock
> > from the close path. I think this could deadlock?
> 
> Good catch, thank you!
> 
> It looks like this is also the case in the failure path on
> virtnet_open.
> 
> Jason: do you have any suggestions?
> 
> It looks like in both open and close disable_delayed_refill is
> called first, before the cancel_delayed_work_sync.
> 
> Would something like this solve the problem?
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 76dcd65ec0f2..457115300f05 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2880,6 +2880,13 @@ static void refill_work(struct work_struct *work)
>         bool still_empty;
>         int i;
> 
> +       spin_lock(&vi->refill_lock);
> +       if (!vi->refill_enabled) {
> +               spin_unlock(&vi->refill_lock);
> +               return;
> +       }
> +       spin_unlock(&vi->refill_lock);
> +
>         for (i = 0; i < vi->curr_queue_pairs; i++) {
>                 struct receive_queue *rq = &vi->rq[i];
>

Err, I suppose this also doesn't work because:

CPU0                       CPU1
rtnl_lock                  (before CPU0 calls disable_delayed_refill) 
  virtnet_close            refill_work
                             rtnl_lock()
  cancel_sync <= deadlock

Need to give this a bit more thought.
Joe Damato March 3, 2025, 6:33 p.m. UTC | #5
On Mon, Mar 03, 2025 at 12:00:10PM -0500, Joe Damato wrote:
> On Mon, Mar 03, 2025 at 11:46:10AM -0500, Joe Damato wrote:
> > On Fri, Feb 28, 2025 at 06:27:59PM -0800, Jakub Kicinski wrote:
> > > On Thu, 27 Feb 2025 18:50:13 +0000 Joe Damato wrote:
> > > > @@ -2870,9 +2883,15 @@ static void refill_work(struct work_struct *work)
> > > >  	for (i = 0; i < vi->curr_queue_pairs; i++) {
> > > >  		struct receive_queue *rq = &vi->rq[i];
> > > >  
> > > > +		rtnl_lock();
> > > >  		virtnet_napi_disable(rq);
> > > > +		rtnl_unlock();
> > > > +
> > > >  		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> > > > +
> > > > +		rtnl_lock();
> > > >  		virtnet_napi_enable(rq);
> > > > +		rtnl_unlock();
> > > 
> > > Looks to me like refill_work is cancelled _sync while holding rtnl_lock
> > > from the close path. I think this could deadlock?
> > 
> > Good catch, thank you!
> > 
> > It looks like this is also the case in the failure path on
> > virtnet_open.
> > 
> > Jason: do you have any suggestions?
> > 
> > It looks like in both open and close disable_delayed_refill is
> > called first, before the cancel_delayed_work_sync.
> > 
> > Would something like this solve the problem?
> > 
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 76dcd65ec0f2..457115300f05 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -2880,6 +2880,13 @@ static void refill_work(struct work_struct *work)
> >         bool still_empty;
> >         int i;
> > 
> > +       spin_lock(&vi->refill_lock);
> > +       if (!vi->refill_enabled) {
> > +               spin_unlock(&vi->refill_lock);
> > +               return;
> > +       }
> > +       spin_unlock(&vi->refill_lock);
> > +
> >         for (i = 0; i < vi->curr_queue_pairs; i++) {
> >                 struct receive_queue *rq = &vi->rq[i];
> >
> 
> Err, I suppose this also doesn't work because:
> 
> CPU0                       CPU1
> rtnl_lock                  (before CPU0 calls disable_delayed_refill) 
>   virtnet_close            refill_work
>                              rtnl_lock()
>   cancel_sync <= deadlock
> 
> Need to give this a bit more thought.

How about we don't use the API at all from refill_work?

Patch 4 adds consistent NAPI config state, and since refill_work isn't a
queue resize, maybe we don't need to call netif_queue_set_napi at
all — the NAPI IDs are persisted in the NAPI config state and
refill_work shouldn't change that?

In which case, we could go back to what refill_work was doing
before and avoid the problem entirely.

What do you think ?

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 76dcd65ec0f2..d6c8fe670005 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2883,15 +2883,9 @@ static void refill_work(struct work_struct *work)
        for (i = 0; i < vi->curr_queue_pairs; i++) {
                struct receive_queue *rq = &vi->rq[i];

-               rtnl_lock();
-               virtnet_napi_disable(rq);
-               rtnl_unlock();
-
+               napi_disable(&rq->napi);
                still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
-
-               rtnl_lock();
-               virtnet_napi_enable(rq);
-               rtnl_unlock();
+               virtnet_napi_do_enable(rq->vq, &rq->napi);

                /* In theory, this can happen: if we don't get any buffers in
                 * we will *never* try to fill again.
Jakub Kicinski March 4, 2025, 12:03 a.m. UTC | #6
On Mon, 3 Mar 2025 13:33:10 -0500 Joe Damato wrote:
> > > @@ -2880,6 +2880,13 @@ static void refill_work(struct work_struct *work)
> > >         bool still_empty;
> > >         int i;
> > > 
> > > +       spin_lock(&vi->refill_lock);
> > > +       if (!vi->refill_enabled) {
> > > +               spin_unlock(&vi->refill_lock);
> > > +               return;
> > > +       }
> > > +       spin_unlock(&vi->refill_lock);
> > > +
> > >         for (i = 0; i < vi->curr_queue_pairs; i++) {
> > >                 struct receive_queue *rq = &vi->rq[i];
> > >  
> > 
> > Err, I suppose this also doesn't work because:
> > 
> > CPU0                       CPU1
> > rtnl_lock                  (before CPU0 calls disable_delayed_refill) 
> >   virtnet_close            refill_work
> >                              rtnl_lock()
> >   cancel_sync <= deadlock
> > 
> > Need to give this a bit more thought.  
> 
> How about we don't use the API at all from refill_work?
> 
> Patch 4 adds consistent NAPI config state and refill_work isn't a
> queue resize maybe we don't need to call the netif_queue_set_napi at
> all since the NAPI IDs are persisted in the NAPI config state and
> refill_work shouldn't change that?
> 
> In which case, we could go back to what refill_work was doing
> before and avoid the problem entirely.
> 
> What do you think ?

Should work, I think. Tho, I suspect someone will want to add queue API
support to virtio sooner or later, and they will run into the same
problem with the netdev instance lock, as all of ndo_close() will then
be covered with netdev->lock.

A more thorough and idiomatic way to solve the problem would be to cancel
the work non-sync in ndo_close, add cancel with _sync after netdev is
unregistered (in virtnet_remove()) when the lock is no longer held, then
wrap the entire work with a relevant lock and check if netif_running()
to return early in case of a race.

Middle ground would be to do what you suggested above and just leave 
a well worded comment somewhere that will show up in diffs adding queue
API support?
Joe Damato March 4, 2025, 3:08 p.m. UTC | #7
On Mon, Mar 03, 2025 at 04:03:55PM -0800, Jakub Kicinski wrote:
> On Mon, 3 Mar 2025 13:33:10 -0500 Joe Damato wrote:
> > > > @@ -2880,6 +2880,13 @@ static void refill_work(struct work_struct *work)
> > > >         bool still_empty;
> > > >         int i;
> > > > 
> > > > +       spin_lock(&vi->refill_lock);
> > > > +       if (!vi->refill_enabled) {
> > > > +               spin_unlock(&vi->refill_lock);
> > > > +               return;
> > > > +       }
> > > > +       spin_unlock(&vi->refill_lock);
> > > > +
> > > >         for (i = 0; i < vi->curr_queue_pairs; i++) {
> > > >                 struct receive_queue *rq = &vi->rq[i];
> > > >  
> > > 
> > > Err, I suppose this also doesn't work because:
> > > 
> > > CPU0                       CPU1
> > > rtnl_lock                  (before CPU0 calls disable_delayed_refill) 
> > >   virtnet_close            refill_work
> > >                              rtnl_lock()
> > >   cancel_sync <= deadlock
> > > 
> > > Need to give this a bit more thought.  
> > 
> > How about we don't use the API at all from refill_work?
> > 
> > Patch 4 adds consistent NAPI config state and refill_work isn't a
> > queue resize maybe we don't need to call the netif_queue_set_napi at
> > all since the NAPI IDs are persisted in the NAPI config state and
> > refill_work shouldn't change that?
> > 
> > In which case, we could go back to what refill_work was doing
> > before and avoid the problem entirely.
> > 
> > What do you think ?
> 
> Should work, I think. Tho, I suspect someone will want to add queue API
> support to virtio sooner or later, and they will run into the same
> problem with the netdev instance lock, as all of ndo_close() will then
> be covered with netdev->lock.
> 
> More thorough and idiomatic way to solve the problem would be to cancel
> the work non-sync in ndo_close, add cancel with _sync after netdev is
> unregistered (in virtnet_remove()) when the lock is no longer held, then
> wrap the entire work with a relevant lock and check if netif_running()
> to return early in case of a race.

Thanks for the guidance. I am happy to make an attempt at
implementing this in a future, separate series that follows this
one (probably after netdev conf in a few weeks :).

> Middle ground would be to do what you suggested above and just leave 
> a well worded comment somewhere that will show up in diffs adding queue
> API support?

Jason, Michael, et al.: what do you think? I don't want to spin
up a v6 if you are opposed to proceeding this way. Please let me
know.
Jason Wang March 5, 2025, 5:11 a.m. UTC | #8
On Tue, Mar 4, 2025 at 11:09 PM Joe Damato <jdamato@fastly.com> wrote:
>
> On Mon, Mar 03, 2025 at 04:03:55PM -0800, Jakub Kicinski wrote:
> > On Mon, 3 Mar 2025 13:33:10 -0500 Joe Damato wrote:
> > > > > @@ -2880,6 +2880,13 @@ static void refill_work(struct work_struct *work)
> > > > >         bool still_empty;
> > > > >         int i;
> > > > >
> > > > > +       spin_lock(&vi->refill_lock);
> > > > > +       if (!vi->refill_enabled) {
> > > > > +               spin_unlock(&vi->refill_lock);
> > > > > +               return;
> > > > > +       }
> > > > > +       spin_unlock(&vi->refill_lock);
> > > > > +
> > > > >         for (i = 0; i < vi->curr_queue_pairs; i++) {
> > > > >                 struct receive_queue *rq = &vi->rq[i];
> > > > >
> > > >
> > > > Err, I suppose this also doesn't work because:
> > > >
> > > > CPU0                       CPU1
> > > > rtnl_lock                  (before CPU0 calls disable_delayed_refill)
> > > >   virtnet_close            refill_work
> > > >                              rtnl_lock()
> > > >   cancel_sync <= deadlock
> > > >
> > > > Need to give this a bit more thought.
> > >
> > > How about we don't use the API at all from refill_work?
> > >
> > > Patch 4 adds consistent NAPI config state and refill_work isn't a
> > > queue resize maybe we don't need to call the netif_queue_set_napi at
> > > all since the NAPI IDs are persisted in the NAPI config state and
> > > refill_work shouldn't change that?
> > >
> > > In which case, we could go back to what refill_work was doing
> > > before and avoid the problem entirely.
> > >
> > > What do you think ?
> >
> > Should work, I think. Tho, I suspect someone will want to add queue API
> > support to virtio sooner or later, and they will run into the same
> > problem with the netdev instance lock, as all of ndo_close() will then
> > be covered with netdev->lock.
> >
> > More thorough and idiomatic way to solve the problem would be to cancel
> > the work non-sync in ndo_close, add cancel with _sync after netdev is
> > unregistered (in virtnet_remove()) when the lock is no longer held, then
> > wrap the entire work with a relevant lock and check if netif_running()
> > to return early in case of a race.
>
> Thanks for the guidance. I am happy to make an attempt at
> implementing this in a future, separate series that follows this
> one (probably after netdev conf in a few weeks :).
>
> > Middle ground would be to do what you suggested above and just leave
> > a well worded comment somewhere that will show up in diffs adding queue
> > API support?
>
> Jason, Michael, et. al.:  what do you think ? I don't want to spin
> up a v6 if you are opposed to proceeding this way. Please let me
> know.
>

Maybe, but we need to make sure there's no use-after-free, etc.
(virtnet_close() has several callers).

Thanks
Joe Damato March 5, 2025, 4:34 p.m. UTC | #9
On Wed, Mar 05, 2025 at 01:11:55PM +0800, Jason Wang wrote:
> On Tue, Mar 4, 2025 at 11:09 PM Joe Damato <jdamato@fastly.com> wrote:
> >
> > On Mon, Mar 03, 2025 at 04:03:55PM -0800, Jakub Kicinski wrote:
> > > On Mon, 3 Mar 2025 13:33:10 -0500 Joe Damato wrote:

[...]

> > > Middle ground would be to do what you suggested above and just leave
> > > a well worded comment somewhere that will show up in diffs adding queue
> > > API support?
> >
> > Jason, Michael, et. al.:  what do you think ? I don't want to spin
> > up a v6 if you are opposed to proceeding this way. Please let me
> > know.
> >
> 
> Maybe, but need to make sure there's no use-after-free (etc.
> virtnet_close() has several callers).

Sorry, I think I am missing something. Can you say more?

I was asking: if I add the following diff below to patch 3, will
that be acceptable for you as a middle ground until a more idiomatic
implementation can be done ?

Since this diff leaves refill_work as it functioned before, it
avoids the problem Jakub pointed out and shouldn't introduce any
bugs since refill_work isn't changing from the original
implementation ?

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 76dcd65ec0f2..d6c8fe670005 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2883,15 +2883,9 @@ static void refill_work(struct work_struct *work)
        for (i = 0; i < vi->curr_queue_pairs; i++) {
                struct receive_queue *rq = &vi->rq[i];

-               rtnl_lock();
-               virtnet_napi_disable(rq);
-               rtnl_unlock();
-
+               napi_disable(&rq->napi);
                still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
-
-               rtnl_lock();
-               virtnet_napi_enable(rq);
-               rtnl_unlock();
+               virtnet_napi_do_enable(rq->vq, &rq->napi);

                /* In theory, this can happen: if we don't get any buffers in
                 * we will *never* try to fill again.
Jason Wang March 6, 2025, 12:15 a.m. UTC | #10
On Thu, Mar 6, 2025 at 12:34 AM Joe Damato <jdamato@fastly.com> wrote:
>
> On Wed, Mar 05, 2025 at 01:11:55PM +0800, Jason Wang wrote:
> > On Tue, Mar 4, 2025 at 11:09 PM Joe Damato <jdamato@fastly.com> wrote:
> > >
> > > On Mon, Mar 03, 2025 at 04:03:55PM -0800, Jakub Kicinski wrote:
> > > > On Mon, 3 Mar 2025 13:33:10 -0500 Joe Damato wrote:
>
> [...]
>
> > > > Middle ground would be to do what you suggested above and just leave
> > > > a well worded comment somewhere that will show up in diffs adding queue
> > > > API support?
> > >
> > > Jason, Michael, et. al.:  what do you think ? I don't want to spin
> > > up a v6 if you are opposed to proceeding this way. Please let me
> > > know.
> > >
> >
> > Maybe, but need to make sure there's no use-after-free (etc.
> > virtnet_close() has several callers).
>
> Sorry, I think I am missing something. Can you say more?
>
> I was asking: if I add the following diff below to patch 3, will
> that be acceptable for you as a middle ground until a more idiomatic
> implementation can be done ?

Yes, I misunderstand you before.

>
> Since this diff leaves refill_work as it functioned before, it
> avoids the problem Jakub pointed out and shouldn't introduce any
> bugs since refill_work isn't changing from the original
> implementation ?
>
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index 76dcd65ec0f2..d6c8fe670005 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -2883,15 +2883,9 @@ static void refill_work(struct work_struct *work)
>         for (i = 0; i < vi->curr_queue_pairs; i++) {
>                 struct receive_queue *rq = &vi->rq[i];
>
> -               rtnl_lock();
> -               virtnet_napi_disable(rq);
> -               rtnl_unlock();
> -
> +               napi_disable(&rq->napi);
>                 still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
> -
> -               rtnl_lock();
> -               virtnet_napi_enable(rq);
> -               rtnl_unlock();
> +               virtnet_napi_do_enable(rq->vq, &rq->napi);
>
>                 /* In theory, this can happen: if we don't get any buffers in
>                  * we will *never* try to fill again.
>

Works for me.

Thanks
Joe Damato March 6, 2025, 1:42 a.m. UTC | #11
On Mon, Mar 03, 2025 at 04:03:55PM -0800, Jakub Kicinski wrote:
> On Mon, 3 Mar 2025 13:33:10 -0500 Joe Damato wrote:
> > 
> > How about we don't use the API at all from refill_work?
> > 
> > Patch 4 adds consistent NAPI config state and refill_work isn't a
> > queue resize maybe we don't need to call the netif_queue_set_napi at
> > all since the NAPI IDs are persisted in the NAPI config state and
> > refill_work shouldn't change that?
> > 
> > In which case, we could go back to what refill_work was doing
> > before and avoid the problem entirely.
> > 
> > What do you think ?
> 
> Should work, I think. Tho, I suspect someone will want to add queue API
> support to virtio sooner or later, and they will run into the same
> problem with the netdev instance lock, as all of ndo_close() will then
> be covered with netdev->lock.
> 
> More thorough and idiomatic way to solve the problem would be to cancel
> the work non-sync in ndo_close, add cancel with _sync after netdev is
> unregistered (in virtnet_remove()) when the lock is no longer held, then
> wrap the entire work with a relevant lock and check if netif_running()
> to return early in case of a race.
> 
> Middle ground would be to do what you suggested above and just leave 
> a well worded comment somewhere that will show up in diffs adding queue
> API support?

Seems like Jason agrees that leaving refill_work unmodified will
work [1].

I think leaving a comment is a good idea and am happy to do so. Not
sure where would be a good spot for it.

Two spots that come to mind are:
 - in virtnet_probe where all the other netdev ops are plumbed
   through, or
 - above virtnet_disable_queue_pair which I assume a future queue
   API implementor would need to call for ndo_queue_stop

I get the feeling you have a much better suggestion in mind though
:)

[1]: https://lore.kernel.org/netdev/CACGkMEvWuRjBbc3PvOUpDFkjcby5QNLw5hA_FpNSPyWjkEXD_Q@mail.gmail.com/
Jakub Kicinski March 6, 2025, 2:21 a.m. UTC | #12
On Wed, 5 Mar 2025 17:42:35 -0800 Joe Damato wrote:
> Two spots that come to mind are:
>  - in virtnet_probe where all the other netdev ops are plumbed
>    through, or
>  - above virtnet_disable_queue_pair which I assume a future queue
>    API implementor would need to call for ndo_queue_stop

I'd put it next to some call which will have to be inspected.
Normally we change napi_disable() to napi_disable_locked()
for drivers using the instance lock, so maybe on the napi_disable()
line in the refill?
Joe Damato March 6, 2025, 5 p.m. UTC | #13
On Wed, Mar 05, 2025 at 06:21:18PM -0800, Jakub Kicinski wrote:
> On Wed, 5 Mar 2025 17:42:35 -0800 Joe Damato wrote:
> > Two spots that come to mind are:
> >  - in virtnet_probe where all the other netdev ops are plumbed
> >    through, or
> >  - above virtnet_disable_queue_pair which I assume a future queue
> >    API implementor would need to call for ndo_queue_stop
> 
> I'd put it next to some call which will have to be inspected.
> Normally we change napi_disable() to napi_disable_locked()
> for drivers using the instance lock, so maybe on the napi_disable()
> line in the refill? 

Sure, that seems reasonable to me.

Does this comment seem reasonable? I tried to distill what you said
in your previous message (thanks for the guidance, btw):

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index d6c8fe670005..fe5f6313d422 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2883,6 +2883,18 @@ static void refill_work(struct work_struct *work)
        for (i = 0; i < vi->curr_queue_pairs; i++) {
                struct receive_queue *rq = &vi->rq[i];

+               /*
+                * When queue API support is added in the future and the call
+                * below becomes napi_disable_locked, this driver will need to
+                * be refactored.
+                *
+                * One possible solution would be to:
+                *   - cancel refill_work with cancel_delayed_work (note: non-sync)
+                *   - cancel refill_work with cancel_delayed_work_sync in
+                *     virtnet_remove after the netdev is unregistered
+                *   - wrap all of the work in a lock (perhaps vi->refill_lock?)
+                *   - check netif_running() and return early to avoid a race
+                */
Jakub Kicinski March 6, 2025, 6:21 p.m. UTC | #14
On Thu, 6 Mar 2025 09:00:02 -0800 Joe Damato wrote:
> +                *   - wrap all of the work in a lock (perhaps vi->refill_lock?)
> +                *   - check netif_running() and return early to avoid a race
> +                */

Probably the netdev instance lock is better here, as it will also
protect the return value of netif_running(). IOW we need to
base the "is the device up" test on some state that's protected
by the lock we take.
diff mbox series

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index e578885c1093..76dcd65ec0f2 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2823,13 +2823,18 @@  static void virtnet_napi_do_enable(struct virtqueue *vq,
 
 static void virtnet_napi_enable(struct receive_queue *rq)
 {
+	struct virtnet_info *vi = rq->vq->vdev->priv;
+	int qidx = vq2rxq(rq->vq);
+
 	virtnet_napi_do_enable(rq->vq, &rq->napi);
+	netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, &rq->napi);
 }
 
 static void virtnet_napi_tx_enable(struct send_queue *sq)
 {
 	struct virtnet_info *vi = sq->vq->vdev->priv;
 	struct napi_struct *napi = &sq->napi;
+	int qidx = vq2txq(sq->vq);
 
 	if (!napi->weight)
 		return;
@@ -2843,20 +2848,28 @@  static void virtnet_napi_tx_enable(struct send_queue *sq)
 	}
 
 	virtnet_napi_do_enable(sq->vq, napi);
+	netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, napi);
 }
 
 static void virtnet_napi_tx_disable(struct send_queue *sq)
 {
+	struct virtnet_info *vi = sq->vq->vdev->priv;
 	struct napi_struct *napi = &sq->napi;
+	int qidx = vq2txq(sq->vq);
 
-	if (napi->weight)
+	if (napi->weight) {
+		netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_TX, NULL);
 		napi_disable(napi);
+	}
 }
 
 static void virtnet_napi_disable(struct receive_queue *rq)
 {
+	struct virtnet_info *vi = rq->vq->vdev->priv;
 	struct napi_struct *napi = &rq->napi;
+	int qidx = vq2rxq(rq->vq);
 
+	netif_queue_set_napi(vi->dev, qidx, NETDEV_QUEUE_TYPE_RX, NULL);
 	napi_disable(napi);
 }
 
@@ -2870,9 +2883,15 @@  static void refill_work(struct work_struct *work)
 	for (i = 0; i < vi->curr_queue_pairs; i++) {
 		struct receive_queue *rq = &vi->rq[i];
 
+		rtnl_lock();
 		virtnet_napi_disable(rq);
+		rtnl_unlock();
+
 		still_empty = !try_fill_recv(vi, rq, GFP_KERNEL);
+
+		rtnl_lock();
 		virtnet_napi_enable(rq);
+		rtnl_unlock();
 
 		/* In theory, this can happen: if we don't get any buffers in
 		 * we will *never* try to fill again.
@@ -5650,8 +5669,11 @@  static void virtnet_freeze_down(struct virtio_device *vdev)
 	netif_tx_lock_bh(vi->dev);
 	netif_device_detach(vi->dev);
 	netif_tx_unlock_bh(vi->dev);
-	if (netif_running(vi->dev))
+	if (netif_running(vi->dev)) {
+		rtnl_lock();
 		virtnet_close(vi->dev);
+		rtnl_unlock();
+	}
 }
 
 static int init_vqs(struct virtnet_info *vi);
@@ -5671,7 +5693,9 @@  static int virtnet_restore_up(struct virtio_device *vdev)
 	enable_rx_mode_work(vi);
 
 	if (netif_running(vi->dev)) {
+		rtnl_lock();
 		err = virtnet_open(vi->dev);
+		rtnl_unlock();
 		if (err)
 			return err;
 	}