diff mbox series

[net-next,v8,02/10] net: Add queue and napi association

Message ID 170018380870.3767.15478317180336448511.stgit@anambiarhost.jf.intel.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Introduce queue and NAPI support in netdev-genl (Was: Introduce NAPI queues support) | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 5300 this patch: 5300
netdev/cc_maintainers warning 2 maintainers not CCed: edumazet@google.com daniel@iogearbox.net
netdev/build_clang success Errors and warnings before: 1378 this patch: 1378
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 5632 this patch: 5632
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 84 lines checked
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

Nambiar, Amritha Nov. 17, 2023, 1:16 a.m. UTC
Add the napi pointer in netdev queue for tracking the napi
instance for each queue. This achieves the queue<->napi mapping.

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Reviewed-by: Sridhar Samudrala <sridhar.samudrala@intel.com>
---
 include/linux/netdevice.h     |   11 ++++++++++
 include/net/netdev_rx_queue.h |    4 ++++
 net/core/dev.c                |   45 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+)

Comments

Jakub Kicinski Nov. 20, 2023, 11:54 p.m. UTC | #1
On Thu, 16 Nov 2023 17:16:48 -0800 Amritha Nambiar wrote:
> Add the napi pointer in netdev queue for tracking the napi
> instance for each queue. This achieves the queue<->napi mapping.

I took the series for a spin. I'll send a bnxt patch in a separate
reply, please add it to the series.

Three high level comments:

 - the netif_queue_set_napi() vs __netif_queue_set_napi() gave me pause;
   developers may be used to calling netif_*() functions from open/stop
   handlers, without worrying about locking.
   I think that netif_queue_set_napi() should assume rtnl_lock was
   already taken, as it will be in 90% of cases. A rare driver which
   does not hold it should take it locally for now.

 - drivers don't set real_num_*_queues to 0 when they go down,
   currently. So even tho we don't list any disabled queues when
   device is UP, we list queues when device is down.
   I mean:

   $ ifup eth0
   $ ethtool -L eth0 combined 4
   $ ./get-queues my-device
   ... will list 4 rx and 4 tx queues ...
   $ ethtool -L eth0 combined 2
   $ ./get-queues my-device
   ... will list 2 rx and 2 tx queues ...
   $ ifdown eth0
   $ ./get-queues my-device
   ... will list 2 rx and 2 tx queues ...
   ... even tho practically speaking there are no active queues ...

   I think we should skip listing queue and NAPI info of devices which
   are DOWN. Do you have any use case which would need looking at those?

 - We need a way to detach queues from NAPI. This is sort-of related to
   the above, maybe its not as crucial once we skip DOWN devices but
   nonetheless for symmetry we should let the driver clear the NAPI
   pointer. NAPIs may be allocated dynamically, the queue::napi pointer
   may get stale.

I hacked together the following for my local testing, feel free to fold
appropriate parts into your patches:

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 1a0603b3529d..2ed7a3aeec40 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2948,10 +2948,11 @@ static void
 ice_queue_set_napi(unsigned int queue_index, enum netdev_queue_type type,
 		   struct napi_struct *napi, bool locked)
 {
-	if (locked)
-		__netif_queue_set_napi(queue_index, type, napi);
-	else
-		netif_queue_set_napi(queue_index, type, napi);
+	if (!locked)
+		rtnl_lock();
+	netif_queue_set_napi(napi->dev, queue_index, type, napi);
+	if (!locked)
+		rtnl_unlock();
 }
 
 /**
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index dbc4ea74b8d6..e09a039a092a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2644,13 +2644,10 @@ static inline void *netdev_priv(const struct net_device *dev)
  */
 #define SET_NETDEV_DEVTYPE(net, devtype)	((net)->dev.type = (devtype))
 
-void netif_queue_set_napi(unsigned int queue_index, enum netdev_queue_type type,
+void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
+			  enum netdev_queue_type type,
 			  struct napi_struct *napi);
 
-void __netif_queue_set_napi(unsigned int queue_index,
-			    enum netdev_queue_type type,
-			    struct napi_struct *napi);
-
 static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
 {
 	napi->irq = irq;
diff --git a/net/core/dev.c b/net/core/dev.c
index 99ca59e18abf..bb93240c69b9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6400,25 +6400,27 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
 EXPORT_SYMBOL(dev_set_threaded);
 
 /**
- * __netif_queue_set_napi - Associate queue with the napi
+ * netif_queue_set_napi - Associate queue with the napi
+ * @dev: device to which NAPI and queue belong
  * @queue_index: Index of queue
  * @type: queue type as RX or TX
- * @napi: NAPI context
+ * @napi: NAPI context, pass NULL to clear previously set NAPI
  *
  * Set queue with its corresponding napi context. This should be done after
  * registering the NAPI handler for the queue-vector and the queues have been
  * mapped to the corresponding interrupt vector.
  */
-void __netif_queue_set_napi(unsigned int queue_index,
-			    enum netdev_queue_type type,
-			    struct napi_struct *napi)
+void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
+			  enum netdev_queue_type type,
+			  struct napi_struct *napi)
 {
-	struct net_device *dev = napi->dev;
 	struct netdev_rx_queue *rxq;
 	struct netdev_queue *txq;
 
-	if (WARN_ON_ONCE(!dev))
+	if (WARN_ON_ONCE(napi && !napi->dev))
 		return;
+	if (dev->reg_state >= NETREG_REGISTERED)
+		ASSERT_RTNL();
 
 	switch (type) {
 	case NETDEV_QUEUE_TYPE_RX:
@@ -6433,15 +6435,6 @@ void __netif_queue_set_napi(unsigned int queue_index,
 		return;
 	}
 }
-EXPORT_SYMBOL(__netif_queue_set_napi);
-
-void netif_queue_set_napi(unsigned int queue_index, enum netdev_queue_type type,
-			  struct napi_struct *napi)
-{
-	rtnl_lock();
-	__netif_queue_set_napi(queue_index, type, napi);
-	rtnl_unlock();
-}
 EXPORT_SYMBOL(netif_queue_set_napi);
 
 void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
Nambiar, Amritha Nov. 21, 2023, 9:26 p.m. UTC | #2
On 11/20/2023 3:54 PM, Jakub Kicinski wrote:
> On Thu, 16 Nov 2023 17:16:48 -0800 Amritha Nambiar wrote:
>> Add the napi pointer in netdev queue for tracking the napi
>> instance for each queue. This achieves the queue<->napi mapping.
> 
> I took the series for a spin. I'll send a bnxt patch in a separate
> reply, please add it to the series.
> 

Sure, will add the bnxt patch to v9.

> Three high level comments:
> 
>   - the netif_queue_set_napi() vs __netif_queue_set_napi() gave me pause;
>     developers may be used to calling netif_*() functions from open/stop
>     handlers, without worrying about locking.
>     I think that netif_queue_set_napi() should assume rtnl_lock was
>     already taken, as it will be in 90% of cases. A rare driver which
>     does not hold it should take it locally for now.
> 

Okay, will make these changes in v9.

>   - drivers don't set real_num_*_queues to 0 when they go down,
>     currently. So even tho we don't list any disabled queues when
>     device is UP, we list queues when device is down.
>     I mean:
> 
>     $ ifup eth0
>     $ ethtool -L eth0 combined 4
>     $ ./get-queues my-device
>     ... will list 4 rx and 4 tx queues ...
>     $ ethtool -L eth0 combined 2
>     $ ./get-queues my-device
>     ... will list 2 rx and 2 tx queues ...
>     $ ifdown eth0
>     $ ./get-queues my-device
>     ... will list 2 rx and 2 tx queues ...
>     ... even tho practically speaking there are no active queues ...
> 
>     I think we should skip listing queue and NAPI info of devices which
>     are DOWN. Do you have any use case which would need looking at those?
> 

So, currently, 'ethtool --show-channels' and 'ps -aef | grep napi' would 
list all the queues and NAPIs if the device is DOWN. I think what you 
are pointing at is:
<ifdown and ./get-queues> should show something similar to <ethtool -L 
eth0 combined 0 (0 is not valid... but almost to that effect) and 
./get-queues>.

But, 'ethtool -L' actually deletes the queues vs 'device DOWN' which 
only disables or makes the queues inactive.

Maybe as a follow-up patch, would it be better to have an additional 
parameter called 'state' for 'queues-get' and 'napi-get' that indicates 
the queues or NAPIs as active/inactive. The queue/NAPI state would be 
inherited from the device state. This way we can still list the 
queues/NAPIs when the device is down, set/update parameter 
configurations and then bring UP the device (in case where we stop 
traffic and tune parameters).

Also, if in future, we have the interface to tune parameters per-queue 
without full reset (of all queues or the device itself, as the hardware 
supports this), the 'state' would report this for specific queue as 
active/inactive. Maybe:
'queue-set' can set 'state = active' for a single queue '{"ifindex": 12, 
"id": 0, "type": 0}' and start a queue.

>   - We need a way to detach queues from NAPI. This is sort-of related to
>     the above, maybe its not as crucial once we skip DOWN devices but
>     nonetheless for symmetry we should let the driver clear the NAPI
>     pointer. NAPIs may be allocated dynamically, the queue::napi pointer
>     may get stale.

Okay, will handle this in v9.

> 
> I hacked together the following for my local testing, feel free to fold
> appropriate parts into your patches:
> 

Sure, thank you!

> diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
> index 1a0603b3529d..2ed7a3aeec40 100644
> --- a/drivers/net/ethernet/intel/ice/ice_lib.c
> +++ b/drivers/net/ethernet/intel/ice/ice_lib.c
> @@ -2948,10 +2948,11 @@ static void
>   ice_queue_set_napi(unsigned int queue_index, enum netdev_queue_type type,
>   		   struct napi_struct *napi, bool locked)
>   {
> -	if (locked)
> -		__netif_queue_set_napi(queue_index, type, napi);
> -	else
> -		netif_queue_set_napi(queue_index, type, napi);
> +	if (!locked)
> +		rtnl_lock();
> +	netif_queue_set_napi(napi->dev, queue_index, type, napi);
> +	if (!locked)
> +		rtnl_unlock();
>   }
>   
>   /**
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index dbc4ea74b8d6..e09a039a092a 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -2644,13 +2644,10 @@ static inline void *netdev_priv(const struct net_device *dev)
>    */
>   #define SET_NETDEV_DEVTYPE(net, devtype)	((net)->dev.type = (devtype))
>   
> -void netif_queue_set_napi(unsigned int queue_index, enum netdev_queue_type type,
> +void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
> +			  enum netdev_queue_type type,
>   			  struct napi_struct *napi);
>   
> -void __netif_queue_set_napi(unsigned int queue_index,
> -			    enum netdev_queue_type type,
> -			    struct napi_struct *napi);
> -
>   static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
>   {
>   	napi->irq = irq;
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 99ca59e18abf..bb93240c69b9 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -6400,25 +6400,27 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
>   EXPORT_SYMBOL(dev_set_threaded);
>   
>   /**
> - * __netif_queue_set_napi - Associate queue with the napi
> + * netif_queue_set_napi - Associate queue with the napi
> + * @dev: device to which NAPI and queue belong
>    * @queue_index: Index of queue
>    * @type: queue type as RX or TX
> - * @napi: NAPI context
> + * @napi: NAPI context, pass NULL to clear previously set NAPI
>    *
>    * Set queue with its corresponding napi context. This should be done after
>    * registering the NAPI handler for the queue-vector and the queues have been
>    * mapped to the corresponding interrupt vector.
>    */
> -void __netif_queue_set_napi(unsigned int queue_index,
> -			    enum netdev_queue_type type,
> -			    struct napi_struct *napi)
> +void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
> +			  enum netdev_queue_type type,
> +			  struct napi_struct *napi)
>   {
> -	struct net_device *dev = napi->dev;
>   	struct netdev_rx_queue *rxq;
>   	struct netdev_queue *txq;
>   
> -	if (WARN_ON_ONCE(!dev))
> +	if (WARN_ON_ONCE(napi && !napi->dev))
>   		return;
> +	if (dev->reg_state >= NETREG_REGISTERED)
> +		ASSERT_RTNL();
>   
>   	switch (type) {
>   	case NETDEV_QUEUE_TYPE_RX:
> @@ -6433,15 +6435,6 @@ void __netif_queue_set_napi(unsigned int queue_index,
>   		return;
>   	}
>   }
> -EXPORT_SYMBOL(__netif_queue_set_napi);
> -
> -void netif_queue_set_napi(unsigned int queue_index, enum netdev_queue_type type,
> -			  struct napi_struct *napi)
> -{
> -	rtnl_lock();
> -	__netif_queue_set_napi(queue_index, type, napi);
> -	rtnl_unlock();
> -}
>   EXPORT_SYMBOL(netif_queue_set_napi);
>   
>   void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
Jakub Kicinski Nov. 21, 2023, 10:22 p.m. UTC | #3
On Tue, 21 Nov 2023 13:26:27 -0800 Nambiar, Amritha wrote:
> So, currently, 'ethtool --show-channels' and 'ps -aef | grep napi' would 
> list all the queues and NAPIs if the device is DOWN. I think what you 
> are pointing at is:
> <ifdown and ./get-queues> should show something similar to <ethtool -L 
> eth0 combined 0 (0 is not valid... but almost to that effect) and 
> ./get-queues>.
> 
> But, 'ethtool -L' actually deletes the queues vs 'device DOWN' which 
> only disables or makes the queues inactive.
> 
> Maybe as a follow-up patch, would it be better to have an additional 
> parameter called 'state' for 'queues-get' and 'napi-get' that indicates 
> the queues or NAPIs as active/inactive. The queue/NAPI state would be 
> inherited from the device state. This way we can still list the 
> queues/NAPIs when the device is down, set/update parameter 
> configurations and then bring UP the device (in case where we stop 
> traffic and tune parameters).
> 
> Also, if in future, we have the interface to tune parameters per-queue 
> without full reset (of all queues or the device itself, as the hardware 
> supports this), the 'state' would report this for specific queue as 
> active/inactive. Maybe:
> 'queue-set' can set 'state = active' for a single queue '{"ifindex": 12, 
> "id": 0, "type": 0}' and start a queue.

To reiterate - the thing I find odd about the current situation is that
we hide the queues if they get disabled by lowering ethtool -L, but we
don't hide them when the entire interface is down. When the entire
interface is down there should be no queues, right?

Differently put - what logic that'd make sense to the user do we apply
when trying to decide if the queue is visible? < real_num_queues is
an implementation detail.

We can list all the queues, always, too. No preference. I just want to
make sure that the rules are clear and not very dependent on current
implementation and not different driver to driver.
Nambiar, Amritha Nov. 22, 2023, 12:08 a.m. UTC | #4
On 11/21/2023 2:22 PM, Jakub Kicinski wrote:
> On Tue, 21 Nov 2023 13:26:27 -0800 Nambiar, Amritha wrote:
>> So, currently, 'ethtool --show-channels' and 'ps -aef | grep napi' would
>> list all the queues and NAPIs if the device is DOWN. I think what you
>> are pointing at is:
>> <ifdown and ./get-queues> should show something similar to <ethtool -L
>> eth0 combined 0 (0 is not valid... but almost to that effect) and
>> ./get-queues>.
>>
>> But, 'ethtool -L' actually deletes the queues vs 'device DOWN' which
>> only disables or makes the queues inactive.
>>
>> Maybe as a follow-up patch, would it be better to have an additional
>> parameter called 'state' for 'queues-get' and 'napi-get' that indicates
>> the queues or NAPIs as active/inactive. The queue/NAPI state would be
>> inherited from the device state. This way we can still list the
>> queues/NAPIs when the device is down, set/update parameter
>> configurations and then bring UP the device (in case where we stop
>> traffic and tune parameters).
>>
>> Also, if in future, we have the interface to tune parameters per-queue
>> without full reset (of all queues or the device itself, as the hardware
>> supports this), the 'state' would report this for specific queue as
>> active/inactive. Maybe:
>> 'queue-set' can set 'state = active' for a single queue '{"ifindex": 12,
>> "id": 0, "type": 0}' and start a queue.
> 
> To reiterate - the thing I find odd about the current situation is that
> we hide the queues if they get disabled by lowering ethtool -L, but we
> don't hide them when the entire interface is down. When the entire
> interface is down there should be no queues, right?
> 

"When the entire interface is down there should be no queues" - 
currently, 'ethtool --show-channels' reports all the available queues 
when interface is DOWN (for all drivers, as drivers don't set 
real_num_queues to 0). Is this incorrect?

> Differently put - what logic that'd make sense to the user do we apply
> when trying to decide if the queue is visible? < real_num_queues is
> an implementation detail.
> 
> We can list all the queues, always, too. No preference. I just want to
> make sure that the rules are clear and not very dependent on current
> implementation and not different driver to driver.

I think currently, the queue dump results when the device is down aligns 
for both APIs (netdev-genl queue-get and ethtool show-channels) for all 
the drivers. If we decide to NOT show queues/NAPIs (with netdev-genl) 
when the device is down, the user would see conflicting results, the 
dump results with netdev-genl APIs would be different from what 'ethtool 
--show-channels' and 'ps -aef | grep napi' reports.
Jakub Kicinski Nov. 22, 2023, 1:15 a.m. UTC | #5
On Tue, 21 Nov 2023 16:08:07 -0800 Nambiar, Amritha wrote:
> > To reiterate - the thing I find odd about the current situation is that
> > we hide the queues if they get disabled by lowering ethtool -L, but we
> > don't hide them when the entire interface is down. When the entire
> > interface is down there should be no queues, right?
> 
> "When the entire interface is down there should be no queues" - 
> currently, 'ethtool --show-channels' reports all the available queues 
> when interface is DOWN

That's not the same. ethtool -l shows the configuration not 
the instantiated objects. ethtool -a will also show you the
pause settings even when cable is not plugged in.
sysfs objects of the queues are still exposed for devices which 
are down, that's true. But again, that's to expose the config.

> > Differently put - what logic that'd make sense to the user do we apply
> > when trying to decide if the queue is visible? < real_num_queues is
> > an implementation detail.
> > 
> > We can list all the queues, always, too. No preference. I just want to
> > make sure that the rules are clear and not very dependent on current
> > implementation and not different driver to driver.  
> 
> I think currently, the queue dump results when the device is down aligns 
> for both APIs (netdev-genl queue-get and ethtool show-channels) for all 
> the drivers. If we decide to NOT show queues/NAPIs (with netdev-genl) 
> when the device is down, the user would see conflicting results, the 
> dump results with netdev-genl APIs would be different from what 'ethtool 
> --show-channels' and 'ps -aef | grep napi' reports.

We should make the distinction between configuration and state of
instantiated objects clear before we get too far. Say we support
setting ring length for a specific queue. Global setting is 512,
queue X wants 256. How do we remove the override for queue X?
By setting it to 512? What if we want 512, and the default shifts
to something else? We'll need an explicit "reset" command.

I think it may be cleaner to keep queue-get as state of queues,
and configuration / settings / rules completely separate.

Am I wrong? Willem?
Nambiar, Amritha Nov. 22, 2023, 9:28 p.m. UTC | #6
On 11/21/2023 5:15 PM, Jakub Kicinski wrote:
> On Tue, 21 Nov 2023 16:08:07 -0800 Nambiar, Amritha wrote:
>>> To reiterate - the thing I find odd about the current situation is that
>>> we hide the queues if they get disabled by lowering ethtool -L, but we
>>> don't hide them when the entire interface is down. When the entire
>>> interface is down there should be no queues, right?
>>
>> "When the entire interface is down there should be no queues" -
>> currently, 'ethtool --show-channels' reports all the available queues
>> when interface is DOWN
> 
> That's not the same. ethtool -l shows the configuration not
> the instantiated objects. ethtool -a will also show you the
> pause settings even when cable is not plugged in.
> sysfs objects of the queues are still exposed for devices which
> are down, that's true. But again, that's to expose the config.
> 
>>> Differently put - what logic that'd make sense to the user do we apply
>>> when trying to decide if the queue is visible? < real_num_queues is
>>> an implementation detail.
>>>
>>> We can list all the queues, always, too. No preference. I just want to
>>> make sure that the rules are clear and not very dependent on current
>>> implementation and not different driver to driver.
>>
>> I think currently, the queue dump results when the device is down aligns
>> for both APIs (netdev-genl queue-get and ethtool show-channels) for all
>> the drivers. If we decide to NOT show queues/NAPIs (with netdev-genl)
>> when the device is down, the user would see conflicting results, the
>> dump results with netdev-genl APIs would be different from what 'ethtool
>> --show-channels' and 'ps -aef | grep napi' reports.
> 
> We should make the distinction between configuration and state of
> instantiated objects clear before we get too far. Say we support
> setting ring length for a specific queue. Global setting is 512,
> queue X wants 256. How do we remove the override for queue X?
> By setting it to 512? What if we want 512, and the default shifts
> to something else? We'll need an explicit "reset" command.
> 
> I think it may be cleaner to keep queue-get as state of queues,
> and configuration / settings / rules completely separate.
> 
> Am I wrong? Willem?
> 

So, the instantiated netdev objects and their states are more aligned to 
the netdev-level than the actual configuration in the hardware.

WRT to showing queues when the device is down, I see your point, the 
hardware has those queues available, but when the interface is down, 
those queues are not valid at the netdev-level and need not be reported 
to the user. So, it is worth showing no results.
Also, my concern about showing the queues when the device is down, to do 
the user-configuration and then bring up the device, does not hold 
strong, as configuring when the device is up would also need a reset to 
make the updates in the hardware.

Trying to understand this distinction bit more:
So, netdev-genl queue-get shows state of queue objects as reported from 
the netdev level. Now, unless there's the queue-set command changing the 
configuration, the state of queue-objects would match the hardware 
configurations.
When the user changes the configuration with a queue-set command:
- queue-get would report the new updates (as obtained from the netdev).
- The updates would not be reflected in the hardware till a reset is 
issued. At this point, ethtool or others would report the older 
configuration (before reset).
- After reset, the state of queue objects from queue-get would match the 
actual hardware configuration.

I agree, an explicit "reset" user-command would be great. This way all 
the set operations for the netdev objects (queue, NAPI, page pool etc.) 
would stay at the netdev level without needing ndo_op for each, and then 
the "reset" command can trigger the ndo callback and actuate the 
hardware changes.
Jakub Kicinski Nov. 22, 2023, 10 p.m. UTC | #7
On Wed, 22 Nov 2023 13:28:19 -0800 Nambiar, Amritha wrote:
> Trying to understand this distinction bit more:
> So, netdev-genl queue-get shows state of queue objects as reported from 
> the netdev level. Now, unless there's the queue-set command changing the 
> configuration, the state of queue-objects would match the hardware 
> configurations.
> When the user changes the configuration with a queue-set command:
> - queue-get would report the new updates (as obtained from the netdev).
> - The updates would not be reflected in the hardware till a reset is 
> issued. At this point, ethtool or others would report the older 
> configuration (before reset).
> - After reset, the state of queue objects from queue-get would match the 
> actual hardware configuration.
> 
> I agree, an explicit "reset" user-command would be great. This way all 
> the set operations for the netdev objects (queue, NAPI, page pool etc.) 
> would stay at the netdev level without needing ndo_op for each, and then 
> the "reset" command can trigger the ndo callback and actuate the 
> hardware changes.

How the changes are applied is a separate topic. I was only talking
about the fact that if the settings are controllable both at the device
level and queue level - the queue state is a result of combining device
settings with queue settings.
Nambiar, Amritha Nov. 23, 2023, 12:56 a.m. UTC | #8
On 11/22/2023 2:00 PM, Jakub Kicinski wrote:
> On Wed, 22 Nov 2023 13:28:19 -0800 Nambiar, Amritha wrote:
>> Trying to understand this distinction bit more:
>> So, netdev-genl queue-get shows state of queue objects as reported from
>> the netdev level. Now, unless there's the queue-set command changing the
>> configuration, the state of queue-objects would match the hardware
>> configurations.
>> When the user changes the configuration with a queue-set command:
>> - queue-get would report the new updates (as obtained from the netdev).
>> - The updates would not be reflected in the hardware till a reset is
>> issued. At this point, ethtool or others would report the older
>> configuration (before reset).
>> - After reset, the state of queue objects from queue-get would match the
>> actual hardware configuration.
>>
>> I agree, an explicit "reset" user-command would be great. This way all
>> the set operations for the netdev objects (queue, NAPI, page pool etc.)
>> would stay at the netdev level without needing ndo_op for each, and then
>> the "reset" command can trigger the ndo callback and actuate the
>> hardware changes.
> 
> How the changes are applied is a separate topic. I was only talking
> about the fact that if the settings are controllable both at the device
> level and queue level - the queue state is a result of combining device
> settings with queue settings.

Okay, makes sense. Will fix in v9 and skip listing queues and NAPIs of 
devices which are DOWN.
diff mbox series

Patch

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a16c9cc063fe..8c8010e78240 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -665,6 +665,10 @@  struct netdev_queue {
 #ifdef CONFIG_XDP_SOCKETS
 	struct xsk_buff_pool    *pool;
 #endif
+	/* NAPI instance for the queue
+	 * Readers and writers must hold RTNL
+	 */
+	struct napi_struct      *napi;
 /*
  * write-mostly part
  */
@@ -2639,6 +2643,13 @@  static inline void *netdev_priv(const struct net_device *dev)
  */
 #define SET_NETDEV_DEVTYPE(net, devtype)	((net)->dev.type = (devtype))
 
+void netif_queue_set_napi(unsigned int queue_index, enum netdev_queue_type type,
+			  struct napi_struct *napi);
+
+void __netif_queue_set_napi(unsigned int queue_index,
+			    enum netdev_queue_type type,
+			    struct napi_struct *napi);
+
 /* Default NAPI poll() weight
  * Device drivers are strongly advised to not use bigger value
  */
diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h
index cdcafb30d437..aa1716fb0e53 100644
--- a/include/net/netdev_rx_queue.h
+++ b/include/net/netdev_rx_queue.h
@@ -21,6 +21,10 @@  struct netdev_rx_queue {
 #ifdef CONFIG_XDP_SOCKETS
 	struct xsk_buff_pool            *pool;
 #endif
+	/* NAPI instance for the queue
+	 * Readers and writers must hold RTNL
+	 */
+	struct napi_struct		*napi;
 } ____cacheline_aligned_in_smp;
 
 /*
diff --git a/net/core/dev.c b/net/core/dev.c
index af53f6d838ce..cdbc916d0208 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6400,6 +6400,51 @@  int dev_set_threaded(struct net_device *dev, bool threaded)
 }
 EXPORT_SYMBOL(dev_set_threaded);
 
+/**
+ * __netif_queue_set_napi - Associate queue with the napi
+ * @queue_index: Index of queue
+ * @type: queue type as RX or TX
+ * @napi: NAPI context
+ *
+ * Set queue with its corresponding napi context. This should be done after
+ * registering the NAPI handler for the queue-vector and the queues have been
+ * mapped to the corresponding interrupt vector.
+ */
+void __netif_queue_set_napi(unsigned int queue_index,
+			    enum netdev_queue_type type,
+			    struct napi_struct *napi)
+{
+	struct net_device *dev = napi->dev;
+	struct netdev_rx_queue *rxq;
+	struct netdev_queue *txq;
+
+	if (WARN_ON_ONCE(!dev))
+		return;
+
+	switch (type) {
+	case NETDEV_QUEUE_TYPE_RX:
+		rxq = __netif_get_rx_queue(dev, queue_index);
+		rxq->napi = napi;
+		return;
+	case NETDEV_QUEUE_TYPE_TX:
+		txq = netdev_get_tx_queue(dev, queue_index);
+		txq->napi = napi;
+		return;
+	default:
+		return;
+	}
+}
+EXPORT_SYMBOL(__netif_queue_set_napi);
+
+void netif_queue_set_napi(unsigned int queue_index, enum netdev_queue_type type,
+			  struct napi_struct *napi)
+{
+	rtnl_lock();
+	__netif_queue_set_napi(queue_index, type, napi);
+	rtnl_unlock();
+}
+EXPORT_SYMBOL(netif_queue_set_napi);
+
 void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
 			   int (*poll)(struct napi_struct *, int), int weight)
 {