diff mbox series

[v4,net-next,6/7] net: ethtool: add a mutex protecting RSS contexts

Message ID b5d7b8e243178d63643c8efc1f1c48b3b2468dc7.1695838185.git.ecree.xilinx@gmail.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series ethtool: track custom RSS contexts in the core | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 3216 this patch: 3216
netdev/cc_maintainers warning 3 maintainers not CCed: d-tatianin@yandex-team.ru daniel@iogearbox.net vladimir.oltean@nxp.com
netdev/build_clang success Errors and warnings before: 1558 this patch: 1558
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 3465 this patch: 3465
netdev/checkpatch warning CHECK: struct mutex definition without comment
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

edward.cree@amd.com Sept. 27, 2023, 6:13 p.m. UTC
From: Edward Cree <ecree.xilinx@gmail.com>

While this is not needed to serialise the ethtool entry points (which
 are all under RTNL), drivers may have cause to asynchronously access
 dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to
 do this safely without needing to take the RTNL.

Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>
---
 include/linux/ethtool.h | 3 +++
 net/core/dev.c          | 5 +++++
 net/ethtool/ioctl.c     | 7 +++++++
 3 files changed, 15 insertions(+)

Comments

Jacob Keller Sept. 29, 2023, 6:27 p.m. UTC | #1
On 9/27/2023 11:13 AM, edward.cree@amd.com wrote:
> From: Edward Cree <ecree.xilinx@gmail.com>
> 
> While this is not needed to serialise the ethtool entry points (which
>  are all under RTNL), drivers may have cause to asynchronously access
>  dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to
>  do this safely without needing to take the RTNL.
> 
> Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>

Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>

> ---
>  include/linux/ethtool.h | 3 +++
>  net/core/dev.c          | 5 +++++
>  net/ethtool/ioctl.c     | 7 +++++++
>  3 files changed, 15 insertions(+)
> 
> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
> index c8963bde9289..d15a21bd6f12 100644
> --- a/include/linux/ethtool.h
> +++ b/include/linux/ethtool.h
> @@ -1026,11 +1026,14 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
>  /**
>   * struct ethtool_netdev_state - per-netdevice state for ethtool features
>   * @rss_ctx:		XArray of custom RSS contexts
> + * @rss_lock:		Protects entries in @rss_ctx.  May be taken from
> + *			within RTNL.
>   * @rss_ctx_max_id:	maximum (exclusive) supported RSS context ID
>   * @wol_enabled:	Wake-on-LAN is enabled
>   */
>  struct ethtool_netdev_state {
>  	struct xarray		rss_ctx;
> +	struct mutex		rss_lock;
>  	u32			rss_ctx_max_id;
>  	u32			wol_enabled:1;
>  };
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 69579d9cd7ba..c57456ed4be8 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -10074,6 +10074,7 @@ int register_netdevice(struct net_device *dev)
>  
>  	/* rss ctx ID 0 is reserved for the default context, start from 1 */
>  	xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1);
> +	mutex_init(&dev->ethtool->rss_lock);
>  
>  	spin_lock_init(&dev->addr_list_lock);
>  	netdev_set_addr_lockdep_class(dev);
> @@ -10882,6 +10883,7 @@ static void netdev_rss_contexts_free(struct net_device *dev)
>  	struct ethtool_rxfh_context *ctx;
>  	unsigned long context;
>  
> +	mutex_lock(&dev->ethtool->rss_lock);
>  	if (dev->ethtool_ops->create_rxfh_context ||
>  	    dev->ethtool_ops->set_rxfh_context)
>  		xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
> @@ -10903,6 +10905,7 @@ static void netdev_rss_contexts_free(struct net_device *dev)
>  			kfree(ctx);
>  		}
>  	xa_destroy(&dev->ethtool->rss_ctx);
> +	mutex_unlock(&dev->ethtool->rss_lock);
>  }
>  
>  /**
> @@ -11016,6 +11019,8 @@ void unregister_netdevice_many_notify(struct list_head *head,
>  		if (dev->netdev_ops->ndo_uninit)
>  			dev->netdev_ops->ndo_uninit(dev);
>  
> +		mutex_destroy(&dev->ethtool->rss_lock);
> +
>  		if (skb)
>  			rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh);
>  
> diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
> index 3920ddee3ee2..d21bbc92e6fc 100644
> --- a/net/ethtool/ioctl.c
> +++ b/net/ethtool/ioctl.c
> @@ -1258,6 +1258,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
>  	u8 *rss_config;
>  	u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
>  	bool create = false, delete = false;
> +	bool locked = false; /* dev->ethtool->rss_lock taken */
>  
>  	if (!ops->get_rxnfc || !ops->set_rxfh)
>  		return -EOPNOTSUPP;
> @@ -1335,6 +1336,10 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
>  		}
>  	}
>  
> +	if (rxfh.rss_context) {
> +		mutex_lock(&dev->ethtool->rss_lock);
> +		locked = true;
> +	}
>  	if (create) {
>  		if (delete) {
>  			ret = -EINVAL;
> @@ -1455,6 +1460,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
>  	}
>  
>  out:
> +	if (locked)
> +		mutex_unlock(&dev->ethtool->rss_lock);
>  	kfree(rss_config);
>  	return ret;
>  }
>
Martin Habets Oct. 2, 2023, 12:16 p.m. UTC | #2
On Wed, Sep 27, 2023 at 07:13:37PM +0100, edward.cree@amd.com wrote:
> From: Edward Cree <ecree.xilinx@gmail.com>
> 
> While this is not needed to serialise the ethtool entry points (which
>  are all under RTNL), drivers may have cause to asynchronously access
>  dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to
>  do this safely without needing to take the RTNL.
> 
> Signed-off-by: Edward Cree <ecree.xilinx@gmail.com>

Reviewed-by: Martin Habets <habetsm.xilinx@gmail.com>

> ---
>  include/linux/ethtool.h | 3 +++
>  net/core/dev.c          | 5 +++++
>  net/ethtool/ioctl.c     | 7 +++++++
>  3 files changed, 15 insertions(+)
> 
> diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
> index c8963bde9289..d15a21bd6f12 100644
> --- a/include/linux/ethtool.h
> +++ b/include/linux/ethtool.h
> @@ -1026,11 +1026,14 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
>  /**
>   * struct ethtool_netdev_state - per-netdevice state for ethtool features
>   * @rss_ctx:		XArray of custom RSS contexts
> + * @rss_lock:		Protects entries in @rss_ctx.  May be taken from
> + *			within RTNL.
>   * @rss_ctx_max_id:	maximum (exclusive) supported RSS context ID
>   * @wol_enabled:	Wake-on-LAN is enabled
>   */
>  struct ethtool_netdev_state {
>  	struct xarray		rss_ctx;
> +	struct mutex		rss_lock;
>  	u32			rss_ctx_max_id;
>  	u32			wol_enabled:1;
>  };
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 69579d9cd7ba..c57456ed4be8 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -10074,6 +10074,7 @@ int register_netdevice(struct net_device *dev)
>  
>  	/* rss ctx ID 0 is reserved for the default context, start from 1 */
>  	xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1);
> +	mutex_init(&dev->ethtool->rss_lock);
>  
>  	spin_lock_init(&dev->addr_list_lock);
>  	netdev_set_addr_lockdep_class(dev);
> @@ -10882,6 +10883,7 @@ static void netdev_rss_contexts_free(struct net_device *dev)
>  	struct ethtool_rxfh_context *ctx;
>  	unsigned long context;
>  
> +	mutex_lock(&dev->ethtool->rss_lock);
>  	if (dev->ethtool_ops->create_rxfh_context ||
>  	    dev->ethtool_ops->set_rxfh_context)
>  		xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
> @@ -10903,6 +10905,7 @@ static void netdev_rss_contexts_free(struct net_device *dev)
>  			kfree(ctx);
>  		}
>  	xa_destroy(&dev->ethtool->rss_ctx);
> +	mutex_unlock(&dev->ethtool->rss_lock);
>  }
>  
>  /**
> @@ -11016,6 +11019,8 @@ void unregister_netdevice_many_notify(struct list_head *head,
>  		if (dev->netdev_ops->ndo_uninit)
>  			dev->netdev_ops->ndo_uninit(dev);
>  
> +		mutex_destroy(&dev->ethtool->rss_lock);
> +
>  		if (skb)
>  			rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh);
>  
> diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
> index 3920ddee3ee2..d21bbc92e6fc 100644
> --- a/net/ethtool/ioctl.c
> +++ b/net/ethtool/ioctl.c
> @@ -1258,6 +1258,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
>  	u8 *rss_config;
>  	u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
>  	bool create = false, delete = false;
> +	bool locked = false; /* dev->ethtool->rss_lock taken */
>  
>  	if (!ops->get_rxnfc || !ops->set_rxfh)
>  		return -EOPNOTSUPP;
> @@ -1335,6 +1336,10 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
>  		}
>  	}
>  
> +	if (rxfh.rss_context) {
> +		mutex_lock(&dev->ethtool->rss_lock);
> +		locked = true;
> +	}
>  	if (create) {
>  		if (delete) {
>  			ret = -EINVAL;
> @@ -1455,6 +1460,8 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
>  	}
>  
>  out:
> +	if (locked)
> +		mutex_unlock(&dev->ethtool->rss_lock);
>  	kfree(rss_config);
>  	return ret;
>  }
Jakub Kicinski Oct. 4, 2023, 11:16 p.m. UTC | #3
On Wed, 27 Sep 2023 19:13:37 +0100 edward.cree@amd.com wrote:
> While this is not needed to serialise the ethtool entry points (which
>  are all under RTNL), drivers may have cause to asynchronously access
>  dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to
>  do this safely without needing to take the RTNL.

Can we use a replay mechanism, like we do in TC offloads and VxLAN/UDP
ports? The driver which lost config can ask for the rss contexts to be
"replayed" and the core will issue a series of ->create calls for all
existing entries?

Regarding the lock itself - can we hide it under ethtool_rss_lock(dev)
/ ethtool_rss_unlock(dev) helpers?
Edward Cree Oct. 5, 2023, 8:56 p.m. UTC | #4
On 05/10/2023 00:16, Jakub Kicinski wrote:
> On Wed, 27 Sep 2023 19:13:37 +0100 edward.cree@amd.com wrote:
>> While this is not needed to serialise the ethtool entry points (which
>>  are all under RTNL), drivers may have cause to asynchronously access
>>  dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to
>>  do this safely without needing to take the RTNL.
> 
> Can we use a replay mechanism, like we do in TC offloads and VxLAN/UDP
> ports? The driver which lost config can ask for the rss contexts to be
> "replayed" and the core will issue a series of ->create calls for all
> existing entries?

I like that idea, yes.  Will try to implement it for v5.
There is a question as to how the core should react if the ->create call
 then fails; see my reply to Martin on #7.

> Regarding the lock itself - can we hide it under ethtool_rss_lock(dev)
> / ethtool_rss_unlock(dev) helpers?

Sure.  If I can't get replay to work then I'll do that.

-ed
Jakub Kicinski Oct. 6, 2023, 12:07 a.m. UTC | #5
On Thu, 5 Oct 2023 21:56:47 +0100 Edward Cree wrote:
> > Can we use a replay mechanism, like we do in TC offloads and VxLAN/UDP
> > ports? The driver which lost config can ask for the rss contexts to be
> > "replayed" and the core will issue a series of ->create calls for all
> > existing entries?  
> 
> I like that idea, yes.  Will try to implement it for v5.
> There is a question as to how the core should react if the ->create call
>  then fails; see my reply to Martin on #7.

Hm. The application asked for a config which is no longer applied.
The machine needs to raise some form of an alarm or be taken out
of commission. My first thought would be to print an error message
in the core, and expect the driver to fail some devlink health
reporter.

I don't think a "broken" flag local to RSS would be monitored, there'd
be too many of such local flags throughout the APIs. devlink health
may be monitored.

> > Regarding the lock itself - can we hide it under ethtool_rss_lock(dev)
> > / ethtool_rss_unlock(dev) helpers?  
> 
> Sure.  If I can't get replay to work then I'll do that.
Edward Cree Dec. 7, 2023, 2:15 p.m. UTC | #6
On 05/10/2023 00:16, Jakub Kicinski wrote:
> On Wed, 27 Sep 2023 19:13:37 +0100 edward.cree@amd.com wrote:
>> While this is not needed to serialise the ethtool entry points (which
>>  are all under RTNL), drivers may have cause to asynchronously access
>>  dev->ethtool->rss_ctx; taking dev->ethtool->rss_lock allows them to
>>  do this safely without needing to take the RTNL.
> 
> Can we use a replay mechanism, like we do in TC offloads and VxLAN/UDP
> ports? The driver which lost config can ask for the rss contexts to be
> "replayed" and the core will issue a series of ->create calls for all
> existing entries?
So I tried to prototype this, and unfortunately I ran into a problem.
While we can replay the contexts alright, that still leaves the ntuple
 filters which we also want to restore, and which might depend on the
 contexts, so that can't be done until after context restore is done.
So to do this we'd need to *also* have the core replay the filters,
 which would mean adding a filter array to the core similar to this
 context array.  Now that's a thing that might be useful to have,
 enabling netlink dumps and so on, but it would considerably extend
 the scope of this work, in which case who knows if it'll ever be
 ready to merge :S
Moreover, at least in the case of sfc (as usual, no idea about other
 NICs), the filter table on the device contains more than just ntuple
 filters; stuff like the device's unicast address list, PTP filters
 and representor filters, some of which are required for correct
 operation, live in the same table.
When coming up after a reset, currently we:
1) restore RSS contexts
2) restore all filters (both driver-internal and ethtool ntuple) from
   the software shadow filter table into the hardware
3) bring up the NIC datapath.
Instead we would need to:
1) restore all the 'internal' filters (which do not, and after these
   changes could not ever, use custom RSS contexts), and discard all
   ntuple filters from the software shadow filter table in the driver
2) request RSS+ntuple replay
3) bring up the NIC datapath
4) the replay workitem runs, and reinserts RSS contexts and ntuple
   filters.
This would also mean that the default RSS context, which is used by
 the unicast/multicast address and Ethernet broadcast filters, could
 not ever migrate to be tracked in the XArray (otherwise a desirable
 simplification).

tl;dr: none of this is impossible, but it'd be a lot of work just to
 get rid of one mutex, and would paint us into a bit of a corner.
So I propose to stick with the locking scheme for now; I'll post v5
 with the other review comments addressed; and if at some point in
 the future core tracking of ntuple filters gets added, we can
 revisit then whether moving to a replay scheme is viable.  Okay?

-ed
Jakub Kicinski Dec. 7, 2023, 4:45 p.m. UTC | #7
On Thu, 7 Dec 2023 14:15:30 +0000 Edward Cree wrote:
> tl;dr: none of this is impossible, but it'd be a lot of work just to
>  get rid of one mutex, and would paint us into a bit of a corner.
> So I propose to stick with the locking scheme for now; I'll post v5
>  with the other review comments addressed; and if at some point in
>  the future core tracking of ntuple filters gets added, we can
>  revisit then whether moving to a replay scheme is viable.  Okay?

Sounds good, thanks for investigating.
diff mbox series

Patch

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index c8963bde9289..d15a21bd6f12 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -1026,11 +1026,14 @@  int ethtool_virtdev_set_link_ksettings(struct net_device *dev,
 /**
  * struct ethtool_netdev_state - per-netdevice state for ethtool features
  * @rss_ctx:		XArray of custom RSS contexts
+ * @rss_lock:		Protects entries in @rss_ctx.  May be taken from
+ *			within RTNL.
  * @rss_ctx_max_id:	maximum (exclusive) supported RSS context ID
  * @wol_enabled:	Wake-on-LAN is enabled
  */
 struct ethtool_netdev_state {
 	struct xarray		rss_ctx;
+	struct mutex		rss_lock;
 	u32			rss_ctx_max_id;
 	u32			wol_enabled:1;
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index 69579d9cd7ba..c57456ed4be8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -10074,6 +10074,7 @@  int register_netdevice(struct net_device *dev)
 
 	/* rss ctx ID 0 is reserved for the default context, start from 1 */
 	xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1);
+	mutex_init(&dev->ethtool->rss_lock);
 
 	spin_lock_init(&dev->addr_list_lock);
 	netdev_set_addr_lockdep_class(dev);
@@ -10882,6 +10883,7 @@  static void netdev_rss_contexts_free(struct net_device *dev)
 	struct ethtool_rxfh_context *ctx;
 	unsigned long context;
 
+	mutex_lock(&dev->ethtool->rss_lock);
 	if (dev->ethtool_ops->create_rxfh_context ||
 	    dev->ethtool_ops->set_rxfh_context)
 		xa_for_each(&dev->ethtool->rss_ctx, context, ctx) {
@@ -10903,6 +10905,7 @@  static void netdev_rss_contexts_free(struct net_device *dev)
 			kfree(ctx);
 		}
 	xa_destroy(&dev->ethtool->rss_ctx);
+	mutex_unlock(&dev->ethtool->rss_lock);
 }
 
 /**
@@ -11016,6 +11019,8 @@  void unregister_netdevice_many_notify(struct list_head *head,
 		if (dev->netdev_ops->ndo_uninit)
 			dev->netdev_ops->ndo_uninit(dev);
 
+		mutex_destroy(&dev->ethtool->rss_lock);
+
 		if (skb)
 			rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh);
 
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 3920ddee3ee2..d21bbc92e6fc 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1258,6 +1258,7 @@  static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 	u8 *rss_config;
 	u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]);
 	bool create = false, delete = false;
+	bool locked = false; /* dev->ethtool->rss_lock taken */
 
 	if (!ops->get_rxnfc || !ops->set_rxfh)
 		return -EOPNOTSUPP;
@@ -1335,6 +1336,10 @@  static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 		}
 	}
 
+	if (rxfh.rss_context) {
+		mutex_lock(&dev->ethtool->rss_lock);
+		locked = true;
+	}
 	if (create) {
 		if (delete) {
 			ret = -EINVAL;
@@ -1455,6 +1460,8 @@  static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev,
 	}
 
 out:
+	if (locked)
+		mutex_unlock(&dev->ethtool->rss_lock);
 	kfree(rss_config);
 	return ret;
 }