diff mbox series

[net-next,02/11] net: add helpers for lookup and walking netdevs under netdev_lock()

Message ID 20250114035118.110297-3-kuba@kernel.org (mailing list archive)
State New
Delegated to: Netdev Maintainers
Headers show
Series net: use netdev->lock to protect NAPI | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 1 this patch: 1
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers success CCed 6 of 6 maintainers
netdev/build_clang success Errors and warnings before: 56 this patch: 56
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api warning Found: 'dev_hold(' was: 0 now: 2; 'dev_put(' was: 0 now: 2
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 41 this patch: 41
netdev/checkpatch fail CHECK: Macro argument 'ifindex' may be better as '(ifindex)' to avoid precedence issues CHECK: Macro argument 'var_name' may be better as '(var_name)' to avoid precedence issues ERROR: trailing statements should be on next line
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 79 this patch: 79
netdev/source_inline success Was 0 now: 0

Commit Message

Jakub Kicinski Jan. 14, 2025, 3:51 a.m. UTC
Add helpers for accessing netdevs under netdev_lock().
There's some careful handling needed to find the device and lock it
safely, without it getting unregistered, and without taking rtnl_lock
(the latter being the whole point of the new locking, after all).

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/dev.h |  16 +++++++
 net/core/dev.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 126 insertions(+)

Comments

Eric Dumazet Jan. 14, 2025, 1:03 p.m. UTC | #1
On Tue, Jan 14, 2025 at 4:51 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> Add helpers for accessing netdevs under netdev_lock().
> There's some careful handling needed to find the device and lock it
> safely, without it getting unregistered, and without taking rtnl_lock
> (the latter being the whole point of the new locking, after all).
>
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
>  net/core/dev.h |  16 +++++++
>  net/core/dev.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 126 insertions(+)
>
> diff --git a/net/core/dev.h b/net/core/dev.h
> index d8966847794c..25ae732c0775 100644
> --- a/net/core/dev.h
> +++ b/net/core/dev.h
> @@ -2,6 +2,7 @@
>  #ifndef _NET_CORE_DEV_H
>  #define _NET_CORE_DEV_H
>
> +#include <linux/cleanup.h>
>  #include <linux/types.h>
>  #include <linux/rwsem.h>
>  #include <linux/netdevice.h>
> @@ -23,8 +24,23 @@ struct sd_flow_limit {
>  extern int netdev_flow_limit_table_len;
>
>  struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id);
> +struct napi_struct *
> +netdev_napi_by_id_lock(struct net *net, unsigned int napi_id);
>  struct net_device *dev_get_by_napi_id(unsigned int napi_id);
>
> +struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex);
> +struct net_device *__netdev_put_lock(struct net_device *dev);
> +struct net_device *
> +netdev_xa_find_lock(struct net *net, struct net_device *dev,
> +                   unsigned long *index);
> +
> +DEFINE_FREE(netdev_unlock, struct net_device *, if (_T) netdev_unlock(_T));
> +
> +#define for_each_netdev_lock_scoped(net, var_name, ifindex)            \
> +       for (struct net_device *var_name __free(netdev_unlock) = NULL;  \
> +            (var_name = netdev_xa_find_lock(net, var_name, &ifindex)); \
> +            ifindex++)
> +
>  #ifdef CONFIG_PROC_FS
>  int __init dev_proc_init(void);
>  #else
> diff --git a/net/core/dev.c b/net/core/dev.c
> index fda4e1039bf0..5c1e71afbe1c 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -783,6 +783,49 @@ struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id)
>         return napi;
>  }
>
> +/**
> + *     netdev_napi_by_id_lock() - find a device by NAPI ID and lock it
> + *     @net: the applicable net namespace
> + *     @napi_id: ID of a NAPI of a target device
> + *
> + *     Find a NAPI instance with @napi_id. Lock its device.
> + *     The device must be in %NETREG_REGISTERED state for lookup to succeed.
> + *     netdev_unlock() must be called to release it.
> + *
> + *     Return: pointer to NAPI, its device with lock held, NULL if not found.
> + */
> +struct napi_struct *
> +netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
> +{
> +       struct napi_struct *napi;
> +       struct net_device *dev;
> +
> +       rcu_read_lock();
> +       napi = netdev_napi_by_id(net, napi_id);
> +       if (!napi || napi->dev->reg_state != NETREG_REGISTERED) {

nit: this should be READ_ONCE(napi->dev->reg_state) != NETREG_REGISTERED

> +               rcu_read_unlock();
> +               return NULL;
> +       }
> +
> +       dev = napi->dev;
> +       dev_hold(dev);
> +       rcu_read_unlock();
> +
> +       dev = __netdev_put_lock(dev);
> +       if (!dev)
> +               return NULL;
> +
> +       rcu_read_lock();
> +       napi = netdev_napi_by_id(net, napi_id);
> +       if (napi && napi->dev != dev)
> +               napi = NULL;
> +       rcu_read_unlock();
> +
> +       if (!napi)
> +               netdev_unlock(dev);
> +       return napi;
> +}
> +
>  /**
>   *     __dev_get_by_name       - find a device by its name
>   *     @net: the applicable net namespace
> @@ -971,6 +1014,73 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id)
>         return napi ? napi->dev : NULL;
>  }
>
> +/* Release the held reference on the net_device, and if the net_device
> + * is still registered try to lock the instance lock. If device is being
> + * unregistered NULL will be returned (but the reference has been released,
> + * either way!)
> + *
> + * This helper is intended for locking net_device after it has been looked up
> + * using a lockless lookup helper. Lock prevents the instance from going away.
> + */
> +struct net_device *__netdev_put_lock(struct net_device *dev)
> +{
> +       netdev_lock(dev);
> +       if (dev->reg_state > NETREG_REGISTERED) {

I presume the netdev lock will be held at some point in
netdev_run_todo and/or unregister_netdevice_many_notify
so no need for a READ_ONCE() here.

Reviewed-by: Eric Dumazet <edumazet@google.com>
Jakub Kicinski Jan. 14, 2025, 2:45 p.m. UTC | #2
On Tue, 14 Jan 2025 14:03:35 +0100 Eric Dumazet wrote:
> > +struct net_device *__netdev_put_lock(struct net_device *dev)
> > +{
> > +       netdev_lock(dev);
> > +       if (dev->reg_state > NETREG_REGISTERED) {  
> 
> I presume the netdev lock will be held at some point in
> netdev_run_todo and/or unregister_netdevice_many_notify
> so no need for a READ_ONCE() here.

Yes, the only unprotected write is in free_netdev(), but we're holding
a reference here so if we get to free there's a bug somewhere else.
I'll reorder patches 2 and 3.
Joe Damato Jan. 14, 2025, 10:53 p.m. UTC | #3
On Mon, Jan 13, 2025 at 07:51:08PM -0800, Jakub Kicinski wrote:
> Add helpers for accessing netdevs under netdev_lock().
> There's some careful handling needed to find the device and lock it
> safely, without it getting unregistered, and without taking rtnl_lock
> (the latter being the whole point of the new locking, after all).
> 
> Signed-off-by: Jakub Kicinski <kuba@kernel.org>
> ---
>  net/core/dev.h |  16 +++++++
>  net/core/dev.c | 110 +++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 126 insertions(+)

[...]

> diff --git a/net/core/dev.c b/net/core/dev.c
> index fda4e1039bf0..5c1e71afbe1c 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -783,6 +783,49 @@ struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id)
>  	return napi;
>  }

[...]

> +
> +struct net_device *
> +netdev_xa_find_lock(struct net *net, struct net_device *dev,
> +		    unsigned long *index)

Minor nit, the other added helper functions have docs, but (unless I
missed it somewhere) this one doesn't. Maybe worthwhile to add
docs if sending a v2, but probably not worth a re-roll just for
this.
diff mbox series

Patch

diff --git a/net/core/dev.h b/net/core/dev.h
index d8966847794c..25ae732c0775 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -2,6 +2,7 @@ 
 #ifndef _NET_CORE_DEV_H
 #define _NET_CORE_DEV_H
 
+#include <linux/cleanup.h>
 #include <linux/types.h>
 #include <linux/rwsem.h>
 #include <linux/netdevice.h>
@@ -23,8 +24,23 @@  struct sd_flow_limit {
 extern int netdev_flow_limit_table_len;
 
 struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id);
+struct napi_struct *
+netdev_napi_by_id_lock(struct net *net, unsigned int napi_id);
 struct net_device *dev_get_by_napi_id(unsigned int napi_id);
 
+struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex);
+struct net_device *__netdev_put_lock(struct net_device *dev);
+struct net_device *
+netdev_xa_find_lock(struct net *net, struct net_device *dev,
+		    unsigned long *index);
+
+DEFINE_FREE(netdev_unlock, struct net_device *, if (_T) netdev_unlock(_T));
+
+#define for_each_netdev_lock_scoped(net, var_name, ifindex)		\
+	for (struct net_device *var_name __free(netdev_unlock) = NULL;	\
+	     (var_name = netdev_xa_find_lock(net, var_name, &ifindex)); \
+	     ifindex++)
+
 #ifdef CONFIG_PROC_FS
 int __init dev_proc_init(void);
 #else
diff --git a/net/core/dev.c b/net/core/dev.c
index fda4e1039bf0..5c1e71afbe1c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -783,6 +783,49 @@  struct napi_struct *netdev_napi_by_id(struct net *net, unsigned int napi_id)
 	return napi;
 }
 
+/**
+ *	netdev_napi_by_id_lock() - find a device by NAPI ID and lock it
+ *	@net: the applicable net namespace
+ *	@napi_id: ID of a NAPI of a target device
+ *
+ *	Find a NAPI instance with @napi_id. Lock its device.
+ *	The device must be in %NETREG_REGISTERED state for lookup to succeed.
+ *	netdev_unlock() must be called to release it.
+ *
+ *	Return: pointer to NAPI, its device with lock held, NULL if not found.
+ */
+struct napi_struct *
+netdev_napi_by_id_lock(struct net *net, unsigned int napi_id)
+{
+	struct napi_struct *napi;
+	struct net_device *dev;
+
+	rcu_read_lock();
+	napi = netdev_napi_by_id(net, napi_id);
+	if (!napi || napi->dev->reg_state != NETREG_REGISTERED) {
+		rcu_read_unlock();
+		return NULL;
+	}
+
+	dev = napi->dev;
+	dev_hold(dev);
+	rcu_read_unlock();
+
+	dev = __netdev_put_lock(dev);
+	if (!dev)
+		return NULL;
+
+	rcu_read_lock();
+	napi = netdev_napi_by_id(net, napi_id);
+	if (napi && napi->dev != dev)
+		napi = NULL;
+	rcu_read_unlock();
+
+	if (!napi)
+		netdev_unlock(dev);
+	return napi;
+}
+
 /**
  *	__dev_get_by_name	- find a device by its name
  *	@net: the applicable net namespace
@@ -971,6 +1014,73 @@  struct net_device *dev_get_by_napi_id(unsigned int napi_id)
 	return napi ? napi->dev : NULL;
 }
 
+/* Release the held reference on the net_device, and if the net_device
+ * is still registered try to lock the instance lock. If device is being
+ * unregistered NULL will be returned (but the reference has been released,
+ * either way!)
+ *
+ * This helper is intended for locking net_device after it has been looked up
+ * using a lockless lookup helper. Lock prevents the instance from going away.
+ */
+struct net_device *__netdev_put_lock(struct net_device *dev)
+{
+	netdev_lock(dev);
+	if (dev->reg_state > NETREG_REGISTERED) {
+		netdev_unlock(dev);
+		dev_put(dev);
+		return NULL;
+	}
+	dev_put(dev);
+	return dev;
+}
+
+/**
+ *	netdev_get_by_index_lock() - find a device by its ifindex
+ *	@net: the applicable net namespace
+ *	@ifindex: index of device
+ *
+ *	Search for an interface by index. If a valid device
+ *	with @ifindex is found it will be returned with netdev->lock held.
+ *	netdev_unlock() must be called to release it.
+ *
+ *	Return: pointer to a device with lock held, NULL if not found.
+ */
+struct net_device *netdev_get_by_index_lock(struct net *net, int ifindex)
+{
+	struct net_device *dev;
+
+	dev = dev_get_by_index(net, ifindex);
+	if (!dev)
+		return NULL;
+
+	return __netdev_put_lock(dev);
+}
+
+struct net_device *
+netdev_xa_find_lock(struct net *net, struct net_device *dev,
+		    unsigned long *index)
+{
+	if (dev)
+		netdev_unlock(dev);
+
+	do {
+		rcu_read_lock();
+		dev = xa_find(&net->dev_by_index, index, ULONG_MAX, XA_PRESENT);
+		if (!dev) {
+			rcu_read_unlock();
+			return NULL;
+		}
+		dev_hold(dev);
+		rcu_read_unlock();
+
+		dev = __netdev_put_lock(dev);
+		if (dev)
+			return dev;
+
+		(*index)++;
+	} while (true);
+}
+
 static DEFINE_SEQLOCK(netdev_rename_lock);
 
 void netdev_copy_name(struct net_device *dev, char *name)