diff mbox series

[net-next,2/2] Create netdev->neighbour association

Message ID 20241001050959.1799151-3-gnaaman@drivenets.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Improve neigh_flush_dev performance | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next, async
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 46 this patch: 46
netdev/build_tools success Errors and warnings before: 0 this patch: 0
netdev/cc_maintainers warning 2 maintainers not CCed: corbet@lwn.net linux-doc@vger.kernel.org
netdev/build_clang success Errors and warnings before: 87 this patch: 87
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn fail Errors and warnings before: 12 this patch: 12
netdev/checkpatch warning WARNING: added, moved or deleted file(s), does MAINTAINERS need updating? WARNING: line length of 88 exceeds 80 columns WARNING: line length of 91 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc fail Errors and warnings before: 22 this patch: 23
netdev/source_inline success Was 0 now: 0

Commit Message

Gilad Naaman Oct. 1, 2024, 5:09 a.m. UTC
Create a mapping between a netdev and its neighbours,
allowing for much cheaper flushes.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
 .../networking/net_cachelines/net_device.rst  |   1 +
 include/linux/netdevice.h                     |   3 +
 include/net/neighbour.h                       |  10 +-
 include/net/neighbour_tables.h                |  13 +++
 net/core/neighbour.c                          | 100 +++++++++++++-----
 5 files changed, 94 insertions(+), 33 deletions(-)
 create mode 100644 include/net/neighbour_tables.h

Comments

Simon Horman Oct. 3, 2024, 11:26 a.m. UTC | #1
On Tue, Oct 01, 2024 at 05:09:57AM +0000, Gilad Naaman wrote:
> Create a mapping between a netdev and its neighbours,
> allowing for much cheaper flushes.
> 
> Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>

Hi Gilad,

As per my comment on patch 1/2, this is not a full review, but rather some
feedback to take into account once a proper review arrives.

...

> diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst

...

> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h

...

> @@ -2399,6 +2400,8 @@ struct net_device {
>  	/** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */
>  	struct dim_irq_moder	*irq_moder;
>  
> +	struct hlist_head neighbours[NEIGH_NR_TABLES];
> +

Please add an entry for neighbours to the kernel-doc comment for this
structure, which is immediately above it.

This is flagged by ./scripts/kernel-doc -none, and W=1 allmodconfig builds.

>  	u8			priv[] ____cacheline_aligned
>  				       __counted_by(priv_len);
>  } ____cacheline_aligned;

...
diff mbox series

Patch

diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst
index 22b07c814f4a..510c407d7268 100644
--- a/Documentation/networking/net_cachelines/net_device.rst
+++ b/Documentation/networking/net_cachelines/net_device.rst
@@ -183,3 +183,4 @@  struct_devlink_port*                devlink_port
 struct_dpll_pin*                    dpll_pin                                                        
 struct hlist_head                   page_pools
 struct dim_irq_moder*               irq_moder
+struct hlist_head                   neighbours[3]
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e87b5e488325..7b24a792280c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -52,6 +52,7 @@ 
 #include <net/net_trackers.h>
 #include <net/net_debug.h>
 #include <net/dropreason-core.h>
+#include <net/neighbour_tables.h>
 
 struct netpoll_info;
 struct device;
@@ -2399,6 +2400,8 @@  struct net_device {
 	/** @irq_moder: dim parameters used if IS_ENABLED(CONFIG_DIMLIB). */
 	struct dim_irq_moder	*irq_moder;
 
+	struct hlist_head neighbours[NEIGH_NR_TABLES];
+
 	u8			priv[] ____cacheline_aligned
 				       __counted_by(priv_len);
 } ____cacheline_aligned;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 77a4aa53aecb..580c2d00e4d5 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -29,6 +29,7 @@ 
 #include <linux/sysctl.h>
 #include <linux/workqueue.h>
 #include <net/rtnetlink.h>
+#include <net/neighbour_tables.h>
 
 /*
  * NUD stands for "neighbor unreachability detection"
@@ -136,6 +137,7 @@  struct neigh_statistics {
 
 struct neighbour {
 	struct hlist_node __rcu list;
+	struct hlist_node __rcu dev_list;
 	struct neigh_table	*tbl;
 	struct neigh_parms	*parms;
 	unsigned long		confirmed;
@@ -236,14 +238,6 @@  struct neigh_table {
 	struct pneigh_entry	**phash_buckets;
 };
 
-enum {
-	NEIGH_ARP_TABLE = 0,
-	NEIGH_ND_TABLE = 1,
-	NEIGH_DN_TABLE = 2,
-	NEIGH_NR_TABLES,
-	NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
-};
-
 static inline int neigh_parms_family(struct neigh_parms *p)
 {
 	return p->tbl->family;
diff --git a/include/net/neighbour_tables.h b/include/net/neighbour_tables.h
new file mode 100644
index 000000000000..ad98b49d58db
--- /dev/null
+++ b/include/net/neighbour_tables.h
@@ -0,0 +1,13 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_NEIGHBOUR_TABLES_H
+#define _NET_NEIGHBOUR_TABLES_H
+
+enum {
+	NEIGH_ARP_TABLE = 0,
+	NEIGH_ND_TABLE = 1,
+	NEIGH_DN_TABLE = 2,
+	NEIGH_NR_TABLES,
+	NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
+};
+
+#endif
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5b48ed1fdcf0..f3a9a220b343 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -62,6 +62,20 @@  static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
 static const struct seq_operations neigh_stat_seq_ops;
 #endif
 
+static int family_to_neightbl_index(int family)
+{
+	switch (family) {
+	case AF_INET:
+		return NEIGH_ARP_TABLE;
+	case AF_INET6:
+		return NEIGH_ND_TABLE;
+	case AF_DECnet:
+		return NEIGH_DN_TABLE;
+	default:
+		return -1;
+	}
+}
+
 /*
    Neighbour hash table buckets are protected with rwlock tbl->lock.
 
@@ -213,6 +227,7 @@  static bool neigh_del(struct neighbour *n, struct neigh_table *tbl)
 	write_lock(&n->lock);
 	if (refcount_read(&n->refcnt) == 1) {
 		hlist_del_rcu(&n->list);
+		hlist_del_rcu(&n->dev_list);
 		neigh_mark_dead(n);
 		retval = true;
 	}
@@ -355,12 +370,63 @@  static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
 	}
 }
 
+static void _neigh_flush_free_neigh(struct neighbour *n)
+{
+	hlist_del_rcu(&n->list);
+	hlist_del_rcu(&n->dev_list);
+	write_lock(&n->lock);
+	neigh_del_timer(n);
+	neigh_mark_dead(n);
+	if (refcount_read(&n->refcnt) != 1) {
+		/* The most unpleasant situation.
+		 * We must destroy neighbour entry,
+		 * but someone still uses it.
+		 *
+		 * The destroy will be delayed until
+		 * the last user releases us, but
+		 * we must kill timers etc. and move
+		 * it to safe state.
+		 */
+		__skb_queue_purge(&n->arp_queue);
+		n->arp_queue_len_bytes = 0;
+		WRITE_ONCE(n->output, neigh_blackhole);
+		if (n->nud_state & NUD_VALID)
+			n->nud_state = NUD_NOARP;
+		else
+			n->nud_state = NUD_NONE;
+		neigh_dbg(2, "neigh %p is stray\n", n);
+	}
+	write_unlock(&n->lock);
+	neigh_cleanup_and_release(n);
+}
+
+static void neigh_flush_dev_fast(struct neigh_table *tbl, struct hlist_node __rcu *next,
+				 bool skip_perm)
+{
+	struct neighbour *n;
+
+	while (next) {
+		n = container_of(next, struct neighbour, dev_list);
+		next = hlist_next_rcu(next);
+		if (skip_perm && n->nud_state & NUD_PERMANENT)
+			continue;
+
+		_neigh_flush_free_neigh(n);
+	}
+}
+
 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
 			    bool skip_perm)
 {
 	int i;
 	struct neigh_hash_table *nht;
 
+	i = family_to_neightbl_index(tbl->family);
+	if (i != -1) {
+		neigh_flush_dev_fast(tbl, hlist_first_rcu(&dev->neighbours[i]), skip_perm);
+		return;
+	}
+
 	nht = rcu_dereference_protected(tbl->nht,
 					lockdep_is_held(&tbl->lock));
 
@@ -379,31 +445,8 @@  static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
 				np = (struct neighbour __rcu **)&n->list.next;
 				continue;
 			}
-			hlist_del_rcu(&n->list);
-			write_lock(&n->lock);
-			neigh_del_timer(n);
-			neigh_mark_dead(n);
-			if (refcount_read(&n->refcnt) != 1) {
-				/* The most unpleasant situation.
-				   We must destroy neighbour entry,
-				   but someone still uses it.
-
-				   The destroy will be delayed until
-				   the last user releases us, but
-				   we must kill timers etc. and move
-				   it to safe state.
-				 */
-				__skb_queue_purge(&n->arp_queue);
-				n->arp_queue_len_bytes = 0;
-				WRITE_ONCE(n->output, neigh_blackhole);
-				if (n->nud_state & NUD_VALID)
-					n->nud_state = NUD_NOARP;
-				else
-					n->nud_state = NUD_NONE;
-				neigh_dbg(2, "neigh %p is stray\n", n);
-			}
-			write_unlock(&n->lock);
-			neigh_cleanup_and_release(n);
+
+			_neigh_flush_free_neigh(n);
 		}
 	}
 }
@@ -686,6 +729,11 @@  ___neigh_create(struct neigh_table *tbl, const void *pkey,
 	if (want_ref)
 		neigh_hold(n);
 	hlist_add_head_rcu(&n->list, &nht->hash_buckets[hash_val]);
+
+	error = family_to_neightbl_index(tbl->family);
+	if (error != -1)
+		hlist_add_head_rcu(&n->dev_list, &dev->neighbours[error]);
+
 	write_unlock_bh(&tbl->lock);
 	neigh_dbg(2, "neigh %p is created\n", n);
 	rc = n;
@@ -969,6 +1017,7 @@  static void neigh_periodic_work(struct work_struct *work)
 			     !time_in_range_open(jiffies, n->used,
 						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
 				hlist_del_rcu(&n->list);
+				hlist_del_rcu(&n->dev_list);
 				neigh_mark_dead(n);
 				write_unlock(&n->lock);
 				neigh_cleanup_and_release(n);
@@ -3092,6 +3141,7 @@  void __neigh_for_each_release(struct neigh_table *tbl,
 			release = cb(n);
 			if (release) {
 				hlist_del_rcu(&n->list);
+				hlist_del_rcu(&n->dev_list);
 				neigh_mark_dead(n);
 			} else
 				np = (struct neighbour __rcu **)&n->list.next;