diff mbox series

[net-next,v4,6/6] Create netdev->neighbour association

Message ID 20241015165929.3203216-7-gnaaman@drivenets.com (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series Improve neigh_flush_dev performance | expand

Checks

Context Check Description
netdev/tree_selection success Clearly marked for net-next, async
netdev/apply fail Patch does not apply to net-next-1

Commit Message

Gilad Naaman Oct. 15, 2024, 4:59 p.m. UTC
Create a mapping between a netdev and its neighoburs,
allowing for much cheaper flushes.

Signed-off-by: Gilad Naaman <gnaaman@drivenets.com>
---
 .../networking/net_cachelines/net_device.rst  |  1 +
 include/linux/netdevice.h                     |  7 ++
 include/net/neighbour.h                       | 10 +--
 include/net/neighbour_tables.h                | 12 +++
 net/core/neighbour.c                          | 85 ++++++++++++-------
 net/mpls/af_mpls.c                            |  2 +-
 6 files changed, 75 insertions(+), 42 deletions(-)
 create mode 100644 include/net/neighbour_tables.h

Comments

Kuniyuki Iwashima Oct. 15, 2024, 11:54 p.m. UTC | #1
From: Gilad Naaman <gnaaman@drivenets.com>
Date: Tue, 15 Oct 2024 16:59:26 +0000
> diff --git a/net/core/neighbour.c b/net/core/neighbour.c
> index 61b5f0d4896a..dbfd27f79bb8 100644
> --- a/net/core/neighbour.c
> +++ b/net/core/neighbour.c
> @@ -61,6 +61,19 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
>  static const struct seq_operations neigh_stat_seq_ops;
>  #endif
>  
> +static int family_to_neightbl_index(int family)
> +{

Let's return hlist_head * like this:

static hlist_head *neigh_get_dev_table(struct net_device *dev, int family)
{
	int i;

	switch (family) {
	case default:
		DEBUG_NET_WARN_ON_ONCE(1);
		fallthrough; /* to avoid panic by null-ptr-deref */
	case AF_INET:
		i = NEIGH_ARP_TABLE;
		break;
	case AF_INET6:
		i = NEIGH_ND_TABLE;
		break;
	}

	return &dev->neighbours[i];
}


>  @@ -357,46 +371,45 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
>  			    bool skip_perm)
>  {
>  	int i;
> +	struct neighbour *n;
> +	struct hlist_node *tmp;

nit: reverse xmas tree order.

and cache hlist_head * from neigh_get_dev_table().

>  	struct neigh_hash_table *nht;

nht is no longer used ?


>  
> +	i = family_to_neightbl_index(tbl->family);
> +
>  	nht = rcu_dereference_protected(tbl->nht,
>  					lockdep_is_held(&tbl->lock));
>  
> -	for (i = 0; i < (1 << nht->hash_shift); i++) {
> -		struct neighbour *n;
> -
> -		neigh_for_each(n, &nht->hash_heads[i]) {
> -			if (dev && n->dev != dev)
> -				continue;
> -			if (skip_perm && n->nud_state & NUD_PERMANENT)
> -				continue;
> +	hlist_for_each_entry_safe(n, tmp, &dev->neighbours[i], dev_list) {
> +		if (skip_perm && n->nud_state & NUD_PERMANENT)
> +			continue;
>  
> -			hlist_del_rcu(&n->hash);
> -			write_lock(&n->lock);
> -			neigh_del_timer(n);
> -			neigh_mark_dead(n);
> -			if (refcount_read(&n->refcnt) != 1) {
> -				/* The most unpleasant situation.
> -				   We must destroy neighbour entry,
> -				   but someone still uses it.
> -
> -				   The destroy will be delayed until
> -				   the last user releases us, but
> -				   we must kill timers etc. and move
> -				   it to safe state.
> -				 */
> -				__skb_queue_purge(&n->arp_queue);
> -				n->arp_queue_len_bytes = 0;
> -				WRITE_ONCE(n->output, neigh_blackhole);
> -				if (n->nud_state & NUD_VALID)
> -					n->nud_state = NUD_NOARP;
> -				else
> -					n->nud_state = NUD_NONE;
> -				neigh_dbg(2, "neigh %p is stray\n", n);
> -			}
> -			write_unlock(&n->lock);
> -			neigh_cleanup_and_release(n);
> +		hlist_del_rcu(&n->hash);
> +		hlist_del_rcu(&n->dev_list);
> +		write_lock(&n->lock);
> +		neigh_del_timer(n);
> +		neigh_mark_dead(n);
> +		if (refcount_read(&n->refcnt) != 1) {
> +			/* The most unpleasant situation.
> +			 * We must destroy neighbour entry,
> +			 * but someone still uses it.
> +			 *
> +			 * The destroy will be delayed until
> +			 * the last user releases us, but
> +			 * we must kill timers etc. and move
> +			 * it to safe state.
> +			 */
> +			__skb_queue_purge(&n->arp_queue);
> +			n->arp_queue_len_bytes = 0;
> +			WRITE_ONCE(n->output, neigh_blackhole);
> +			if (n->nud_state & NUD_VALID)
> +				n->nud_state = NUD_NOARP;
> +			else
> +				n->nud_state = NUD_NONE;
> +			neigh_dbg(2, "neigh %p is stray\n", n);
>  		}
> +		write_unlock(&n->lock);
> +		neigh_cleanup_and_release(n);
>  	}
>  }
>  
> @@ -672,6 +685,10 @@ ___neigh_create(struct neigh_table *tbl, const void *pkey,
>  	if (want_ref)
>  		neigh_hold(n);
>  	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
> +
> +	error = family_to_neightbl_index(tbl->family);
> +	hlist_add_head_rcu(&n->dev_list, &dev->neighbours[error]);

Let's use neigh_dev_get_table() directly.

> +
>  	write_unlock_bh(&tbl->lock);
>  	neigh_dbg(2, "neigh %p is created\n", n);
>  	rc = n;
diff mbox series

Patch

diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst
index db6192b2bb50..2edb6ac1cab4 100644
--- a/Documentation/networking/net_cachelines/net_device.rst
+++ b/Documentation/networking/net_cachelines/net_device.rst
@@ -189,4 +189,5 @@  u64                                 max_pacing_offload_horizon
 struct_napi_config*                 napi_config
 unsigned_long                       gro_flush_timeout
 u32                                 napi_defer_hard_irqs
+struct hlist_head                   neighbours[2]
 =================================== =========================== =================== =================== ===================================================================================
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 8feaca12655e..80bde95cc302 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -52,6 +52,7 @@ 
 #include <net/net_trackers.h>
 #include <net/net_debug.h>
 #include <net/dropreason-core.h>
+#include <net/neighbour_tables.h>
 
 struct netpoll_info;
 struct device;
@@ -2034,6 +2035,9 @@  enum netdev_reg_state {
  *	@napi_defer_hard_irqs:	If not zero, provides a counter that would
  *				allow to avoid NIC hard IRQ, on busy queues.
  *
+ *	@neighbours:	List heads pointing to this device's neighbours'
+ *			dev_list, one per address-family.
+ *
  *	FIXME: cleanup struct net_device such that network protocol info
  *	moves out.
  */
@@ -2443,6 +2447,9 @@  struct net_device {
 	 */
 	struct net_shaper_hierarchy *net_shaper_hierarchy;
 #endif
+
+	struct hlist_head neighbours[NEIGH_NR_TABLES];
+
 	u8			priv[] ____cacheline_aligned
 				       __counted_by(priv_len);
 } ____cacheline_aligned;
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 21c0c20a0ed5..d0bc618d4fe2 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -29,6 +29,7 @@ 
 #include <linux/sysctl.h>
 #include <linux/workqueue.h>
 #include <net/rtnetlink.h>
+#include <net/neighbour_tables.h>
 
 /*
  * NUD stands for "neighbor unreachability detection"
@@ -136,6 +137,7 @@  struct neigh_statistics {
 
 struct neighbour {
 	struct hlist_node	hash;
+	struct hlist_node	dev_list;
 	struct neigh_table	*tbl;
 	struct neigh_parms	*parms;
 	unsigned long		confirmed;
@@ -236,14 +238,6 @@  struct neigh_table {
 	struct pneigh_entry	**phash_buckets;
 };
 
-enum {
-	NEIGH_ARP_TABLE = 0,
-	NEIGH_ND_TABLE = 1,
-	NEIGH_DN_TABLE = 2,
-	NEIGH_NR_TABLES,
-	NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
-};
-
 static inline int neigh_parms_family(struct neigh_parms *p)
 {
 	return p->tbl->family;
diff --git a/include/net/neighbour_tables.h b/include/net/neighbour_tables.h
new file mode 100644
index 000000000000..bcffbe8f7601
--- /dev/null
+++ b/include/net/neighbour_tables.h
@@ -0,0 +1,12 @@ 
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _NET_NEIGHBOUR_TABLES_H
+#define _NET_NEIGHBOUR_TABLES_H
+
+enum {
+	NEIGH_ARP_TABLE = 0,
+	NEIGH_ND_TABLE = 1,
+	NEIGH_NR_TABLES,
+	NEIGH_LINK_TABLE = NEIGH_NR_TABLES /* Pseudo table for neigh_xmit */
+};
+
+#endif
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 61b5f0d4896a..dbfd27f79bb8 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -61,6 +61,19 @@  static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
 static const struct seq_operations neigh_stat_seq_ops;
 #endif
 
+static int family_to_neightbl_index(int family)
+{
+	switch (family) {
+	case AF_INET:
+		return NEIGH_ARP_TABLE;
+	case AF_INET6:
+		return NEIGH_ND_TABLE;
+	default:
+		DEBUG_NET_WARN_ON_ONCE(1);
+		return 0; /* to avoid panic by null-ptr-deref */
+	}
+}
+
 /*
    Neighbour hash table buckets are protected with rwlock tbl->lock.
 
@@ -216,6 +229,7 @@  bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
 	write_lock(&ndel->lock);
 	if (refcount_read(&ndel->refcnt) == 1) {
 		hlist_del_rcu(&ndel->hash);
+		hlist_del_rcu(&ndel->dev_list);
 		neigh_mark_dead(ndel);
 		retval = true;
 	}
@@ -357,46 +371,45 @@  static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
 			    bool skip_perm)
 {
 	int i;
+	struct neighbour *n;
+	struct hlist_node *tmp;
 	struct neigh_hash_table *nht;
 
+	i = family_to_neightbl_index(tbl->family);
+
 	nht = rcu_dereference_protected(tbl->nht,
 					lockdep_is_held(&tbl->lock));
 
-	for (i = 0; i < (1 << nht->hash_shift); i++) {
-		struct neighbour *n;
-
-		neigh_for_each(n, &nht->hash_heads[i]) {
-			if (dev && n->dev != dev)
-				continue;
-			if (skip_perm && n->nud_state & NUD_PERMANENT)
-				continue;
+	hlist_for_each_entry_safe(n, tmp, &dev->neighbours[i], dev_list) {
+		if (skip_perm && n->nud_state & NUD_PERMANENT)
+			continue;
 
-			hlist_del_rcu(&n->hash);
-			write_lock(&n->lock);
-			neigh_del_timer(n);
-			neigh_mark_dead(n);
-			if (refcount_read(&n->refcnt) != 1) {
-				/* The most unpleasant situation.
-				   We must destroy neighbour entry,
-				   but someone still uses it.
-
-				   The destroy will be delayed until
-				   the last user releases us, but
-				   we must kill timers etc. and move
-				   it to safe state.
-				 */
-				__skb_queue_purge(&n->arp_queue);
-				n->arp_queue_len_bytes = 0;
-				WRITE_ONCE(n->output, neigh_blackhole);
-				if (n->nud_state & NUD_VALID)
-					n->nud_state = NUD_NOARP;
-				else
-					n->nud_state = NUD_NONE;
-				neigh_dbg(2, "neigh %p is stray\n", n);
-			}
-			write_unlock(&n->lock);
-			neigh_cleanup_and_release(n);
+		hlist_del_rcu(&n->hash);
+		hlist_del_rcu(&n->dev_list);
+		write_lock(&n->lock);
+		neigh_del_timer(n);
+		neigh_mark_dead(n);
+		if (refcount_read(&n->refcnt) != 1) {
+			/* The most unpleasant situation.
+			 * We must destroy neighbour entry,
+			 * but someone still uses it.
+			 *
+			 * The destroy will be delayed until
+			 * the last user releases us, but
+			 * we must kill timers etc. and move
+			 * it to safe state.
+			 */
+			__skb_queue_purge(&n->arp_queue);
+			n->arp_queue_len_bytes = 0;
+			WRITE_ONCE(n->output, neigh_blackhole);
+			if (n->nud_state & NUD_VALID)
+				n->nud_state = NUD_NOARP;
+			else
+				n->nud_state = NUD_NONE;
+			neigh_dbg(2, "neigh %p is stray\n", n);
 		}
+		write_unlock(&n->lock);
+		neigh_cleanup_and_release(n);
 	}
 }
 
@@ -672,6 +685,10 @@  ___neigh_create(struct neigh_table *tbl, const void *pkey,
 	if (want_ref)
 		neigh_hold(n);
 	hlist_add_head_rcu(&n->hash, &nht->hash_heads[hash_val]);
+
+	error = family_to_neightbl_index(tbl->family);
+	hlist_add_head_rcu(&n->dev_list, &dev->neighbours[error]);
+
 	write_unlock_bh(&tbl->lock);
 	neigh_dbg(2, "neigh %p is created\n", n);
 	rc = n;
@@ -953,6 +970,7 @@  static void neigh_periodic_work(struct work_struct *work)
 			     !time_in_range_open(jiffies, n->used,
 						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
 				hlist_del_rcu(&n->hash);
+				hlist_del_rcu(&n->dev_list);
 				neigh_mark_dead(n);
 				write_unlock(&n->lock);
 				neigh_cleanup_and_release(n);
@@ -3052,6 +3070,7 @@  void __neigh_for_each_release(struct neigh_table *tbl,
 			release = cb(n);
 			if (release) {
 				hlist_del_rcu(&n->hash);
+				hlist_del_rcu(&n->dev_list);
 				neigh_mark_dead(n);
 			}
 			write_unlock(&n->lock);
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index df62638b6498..a0573847bc55 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1664,7 +1664,7 @@  static int nla_put_via(struct sk_buff *skb,
 		       u8 table, const void *addr, int alen)
 {
 	static const int table_to_family[NEIGH_NR_TABLES + 1] = {
-		AF_INET, AF_INET6, AF_DECnet, AF_PACKET,
+		AF_INET, AF_INET6, AF_PACKET,
 	};
 	struct nlattr *nla;
 	struct rtvia *via;