[net-next,2/2] inet: call inet6_ehashfn() once from inet6_hash_connect()

Message ID 20250305034550.879255-3-edumazet@google.com (mailing list archive)
State Accepted
Commit d4438ce68bf145aa1d7ed03ebf3b8ece337e3f64
Delegated to: Netdev Maintainers
Series tcp: even faster connect() under stress

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 10 this patch: 10
netdev/build_tools success Errors and warnings before: 26 (+0) this patch: 26 (+0)
netdev/cc_maintainers warning 1 maintainers not CCed: dsahern@kernel.org
netdev/build_clang success Errors and warnings before: 16 this patch: 16
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 1742 this patch: 1742
netdev/checkpatch warning WARNING: line length of 90 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2025-03-05--06-00 (tests: 894)

Commit Message

Eric Dumazet March 5, 2025, 3:45 a.m. UTC
inet6_ehashfn() being called from __inet6_check_established()
has a big impact on performance, as shown in the Tested section.

After the prior patch, we can compute the hash for port 0
once from inet6_hash_connect(), and derive each hash in
__inet_hash_connect() from this initial hash:

hash(saddr, lport, daddr, dport) == hash(saddr, 0, daddr, dport) + lport

Apply the same principle to __inet_check_established(),
although inet_ehashfn() has a smaller cost.
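
For illustration, a minimal userspace sketch of the derivation
(toy_ehashfn() is a hypothetical stand-in, not the kernel's
siphash-based inet6_ehashfn(); the only property it borrows from the
prior patch is that lport is added as the final step):

  #include <stdint.h>
  #include <stdio.h>

  /* Hypothetical hash: everything except lport is mixed first,
   * lport is added last, so the result is linear in lport.
   */
  static uint32_t toy_ehashfn(uint32_t saddr, uint16_t lport,
                              uint32_t daddr, uint16_t dport)
  {
          uint32_t h = saddr * 2654435761u ^ daddr * 2246822519u ^ dport;

          return h + lport;
  }

  int main(void)
  {
          /* One "expensive" hash computed with lport == 0 ... */
          uint32_t hash_port0 = toy_ehashfn(0x0a000001, 0, 0x0a000002, 443);
          unsigned int port;

          /* ... then each candidate port costs a single addition. */
          for (port = 32768; port < 32771; port++)
                  printf("port %u: derived 0x%x direct 0x%x\n", port,
                         hash_port0 + port,
                         toy_ehashfn(0x0a000001, port, 0x0a000002, 443));
          return 0;
  }

Because lport only enters via the trailing addition, __inet_hash_connect()
can reuse hash_port0 and pay one addition per candidate port instead of a
full hash computation.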

Tested:

Server: ulimit -n 40000; neper/tcp_crr -T 200 -F 30000 -6 --nolog
Client: ulimit -n 40000; neper/tcp_crr -T 200 -F 30000 -6 --nolog -c -H server

Before this patch:

  utime_start=0.286131
  utime_end=4.378886
  stime_start=11.952556
  stime_end=1991.655533
  num_transactions=1446830
  latency_min=0.001061085
  latency_max=12.075275028
  latency_mean=0.376375302
  latency_stddev=1.361969596
  num_samples=306383
  throughput=151866.56

perf top:

 50.01%  [kernel]       [k] __inet6_check_established
 20.65%  [kernel]       [k] __inet_hash_connect
 15.81%  [kernel]       [k] inet6_ehashfn
  2.92%  [kernel]       [k] rcu_all_qs
  2.34%  [kernel]       [k] __cond_resched
  0.50%  [kernel]       [k] _raw_spin_lock
  0.34%  [kernel]       [k] sched_balance_trigger
  0.24%  [kernel]       [k] queued_spin_lock_slowpath

After this patch:

  utime_start=0.315047
  utime_end=9.257617
  stime_start=7.041489
  stime_end=1923.688387
  num_transactions=3057968
  latency_min=0.003041375
  latency_max=7.056589232
  latency_mean=0.141075048    # Better latency metrics
  latency_stddev=0.526900516
  num_samples=312996
  throughput=320677.21        # 111 % increase, and 229 % for the series

perf top: inet6_ehashfn is no longer seen.

 39.67%  [kernel]       [k] __inet_hash_connect
 37.06%  [kernel]       [k] __inet6_check_established
  4.79%  [kernel]       [k] rcu_all_qs
  3.82%  [kernel]       [k] __cond_resched
  1.76%  [kernel]       [k] sched_balance_domains
  0.82%  [kernel]       [k] _raw_spin_lock
  0.81%  [kernel]       [k] sched_balance_rq
  0.81%  [kernel]       [k] sched_balance_trigger
  0.76%  [kernel]       [k] queued_spin_lock_slowpath

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 include/net/inet_hashtables.h |  4 +++-
 include/net/ip.h              |  2 +-
 net/ipv4/inet_hashtables.c    | 26 ++++++++++++++++++--------
 net/ipv6/inet6_hashtables.c   | 15 +++++++++++----
 4 files changed, 33 insertions(+), 14 deletions(-)

Comments

Kuniyuki Iwashima March 6, 2025, 4:26 a.m. UTC | #1
From: Eric Dumazet <edumazet@google.com>
Date: Wed,  5 Mar 2025 03:45:50 +0000
> inet6_ehashfn() being called from __inet6_check_established()
> has a big impact on performance, as shown in the Tested section.
> 
> [...]
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>

Interesting optimisation, thanks!
Jason Xing March 6, 2025, 8:22 a.m. UTC | #2
On Wed, Mar 5, 2025 at 11:46 AM Eric Dumazet <edumazet@google.com> wrote:
>
> inet6_ehashfn() being called from __inet6_check_established()
> has a big impact on performance, as shown in the Tested section.
>
> [...]
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>

Thank you!

Tested-by: Jason Xing <kerneljasonxing@gmail.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>

Patch

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index f447d61d95982090aac492b31e4199534970c4fb..949641e925398f741f2d4dda5898efc683b305dc 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -527,10 +527,12 @@  static inline void sk_rcv_saddr_set(struct sock *sk, __be32 addr)
 
 int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 			struct sock *sk, u64 port_offset,
+			u32 hash_port0,
 			int (*check_established)(struct inet_timewait_death_row *,
 						 struct sock *, __u16,
 						 struct inet_timewait_sock **,
-						 bool rcu_lookup));
+						 bool rcu_lookup,
+						 u32 hash));
 
 int inet_hash_connect(struct inet_timewait_death_row *death_row,
 		      struct sock *sk);
diff --git a/include/net/ip.h b/include/net/ip.h
index ce5e59957dd553697536ddf111bb1406d9d99408..8a48ade24620b4c8e2ebb4726f27a69aac7138b0 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -357,7 +357,7 @@  static inline void inet_get_local_port_range(const struct net *net, int *low, in
 bool inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high);
 
 #ifdef CONFIG_SYSCTL
-static inline bool inet_is_local_reserved_port(struct net *net, unsigned short port)
+static inline bool inet_is_local_reserved_port(const struct net *net, unsigned short port)
 {
 	if (!net->ipv4.sysctl_local_reserved_ports)
 		return false;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3025d2b708852acd9744709a897fca17564523d5..1e3a9573c19834cc96d0b4cbf816f86433134450 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -538,7 +538,8 @@  EXPORT_SYMBOL_GPL(__inet_lookup_established);
 static int __inet_check_established(struct inet_timewait_death_row *death_row,
 				    struct sock *sk, __u16 lport,
 				    struct inet_timewait_sock **twp,
-				    bool rcu_lookup)
+				    bool rcu_lookup,
+				    u32 hash)
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_sock *inet = inet_sk(sk);
@@ -549,8 +550,6 @@  static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	int sdif = l3mdev_master_ifindex_by_index(net, dif);
 	INET_ADDR_COOKIE(acookie, saddr, daddr);
 	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
-	unsigned int hash = inet_ehashfn(net, daddr, lport,
-					 saddr, inet->inet_dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	struct inet_timewait_sock *tw = NULL;
 	const struct hlist_nulls_node *node;
@@ -1007,9 +1006,10 @@  static u32 *table_perturb;
 
 int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		struct sock *sk, u64 port_offset,
+		u32 hash_port0,
 		int (*check_established)(struct inet_timewait_death_row *,
 			struct sock *, __u16, struct inet_timewait_sock **,
-			bool rcu_lookup))
+			bool rcu_lookup, u32 hash))
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_bind_hashbucket *head, *head2;
@@ -1027,7 +1027,8 @@  int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 
 	if (port) {
 		local_bh_disable();
-		ret = check_established(death_row, sk, port, NULL, false);
+		ret = check_established(death_row, sk, port, NULL, false,
+					hash_port0 + port);
 		local_bh_enable();
 		return ret;
 	}
@@ -1071,7 +1072,8 @@  int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 				rcu_read_unlock();
 				goto next_port;
 			}
-			if (!check_established(death_row, sk, port, &tw, true))
+			if (!check_established(death_row, sk, port, &tw, true,
+					       hash_port0 + port))
 				break;
 			rcu_read_unlock();
 			goto next_port;
@@ -1090,7 +1092,8 @@  int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 					goto next_port_unlock;
 				WARN_ON(hlist_empty(&tb->bhash2));
 				if (!check_established(death_row, sk,
-						       port, &tw, false))
+						       port, &tw, false,
+						       hash_port0 + port))
 					goto ok;
 				goto next_port_unlock;
 			}
@@ -1197,11 +1200,18 @@  int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 int inet_hash_connect(struct inet_timewait_death_row *death_row,
 		      struct sock *sk)
 {
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct net *net = sock_net(sk);
 	u64 port_offset = 0;
+	u32 hash_port0;
 
 	if (!inet_sk(sk)->inet_num)
 		port_offset = inet_sk_port_offset(sk);
-	return __inet_hash_connect(death_row, sk, port_offset,
+
+	hash_port0 = inet_ehashfn(net, inet->inet_rcv_saddr, 0,
+				  inet->inet_daddr, inet->inet_dport);
+
+	return __inet_hash_connect(death_row, sk, port_offset, hash_port0,
 				   __inet_check_established);
 }
 EXPORT_SYMBOL_GPL(inet_hash_connect);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 3d95f1e75a118ff8027d4ec0f33910d23b6af832..76ee521189eb77c48845eeeac9d50b3a93a250a6 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -264,7 +264,8 @@  EXPORT_SYMBOL_GPL(inet6_lookup);
 static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 				     struct sock *sk, const __u16 lport,
 				     struct inet_timewait_sock **twp,
-				     bool rcu_lookup)
+				     bool rcu_lookup,
+				     u32 hash)
 {
 	struct inet_hashinfo *hinfo = death_row->hashinfo;
 	struct inet_sock *inet = inet_sk(sk);
@@ -274,8 +275,6 @@  static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	struct net *net = sock_net(sk);
 	const int sdif = l3mdev_master_ifindex_by_index(net, dif);
 	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);
-	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
-						inet->inet_dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
 	struct inet_timewait_sock *tw = NULL;
 	const struct hlist_nulls_node *node;
@@ -354,11 +353,19 @@  static u64 inet6_sk_port_offset(const struct sock *sk)
 int inet6_hash_connect(struct inet_timewait_death_row *death_row,
 		       struct sock *sk)
 {
+	const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr;
+	const struct in6_addr *saddr = &sk->sk_v6_daddr;
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct net *net = sock_net(sk);
 	u64 port_offset = 0;
+	u32 hash_port0;
 
 	if (!inet_sk(sk)->inet_num)
 		port_offset = inet6_sk_port_offset(sk);
-	return __inet_hash_connect(death_row, sk, port_offset,
+
+	hash_port0 = inet6_ehashfn(net, daddr, 0, saddr, inet->inet_dport);
+
+	return __inet_hash_connect(death_row, sk, port_offset, hash_port0,
 				   __inet6_check_established);
 }
 EXPORT_SYMBOL_GPL(inet6_hash_connect);