
[net-next] net: rfs: hash function change

Message ID 20250321171309.634100-1-edumazet@google.com (mailing list archive)
State New
Delegated to: Netdev Maintainers
Series [net-next] net: rfs: hash function change

Checks

Context Check Description
netdev/series_format success Single patches do not need cover letters
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success Errors and warnings before: 26 (+0) this patch: 26 (+0)
netdev/cc_maintainers warning 1 maintainers not CCed: atenart@kernel.org
netdev/build_clang success Errors and warnings before: 0 this patch: 0
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 6 this patch: 6
netdev/checkpatch warning WARNING: 'controled' may be misspelled - perhaps 'controlled'?
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 69 this patch: 69
netdev/source_inline success Was 0 now: 0
netdev/contest warning net-next-2025-03-21--18-00 (tests: 896)

Commit Message

Eric Dumazet March 21, 2025, 5:13 p.m. UTC
RFS is using two kinds of hash tables.

The first one is controlled by /proc/sys/net/core/rps_sock_flow_entries = 2^N,
and using the N low order bits of the l4 hash is good enough.
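As a reading aid, here is a minimal userspace sketch of that indexing
(plain C with illustrative names such as sock_flow_slot; this is not the
kernel code):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: with rps_sock_flow_entries = 2^N, the global
 * socket flow table slot is the N low order bits of the l4 hash,
 * i.e. hash % 2^N done with a mask.
 */
static uint32_t sock_flow_slot(uint32_t hash, unsigned int n)
{
	return hash & ((1U << n) - 1);
}

int main(void)
{
	printf("%u\n", sock_flow_slot(0xdeadbeefu, 15)); /* 2^15 entries */
	return 0;
}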

Then each RX queue has its own hash table, controlled by
/sys/class/net/eth1/queues/rx-$q/rps_flow_cnt = 2^X

The current hash function, using the X low order bits, is suboptimal
because RSS usually selects the RX queue with Func(hash) = (hash % power_of_two),
so all flows steered to a given queue share the same low order bits.

For example, with 32 RX queues, the 5 low order bits of the hash
carry no entropy for a given queue.
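A small userspace sketch of that entropy loss (illustrative hash values;
it assumes RSS picks the queue as hash % 32 and a per-queue table of
rps_flow_cnt = 256 entries):

#include <stdint.h>
#include <stdio.h>

/* Illustrative only: these four hashes all share their 5 low order
 * bits (0b00111), so RSS with hash % 32 steers them all to queue 7.
 * Indexing the per-queue flow table with (hash & mask) then reuses
 * those frozen bits: every reachable slot is congruent to 7 mod 32,
 * so only 1 in 32 slots of the table can ever be used.
 */
int main(void)
{
	const uint32_t table_mask = 255; /* rps_flow_cnt = 256 */
	const uint32_t hashes[] = { 0x00000007u, 0x12345527u,
				    0xabcdef47u, 0xdeadbee7u };

	for (int i = 0; i < 4; i++)
		printf("queue=%u slot=%u\n",
		       hashes[i] % 32, hashes[i] & table_mask);
	return 0;
}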

Switch this hash function to hash_32(hash, log) to increase the
chances of using all possible slots and to reduce collisions.
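hash_32() is the kernel's multiplicative hash from include/linux/hash.h;
the top bits of the product depend on all input bits, so frozen low order
input bits no longer pin the slot. A userspace replica, for illustration:

#include <stdint.h>
#include <stdio.h>

#define GOLDEN_RATIO_32 0x61C88647u /* same constant as include/linux/hash.h */

/* Userspace replica of the kernel's hash_32(val, bits): multiply by
 * a 32-bit golden-ratio constant and keep the top 'bits' bits.
 */
static uint32_t hash_32(uint32_t val, unsigned int bits)
{
	return (val * GOLDEN_RATIO_32) >> (32 - bits);
}

int main(void)
{
	/* Hashes sharing their 5 low order bits, as flows on one RSS
	 * queue would, still spread across the 2^8 = 256 slots.
	 */
	for (uint32_t i = 0; i < 4; i++) {
		uint32_t hash = (i << 5) | 0x7u; /* low 5 bits fixed */
		printf("hash=0x%08x slot=%u\n", hash, hash_32(hash, 8));
	}
	return 0;
}

The four sample hashes land in scattered slots, whereas (hash & 255)
would have kept them all congruent to 7 mod 32.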

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Tom Herbert <tom@herbertland.com>
---
 include/net/rps.h    |  2 +-
 net/core/dev.c       | 13 +++++++++----
 net/core/net-sysfs.c |  4 ++--
 3 files changed, 12 insertions(+), 7 deletions(-)

Patch

diff --git a/include/net/rps.h b/include/net/rps.h
index a93401d23d66e45210acc73f0326087813b69d59..e358e9711f27523534fdf4cbf57729cbdf629b8a 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -39,7 +39,7 @@  struct rps_dev_flow {
  * The rps_dev_flow_table structure contains a table of flow mappings.
  */
 struct rps_dev_flow_table {
-	unsigned int		mask;
+	u8			log;
 	struct rcu_head		rcu;
 	struct rps_dev_flow	flows[];
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index 2355603417650fe10d075c8e85416a488e00626d..c5bf3b1684fe6dc7141b1c341b14423111fe8894 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4731,6 +4731,11 @@  EXPORT_SYMBOL(rps_needed);
 struct static_key_false rfs_needed __read_mostly;
 EXPORT_SYMBOL(rfs_needed);
 
+static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table)
+{
+	return hash_32(hash, flow_table->log);
+}
+
 static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	    struct rps_dev_flow *rflow, u16 next_cpu)
@@ -4757,7 +4762,7 @@  set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		flow_table = rcu_dereference(rxqueue->rps_flow_table);
 		if (!flow_table)
 			goto out;
-		flow_id = skb_get_hash(skb) & flow_table->mask;
+		flow_id = rfs_slot(skb_get_hash(skb), flow_table);
 		rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb,
 							rxq_index, flow_id);
 		if (rc < 0)
@@ -4836,7 +4841,7 @@  static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 		/* OK, now we know there is a match,
 		 * we can look at the local (per receive queue) flow table
 		 */
-		rflow = &flow_table->flows[hash & flow_table->mask];
+		rflow = &flow_table->flows[rfs_slot(hash, flow_table)];
 		tcpu = rflow->cpu;
 
 		/*
@@ -4903,13 +4908,13 @@  bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
 
 	rcu_read_lock();
 	flow_table = rcu_dereference(rxqueue->rps_flow_table);
-	if (flow_table && flow_id <= flow_table->mask) {
+	if (flow_table && flow_id < (1UL << flow_table->log)) {
 		rflow = &flow_table->flows[flow_id];
 		cpu = READ_ONCE(rflow->cpu);
 		if (READ_ONCE(rflow->filter) == filter_id && cpu < nr_cpu_ids &&
 		    ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) -
 			   READ_ONCE(rflow->last_qtail)) <
-		     (int)(10 * flow_table->mask)))
+		     (int)(10 << flow_table->log)))
 			expire = false;
 	}
 	rcu_read_unlock();
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index abaa1c919b984cb3340e99bf1af26864a0cc3405..b6fbe629ccee1ea874a796c6146e4a6e8296c5dc 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1056,7 +1056,7 @@  static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 	rcu_read_lock();
 	flow_table = rcu_dereference(queue->rps_flow_table);
 	if (flow_table)
-		val = (unsigned long)flow_table->mask + 1;
+		val = 1UL << flow_table->log;
 	rcu_read_unlock();
 
 	return sysfs_emit(buf, "%lu\n", val);
@@ -1109,7 +1109,7 @@  static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 		if (!table)
 			return -ENOMEM;
 
-		table->mask = mask;
+		table->log = ilog2(mask) + 1;
 		for (count = 0; count <= mask; count++)
 			table->flows[count].cpu = RPS_NO_CPU;
 	} else {