diff mbox series

[v2] net: sort queues in xps maps

Message ID 1657852318-33812-1-git-send-email-liyonglong@chinatelecom.cn (mailing list archive)
State Superseded
Delegated to: Netdev Maintainers
Headers show
Series [v2] net: sort queues in xps maps | expand

Checks

Context Check Description
netdev/tree_selection success Guessed tree name to be net-next
netdev/fixes_present success Fixes tag not required for -next series
netdev/subject_prefix warning Target tree name not specified in the subject
netdev/cover_letter success Single patches do not need cover letters
netdev/patch_count success Link
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 8 this patch: 8
netdev/cc_maintainers warning 1 maintainers not CCed: petrm@nvidia.com
netdev/build_clang success Errors and warnings before: 6 this patch: 6
netdev/module_param success Was 0 now: 0
netdev/verify_signedoff fail author Signed-off-by missing
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 8 this patch: 8
netdev/checkpatch warning WARNING: line length of 85 exceeds 80 columns
netdev/kdoc success Errors and warnings before: 0 this patch: 0
netdev/source_inline success Was 0 now: 0

Commit Message

YonglongLi July 15, 2022, 2:31 a.m. UTC
in the following case that set xps of each tx-queue with same cpu mask,
packets in the same tcp stream may be hash to different tx queue. Because
the order of queues in each xps map is not the same.

first set each tx-queue with different cpu mask
$ echo 0 > /sys/class/net/eth0/queues/tx-0/xps_cpus
$ echo 1 > /sys/class/net/eth0/queues/tx-1/xps_cpus
$ echo 2 > /sys/class/net/eth0/queues/tx-2/xps_cpus
$ echo 4 > /sys/class/net/eth0/queues/tx-3/xps_cpus
and then set each tx-queue with same cpu mask
$ echo f > /sys/class/net/eth0/queues/tx-0/xps_cpus
$ echo f > /sys/class/net/eth0/queues/tx-1/xps_cpus
$ echo f > /sys/class/net/eth0/queues/tx-2/xps_cpus
$ echo f > /sys/class/net/eth0/queues/tx-3/xps_cpus

at this point the order of each map queues is differnet, It will cause
packets in the same stream be hashed to diffetent tx queue:
attr_map[0].queues = [0,1,2,3]
attr_map[1].queues = [1,0,2,3]
attr_map[2].queues = [2,0,1,3]
attr_map[3].queues = [3,0,1,2]

It is more reasonable that pacekts in the same stream be hashed to the same
tx queue when all tx queue bind with the same CPUs.

---
v1 -> v2:
Jakub suggestion: factor out the second loop in __netif_set_xps_queue() -
starting from the "add tx-queue to CPU/rx-queue maps" comment into a helper
---

Signed-off-by: Yonglong Li <liyonglong@chinatelecom.cn>
---
 net/core/dev.c | 49 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 32 insertions(+), 17 deletions(-)

Comments

Jakub Kicinski July 15, 2022, 4:57 a.m. UTC | #1
On Fri, 15 Jul 2022 10:31:58 +0800 Yonglong Li wrote:
> It is more reasonable that pacekts in the same stream be hashed to the same
> tx queue when all tx queue bind with the same CPUs.
> 
> ---
> v1 -> v2:
> Jakub suggestion: factor out the second loop in __netif_set_xps_queue() -
> starting from the "add tx-queue to CPU/rx-queue maps" comment into a helper
> ---

Please put the changelog under the --- after your s-o-b.
git am will cut off the commit message at the first ---
it finds which means right now it'll cut off before the 
changelog, and also discard your s-o-b.

> Signed-off-by: Yonglong Li <liyonglong@chinatelecom.cn>
> ---
>  net/core/dev.c | 49 ++++++++++++++++++++++++++++++++-----------------
>  1 file changed, 32 insertions(+), 17 deletions(-)
> 
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 978ed06..b9ae5d5 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -150,6 +150,7 @@
>  #include <linux/pm_runtime.h>
>  #include <linux/prandom.h>
>  #include <linux/once_lite.h>
> +#include <linux/sort.h>
>  
>  #include "dev.h"
>  #include "net-sysfs.h"
> @@ -199,6 +200,11 @@ static int call_netdevice_notifiers_extack(unsigned long val,
>  
>  static DECLARE_RWSEM(devnet_rename_sem);
>  
> +static int cmp_u16(const void *a, const void *b)
> +{
> +	return *(u16 *)a - *(u16 *)b;
> +}
> +
>  static inline void dev_base_seq_inc(struct net *net)
>  {
>  	while (++net->dev_base_seq == 0)
> @@ -2537,6 +2543,31 @@ static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps,
>  	}
>  }
>  
> +static void update_xps_map(struct xps_map *map, int cpu, u16 index,
> +			   bool *skip_tc, int *numa_node_id,
> +			   enum xps_map_type type)
> +{
> +	int pos = 0;
> +
> +	*skip_tc = true;

Maybe let's keep the skip_tc in the main function, output arguments are
not universally liked in the kernel, and it's just a single line.

Otherwise looks good, thank you!
diff mbox series

Patch

diff --git a/net/core/dev.c b/net/core/dev.c
index 978ed06..b9ae5d5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -150,6 +150,7 @@ 
 #include <linux/pm_runtime.h>
 #include <linux/prandom.h>
 #include <linux/once_lite.h>
+#include <linux/sort.h>
 
 #include "dev.h"
 #include "net-sysfs.h"
@@ -199,6 +200,11 @@  static int call_netdevice_notifiers_extack(unsigned long val,
 
 static DECLARE_RWSEM(devnet_rename_sem);
 
+static int cmp_u16(const void *a, const void *b)
+{
+	return *(u16 *)a - *(u16 *)b;
+}
+
 static inline void dev_base_seq_inc(struct net *net)
 {
 	while (++net->dev_base_seq == 0)
@@ -2537,6 +2543,31 @@  static void xps_copy_dev_maps(struct xps_dev_maps *dev_maps,
 	}
 }
 
+static void update_xps_map(struct xps_map *map, int cpu, u16 index,
+			   bool *skip_tc, int *numa_node_id,
+			   enum xps_map_type type)
+{
+	int pos = 0;
+
+	*skip_tc = true;
+
+	while ((pos < map->len) && (map->queues[pos] != index))
+		pos++;
+
+	if (pos == map->len)
+		map->queues[map->len++] = index;
+
+	sort(map->queues, map->len, sizeof(u16), cmp_u16, NULL);
+#ifdef CONFIG_NUMA
+	if (type == XPS_CPUS) {
+		if (*numa_node_id == -2)
+			*numa_node_id = cpu_to_node(cpu);
+		else if (*numa_node_id != cpu_to_node(cpu))
+			*numa_node_id = -1;
+	}
+#endif
+}
+
 /* Must be called under cpus_read_lock */
 int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 			  u16 index, enum xps_map_type type)
@@ -2629,24 +2660,8 @@  int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
 		if (netif_attr_test_mask(j, mask, nr_ids) &&
 		    netif_attr_test_online(j, online_mask, nr_ids)) {
 			/* add tx-queue to CPU/rx-queue maps */
-			int pos = 0;
-
-			skip_tc = true;
-
 			map = xmap_dereference(new_dev_maps->attr_map[tci]);
-			while ((pos < map->len) && (map->queues[pos] != index))
-				pos++;
-
-			if (pos == map->len)
-				map->queues[map->len++] = index;
-#ifdef CONFIG_NUMA
-			if (type == XPS_CPUS) {
-				if (numa_node_id == -2)
-					numa_node_id = cpu_to_node(j);
-				else if (numa_node_id != cpu_to_node(j))
-					numa_node_id = -1;
-			}
-#endif
+			update_xps_map(map, j, index, &skip_tc, &numa_node_id, type);
 		}
 
 		if (copy)