diff mbox series

[bpf-next,2/3] net: bonding: Use per-cpu rr_tx_counter

Message ID 20210609135537.1460244-3-joamaki@gmail.com (mailing list archive)
State Changes Requested
Delegated to: BPF
Headers show
Series XDP bonding support | expand

Checks

Context Check Description
netdev/cover_letter success Link
netdev/fixes_present success Link
netdev/patch_count success Link
netdev/tree_selection success Clearly marked for bpf-next
netdev/subject_prefix success Link
netdev/cc_maintainers warning 8 maintainers not CCed: yhs@fb.com kpsingh@kernel.org kafai@fb.com ast@kernel.org john.fastabend@gmail.com songliubraving@fb.com davem@davemloft.net kuba@kernel.org
netdev/source_inline success Was 0 now: 0
netdev/verify_signedoff success Link
netdev/module_param success Was 0 now: 0
netdev/build_32bit success Errors and warnings before: 19 this patch: 19
netdev/kdoc success Errors and warnings before: 3 this patch: 3
netdev/verify_fixes success Link
netdev/checkpatch success total: 0 errors, 0 warnings, 0 checks, 51 lines checked
netdev/build_allmodconfig_warn success Errors and warnings before: 19 this patch: 19
netdev/header_inline success Link

Commit Message

Jussi Maki June 9, 2021, 1:55 p.m. UTC
The round-robin rr_tx_counter was shared across CPUs leading
to significant cache trashing at high packet rates. This patch
switches the round-robin mechanism to use a per-cpu counter to
decide the destination device.

On a 100Gbit 64 byte packet test this reduces the CPU load from
50% to 10% on the test system.

Signed-off-by: Jussi Maki <joamaki@gmail.com>
---
 drivers/net/bonding/bond_main.c | 18 +++++++++++++++---
 include/net/bonding.h           |  2 +-
 2 files changed, 16 insertions(+), 4 deletions(-)

Comments

Jay Vosburgh June 10, 2021, 12:04 a.m. UTC | #1
Jussi Maki <joamaki@gmail.com> wrote:

>The round-robin rr_tx_counter was shared across CPUs leading
>to significant cache trashing at high packet rates. This patch

	"trashing" -> "thrashing" ?

>switches the round-robin mechanism to use a per-cpu counter to
>decide the destination device.
>
>On a 100Gbit 64 byte packet test this reduces the CPU load from
>50% to 10% on the test system.
>
>Signed-off-by: Jussi Maki <joamaki@gmail.com>
>---
> drivers/net/bonding/bond_main.c | 18 +++++++++++++++---
> include/net/bonding.h           |  2 +-
> 2 files changed, 16 insertions(+), 4 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 38eea7e096f3..917dd2cdcbf4 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -4314,16 +4314,16 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond)
> 		slave_id = prandom_u32();
> 		break;
> 	case 1:
>-		slave_id = bond->rr_tx_counter;
>+		slave_id = this_cpu_inc_return(*bond->rr_tx_counter);
> 		break;
> 	default:
> 		reciprocal_packets_per_slave =
> 			bond->params.reciprocal_packets_per_slave;
>-		slave_id = reciprocal_divide(bond->rr_tx_counter,
>+		slave_id = this_cpu_inc_return(*bond->rr_tx_counter);
>+		slave_id = reciprocal_divide(slave_id,
> 					     reciprocal_packets_per_slave);

	With the rr_tx_counter is per-cpu, each CPU is essentially doing
its own round-robin logic, independently of other CPUs, so the resulting
spread of transmitted packets may not be as evenly distributed (as
multiple CPUs could select the same interface to transmit on
approximately in lock-step).  I'm not sure if this could cause actual
problems in practice, though, as particular flows shouldn't skip between
CPUs (and thus rr_tx_counters) very often, and round-robin already
shouldn't be the first choice if no packet reordering is a hard
requirement.

	I think this patch could be submitted against net-next
independently of the rest of the series.

Acked-by: Jay Vosburgh <jay.vosburgh@canonical.com>

	-J

> 		break;
> 	}
>-	bond->rr_tx_counter++;
> 
> 	return slave_id;
> }
>@@ -5278,6 +5278,9 @@ static void bond_uninit(struct net_device *bond_dev)
> 
> 	list_del(&bond->bond_list);
> 
>+	if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)
>+		free_percpu(bond->rr_tx_counter);
>+
> 	bond_debug_unregister(bond);
> }
> 
>@@ -5681,6 +5684,15 @@ static int bond_init(struct net_device *bond_dev)
> 	if (!bond->wq)
> 		return -ENOMEM;
> 
>+	if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN) {
>+		bond->rr_tx_counter = alloc_percpu(u32);
>+		if (!bond->rr_tx_counter) {
>+			destroy_workqueue(bond->wq);
>+			bond->wq = NULL;
>+			return -ENOMEM;
>+		}
>+	}
>+
> 	spin_lock_init(&bond->stats_lock);
> 	netdev_lockdep_set_classes(bond_dev);
> 
>diff --git a/include/net/bonding.h b/include/net/bonding.h
>index 34acb81b4234..8de8180f1be8 100644
>--- a/include/net/bonding.h
>+++ b/include/net/bonding.h
>@@ -232,7 +232,7 @@ struct bonding {
> 	char     proc_file_name[IFNAMSIZ];
> #endif /* CONFIG_PROC_FS */
> 	struct   list_head bond_list;
>-	u32      rr_tx_counter;
>+	u32 __percpu *rr_tx_counter;
> 	struct   ad_bond_info ad_info;
> 	struct   alb_bond_info alb_info;
> 	struct   bond_params params;
>-- 
>2.30.2
>
Jussi Maki June 14, 2021, 7:54 a.m. UTC | #2
On Thu, Jun 10, 2021 at 2:04 AM Jay Vosburgh <jay.vosburgh@canonical.com> wrote:
>
> Jussi Maki <joamaki@gmail.com> wrote:
>
>         With the rr_tx_counter is per-cpu, each CPU is essentially doing
> its own round-robin logic, independently of other CPUs, so the resulting
> spread of transmitted packets may not be as evenly distributed (as
> multiple CPUs could select the same interface to transmit on
> approximately in lock-step).  I'm not sure if this could cause actual
> problems in practice, though, as particular flows shouldn't skip between
> CPUs (and thus rr_tx_counters) very often, and round-robin already
> shouldn't be the first choice if no packet reordering is a hard
> requirement.
>
>         I think this patch could be submitted against net-next
> independently of the rest of the series.

Yes this makes sense. I'll submit it separately against net-next today
and drop it off from this patchset.
diff mbox series

Patch

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 38eea7e096f3..917dd2cdcbf4 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4314,16 +4314,16 @@  static u32 bond_rr_gen_slave_id(struct bonding *bond)
 		slave_id = prandom_u32();
 		break;
 	case 1:
-		slave_id = bond->rr_tx_counter;
+		slave_id = this_cpu_inc_return(*bond->rr_tx_counter);
 		break;
 	default:
 		reciprocal_packets_per_slave =
 			bond->params.reciprocal_packets_per_slave;
-		slave_id = reciprocal_divide(bond->rr_tx_counter,
+		slave_id = this_cpu_inc_return(*bond->rr_tx_counter);
+		slave_id = reciprocal_divide(slave_id,
 					     reciprocal_packets_per_slave);
 		break;
 	}
-	bond->rr_tx_counter++;
 
 	return slave_id;
 }
@@ -5278,6 +5278,9 @@  static void bond_uninit(struct net_device *bond_dev)
 
 	list_del(&bond->bond_list);
 
+	if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)
+		free_percpu(bond->rr_tx_counter);
+
 	bond_debug_unregister(bond);
 }
 
@@ -5681,6 +5684,15 @@  static int bond_init(struct net_device *bond_dev)
 	if (!bond->wq)
 		return -ENOMEM;
 
+	if (BOND_MODE(bond) == BOND_MODE_ROUNDROBIN) {
+		bond->rr_tx_counter = alloc_percpu(u32);
+		if (!bond->rr_tx_counter) {
+			destroy_workqueue(bond->wq);
+			bond->wq = NULL;
+			return -ENOMEM;
+		}
+	}
+
 	spin_lock_init(&bond->stats_lock);
 	netdev_lockdep_set_classes(bond_dev);
 
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 34acb81b4234..8de8180f1be8 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -232,7 +232,7 @@  struct bonding {
 	char     proc_file_name[IFNAMSIZ];
 #endif /* CONFIG_PROC_FS */
 	struct   list_head bond_list;
-	u32      rr_tx_counter;
+	u32 __percpu *rr_tx_counter;
 	struct   ad_bond_info ad_info;
 	struct   alb_bond_info alb_info;
 	struct   bond_params params;