[net-next,11/14] net/mlx5: qos: Store rate groups in a qos domain

Message ID 20241008183222.137702-12-tariqt@nvidia.com (mailing list archive)
State Accepted
Commit 107a034d5c1e9cf86fdf4c8801ec8a07e6669520
Delegated to: Netdev Maintainers
Headers show
Series net/mlx5: qos: Refactor esw qos to support new features

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; no diff in generated;
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 6 this patch: 6
netdev/build_tools success No tools touched, skip
netdev/cc_maintainers warning 1 maintainers not CCed: linux-rdma@vger.kernel.org
netdev/build_clang success Errors and warnings before: 6 this patch: 6
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 5 this patch: 5
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns; WARNING: line length of 83 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 6 this patch: 6
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2024-10-10--09-00 (tests: 775)

Commit Message

Tariq Toukan Oct. 8, 2024, 6:32 p.m. UTC
From: Cosmin Ratiu <cratiu@nvidia.com>

Rate groups are currently maintained as a list in their corresponding
eswitch, protected by the esw state_lock.
The upcoming cross-eswitch scheduling feature cannot work with this
approach, as it would require acquiring multiple eswitch locks (in the
correct order) to maintain group membership.

Move the rate groups into a new 'qos domain' struct and add explicit
qos init/cleanup steps to the eswitch init/cleanup. Upcoming patches
will expand the qos domain struct and allow it to be shared between
eswitches. For now, qos domains are private to each esw, so the only
functional change is an extra indirection.

Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
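A minimal sketch of the data-structure change, simplified from the
definitions in the patch below (the before/after struct names are
illustrative only, and unrelated fields are omitted):

	#include <linux/list.h>

	/* Before: each eswitch owned its group list directly. */
	struct esw_qos_before {
		struct list_head groups; /* protected by esw->state_lock */
	};

	/* After: groups live behind a qos domain. The domain is still
	 * private to one esw, so for now this is only an extra
	 * indirection, but it lets upcoming patches share one domain
	 * between several eswitches.
	 */
	struct mlx5_qos_domain {
		struct list_head groups; /* all mlx5_esw_rate_groups */
	};

	struct esw_qos_after {
		struct mlx5_qos_domain *domain;
	};

Because a shared domain may eventually hold groups belonging to
several eswitches, the list walkers in the patch start filtering on
group ownership, along these lines:

	list_for_each_entry(group, &esw->qos.domain->groups, parent_entry) {
		if (group->esw != esw) /* skip other eswitches' groups */
			continue;
		/* per-esw min_rate/bw_share accounting */
	}
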
 .../net/ethernet/mellanox/mlx5/core/esw/qos.c | 58 ++++++++++++++++---
 .../net/ethernet/mellanox/mlx5/core/esw/qos.h |  3 +
 .../net/ethernet/mellanox/mlx5/core/eswitch.c | 12 +++-
 .../net/ethernet/mellanox/mlx5/core/eswitch.h |  3 +-
 4 files changed, 65 insertions(+), 11 deletions(-)

Patch

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 5891a68633af..06b3a21a7475 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -11,6 +11,37 @@ 
 /* Minimum supported BW share value by the HW is 1 Mbit/sec */
 #define MLX5_MIN_BW_SHARE 1
 
+/* Holds rate groups associated with an E-Switch. */
+struct mlx5_qos_domain {
+	/* List of all mlx5_esw_rate_groups. */
+	struct list_head groups;
+};
+
+static struct mlx5_qos_domain *esw_qos_domain_alloc(void)
+{
+	struct mlx5_qos_domain *qos_domain;
+
+	qos_domain = kzalloc(sizeof(*qos_domain), GFP_KERNEL);
+	if (!qos_domain)
+		return NULL;
+
+	INIT_LIST_HEAD(&qos_domain->groups);
+
+	return qos_domain;
+}
+
+static int esw_qos_domain_init(struct mlx5_eswitch *esw)
+{
+	esw->qos.domain = esw_qos_domain_alloc();
+
+	return esw->qos.domain ? 0 : -ENOMEM;
+}
+
+static void esw_qos_domain_release(struct mlx5_eswitch *esw)
+{
+	kfree(esw->qos.domain);
+	esw->qos.domain = NULL;
+}
 
 struct mlx5_esw_rate_group {
 	u32 tsar_ix;
@@ -19,6 +50,7 @@  struct mlx5_esw_rate_group {
 	u32 min_rate;
 	/* A computed value indicating relative min_rate between group members. */
 	u32 bw_share;
+	/* Membership in the qos domain 'groups' list. */
 	struct list_head parent_entry;
 	/* The eswitch this group belongs to. */
 	struct mlx5_eswitch *esw;
@@ -128,10 +160,10 @@  static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw)
 	/* Find max min_rate across all esw groups.
 	 * This will correspond to fw_max_bw_share in the final bw_share calculation.
 	 */
-	list_for_each_entry(group, &esw->qos.groups, parent_entry) {
-		if (group->min_rate < max_guarantee || group->tsar_ix == esw->qos.root_tsar_ix)
-			continue;
-		max_guarantee = group->min_rate;
+	list_for_each_entry(group, &esw->qos.domain->groups, parent_entry) {
+		if (group->esw == esw && group->tsar_ix != esw->qos.root_tsar_ix &&
+		    group->min_rate > max_guarantee)
+			max_guarantee = group->min_rate;
 	}
 
 	if (max_guarantee)
@@ -183,8 +215,8 @@  static int esw_qos_normalize_min_rate(struct mlx5_eswitch *esw, struct netlink_e
 	u32 bw_share;
 	int err;
 
-	list_for_each_entry(group, &esw->qos.groups, parent_entry) {
-		if (group->tsar_ix == esw->qos.root_tsar_ix)
+	list_for_each_entry(group, &esw->qos.domain->groups, parent_entry) {
+		if (group->esw != esw || group->tsar_ix == esw->qos.root_tsar_ix)
 			continue;
 		bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share);
 
@@ -452,7 +484,7 @@  __esw_qos_alloc_rate_group(struct mlx5_eswitch *esw, u32 tsar_ix)
 	group->esw = esw;
 	group->tsar_ix = tsar_ix;
 	INIT_LIST_HEAD(&group->members);
-	list_add_tail(&group->parent_entry, &esw->qos.groups);
+	list_add_tail(&group->parent_entry, &esw->qos.domain->groups);
 	return group;
 }
 
@@ -586,7 +618,6 @@  static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
 		return err;
 	}
 
-	INIT_LIST_HEAD(&esw->qos.groups);
 	if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) {
 		esw->qos.group0 = __esw_qos_create_rate_group(esw, extack);
 	} else {
@@ -868,6 +899,17 @@  static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
 	return 0;
 }
 
+int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
+{
+	return esw_qos_domain_init(esw);
+}
+
+void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw)
+{
+	if (esw->qos.domain)
+		esw_qos_domain_release(esw);
+}
+
 /* Eswitch devlink rate API */
 
 int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
index c4f04c3e6a59..44fb339c5dcc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h
@@ -6,6 +6,9 @@ 
 
 #ifdef CONFIG_MLX5_ESWITCH
 
+int mlx5_esw_qos_init(struct mlx5_eswitch *esw);
+void mlx5_esw_qos_cleanup(struct mlx5_eswitch *esw);
+
 int mlx5_esw_qos_set_vport_rate(struct mlx5_vport *evport, u32 max_rate, u32 min_rate);
 void mlx5_esw_qos_vport_disable(struct mlx5_vport *vport);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 4a187f39daba..9de819c45d33 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1481,6 +1481,10 @@  int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
 	MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE);
 	mlx5_eq_notifier_register(esw->dev, &esw->nb);
 
+	err = mlx5_esw_qos_init(esw);
+	if (err)
+		goto err_qos_init;
+
 	if (esw->mode == MLX5_ESWITCH_LEGACY) {
 		err = esw_legacy_enable(esw);
 	} else {
@@ -1489,7 +1493,7 @@  int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
 	}
 
 	if (err)
-		goto abort;
+		goto err_esw_enable;
 
 	esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED;
 
@@ -1503,7 +1507,10 @@  int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
 
 	return 0;
 
-abort:
+err_esw_enable:
+	mlx5_esw_qos_cleanup(esw);
+err_qos_init:
+	mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
 	mlx5_esw_acls_ns_cleanup(esw);
 	return err;
 }
@@ -1631,6 +1638,7 @@  void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw)
 
 	if (esw->mode == MLX5_ESWITCH_OFFLOADS)
 		devl_rate_nodes_destroy(devlink);
+	mlx5_esw_qos_cleanup(esw);
 }
 
 void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 567276900a37..e57be2eeec85 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -336,6 +336,7 @@  enum {
 };
 
 struct dentry;
+struct mlx5_qos_domain;
 
 struct mlx5_eswitch {
 	struct mlx5_core_dev    *dev;
@@ -368,12 +369,12 @@  struct mlx5_eswitch {
 		 */
 		refcount_t refcnt;
 		u32 root_tsar_ix;
+		struct mlx5_qos_domain *domain;
 		/* Contains all vports with QoS enabled but no explicit group.
 		 * Cannot be NULL if QoS is enabled, but may be a fake group
 		 * referencing the root TSAR if the esw doesn't support groups.
 		 */
 		struct mlx5_esw_rate_group *group0;
-		struct list_head groups; /* Protected by esw->state_lock */
 	} qos;
 
 	struct mlx5_esw_bridge_offloads *br_offloads;
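
For reference, a condensed usage sketch of the new init/cleanup API as
wired up in eswitch.c above (the example_* names are hypothetical; the
real call sites are mlx5_eswitch_enable_locked() and
mlx5_eswitch_disable_locked()):

	int example_enable(struct mlx5_eswitch *esw)
	{
		int err;

		err = mlx5_esw_qos_init(esw); /* allocates esw->qos.domain */
		if (err)
			return err;
		/* ... enable legacy or offloads mode, calling
		 * mlx5_esw_qos_cleanup(esw) on the unwind path ...
		 */
		return 0;
	}

	void example_disable(struct mlx5_eswitch *esw)
	{
		/* ... mode teardown ... */
		mlx5_esw_qos_cleanup(esw); /* frees and NULLs esw->qos.domain */
	}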