diff mbox series

[net-next,04/10] devlink: Introduce shared rate domains

Message ID 20250213180134.323929-5-tariqt@nvidia.com (mailing list archive)
State Changes Requested
Delegated to: Netdev Maintainers
Headers show
Series devlink and mlx5: Introduce rate domains | expand

Checks

Context Check Description
netdev/series_format success Posting correctly formatted
netdev/tree_selection success Clearly marked for net-next
netdev/ynl success Generated files up to date; no warnings/errors; GEN HAS DIFF 2 files changed, 62 insertions(+);
netdev/fixes_present success Fixes tag not required for -next series
netdev/header_inline success No static functions without inline keyword in header files
netdev/build_32bit success Errors and warnings before: 0 this patch: 0
netdev/build_tools success Errors and warnings before: 26 (+1) this patch: 26 (+1)
netdev/cc_maintainers warning 1 maintainers not CCed: horms@kernel.org
netdev/build_clang success Errors and warnings before: 1070 this patch: 1070
netdev/verify_signedoff success Signed-off-by tag matches author and committer
netdev/deprecated_api success None detected
netdev/check_selftest success No net selftest shell script
netdev/verify_fixes success No Fixes tag
netdev/build_allmodconfig_warn success Errors and warnings before: 523 this patch: 523
netdev/checkpatch warning WARNING: line length of 81 exceeds 80 columns WARNING: line length of 83 exceeds 80 columns WARNING: line length of 84 exceeds 80 columns
netdev/build_clang_rust success No Rust files in patch. Skipping build
netdev/kdoc success Errors and warnings before: 5 this patch: 5
netdev/source_inline success Was 0 now: 0
netdev/contest success net-next-2025-02-14--18-00 (tests: 889)

Commit Message

Tariq Toukan Feb. 13, 2025, 6:01 p.m. UTC
From: Cosmin Ratiu <cratiu@nvidia.com>

The underlying idea is modeling a piece of hardware which:
1. Exposes multiple functions as separate devlink objects.
2. Is capable of instantiating a transmit scheduling tree spanning
   multiple functions.

Modeling this requires devlink rate nodes with parents across other
devlink objects. A naive approach that relies on the current
one-lock-per-devlink model is not workable, as in some cases it would
require acquiring multiple devlink locks in the correct order.

Based on the preliminary patches in this series, this commit introduces
the concept of a shared rate domain.

1. A shared rate domain stores rate nodes for a piece of hardware that
   has the properties described at the beginning.
2. A shared rate domain is identified by the devlink operations pointer
   (a proxy for the device type) and a unique u64 hardware identifier
   provided by the driver.
3. There is a global registry of reference counted shared rate domains.
4. A devlink object starts out with a private rate domain, and can be
   switched once to use a shared rate domain with
   devlink_shared_rate_domain_init. Further calls with the same id do
   nothing; calls with a different id fail with -EEXIST.
5. Shared rate domains have an additional mutex serializing access to
   rate nodes, acquired by the previously introduced functions
   devl_rate_domain_lock and devl_rate_domain_unlock.

These new code paths are unused for now. A caller to
devlink_shared_rate_domain_init will be added in a subsequent patch.

Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
---
 include/net/devlink.h       |  8 ++++
 net/devlink/core.c          | 79 ++++++++++++++++++++++++++++++++++++-
 net/devlink/dev.c           |  2 +-
 net/devlink/devl_internal.h | 26 ++++++++++--
 net/devlink/rate.c          | 15 +++++++
 5 files changed, 124 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/include/net/devlink.h b/include/net/devlink.h
index b8783126c1ed..a9675c1810e6 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1561,6 +1561,14 @@  void devlink_register(struct devlink *devlink);
 void devlink_unregister(struct devlink *devlink);
 void devlink_free(struct devlink *devlink);
 
+/* Can be used to tell devlink that shared rate domains are supported.
+ * The same id needs to be provided for devlink objects that can share
+ * rate nodes in hw (e.g. contain nodes with parents in other devlink objects).
+ * This requires holding the devlink lock. An object can be switched to a
+ * shared domain only once: repeating the call with the same id is a no-op,
+ * while a different id fails with -EEXIST. The switch has to happen while
+ * the object has no rates yet, otherwise -EINVAL is returned.
+ * Rate node relationships across different rate domains are not supported.
+ *
+ * Returns 0 on success or a negative errno (-EEXIST, -EINVAL, -ENOMEM).
+ */
+int devlink_shared_rate_domain_init(struct devlink *devlink, u64 id);
+
 /**
  * struct devlink_port_ops - Port operations
  * @port_split: Callback used to split the port into multiple ones.
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 06a2e2dce558..9d374d84225a 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -289,6 +289,80 @@  void devl_unlock(struct devlink *devlink)
 }
 EXPORT_SYMBOL_GPL(devl_unlock);
 
+/* A global data struct with all shared rate domains.
+ * Domains are linked in by devlink_shared_rate_domain_init() when they are
+ * first created and unlinked by devlink_rate_domain_put() once their
+ * reference count drops to zero. Private rate domains are never listed here.
+ */
+static struct {
+	struct mutex lock;    /* Acquired after the devlink lock. */
+	struct list_head rate_domains; /* Links devlink_rate_domain.list. */
+} devlink_rate_domains = {
+	.lock = __MUTEX_INITIALIZER(devlink_rate_domains.lock),
+	.rate_domains = LIST_HEAD_INIT(devlink_rate_domains.rate_domains),
+};
+
+/* Tell whether @rate_domain is the shared domain identified by the
+ * (@ops, @id) pair. Both the ops pointer and the id have to match.
+ */
+static bool devlink_rate_domain_eq(struct devlink_rate_domain *rate_domain,
+				   const struct devlink_ops *ops, u64 id)
+{
+	if (rate_domain->ops != ops)
+		return false;
+
+	return rate_domain->id == id;
+}
+
+/* Switch @devlink from its private rate domain to the shared rate domain
+ * identified by (@devlink->ops, @id), creating that domain on first use.
+ * The devlink instance lock must be held by the caller.
+ *
+ * Returns 0 on success (including when @devlink already uses that very
+ * domain), -EEXIST if @devlink already uses a different shared domain,
+ * -EINVAL if the private domain still holds rates, or -ENOMEM.
+ */
+int devlink_shared_rate_domain_init(struct devlink *devlink, u64 id)
+{
+	struct devlink_rate_domain *domain = NULL;
+	struct devlink_rate_domain *iter;
+	struct devlink_rate_domain *cur;
+	int err = 0;
+
+	devl_assert_locked(devlink);
+
+	cur = devlink->rate_domain;
+	if (cur->shared)
+		return devlink_rate_domain_eq(cur, devlink->ops, id) ? 0 : -EEXIST;
+	if (!list_empty(&cur->rate_list))
+		return -EINVAL;
+
+	mutex_lock(&devlink_rate_domains.lock);
+	list_for_each_entry(iter, &devlink_rate_domains.rate_domains, list) {
+		if (!devlink_rate_domain_eq(iter, devlink->ops, id))
+			continue;
+		/* Matching shared domain exists, take another reference. */
+		refcount_inc(&iter->refcount);
+		domain = iter;
+		break;
+	}
+
+	if (!domain) {
+		/* Shared domain not found, create one. */
+		domain = kvzalloc(sizeof(*domain), GFP_KERNEL);
+		if (!domain) {
+			err = -ENOMEM;
+			goto unlock;
+		}
+		domain->shared = true;
+		domain->ops = devlink->ops;
+		domain->id = id;
+		mutex_init(&domain->lock);
+		INIT_LIST_HEAD(&domain->rate_list);
+		refcount_set(&domain->refcount, 1);
+		list_add_tail(&domain->list, &devlink_rate_domains.rate_domains);
+	}
+
+	/* The private domain was checked to hold no rates above; drop it. */
+	kvfree(cur);
+	devlink->rate_domain = domain;
+unlock:
+	mutex_unlock(&devlink_rate_domains.lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_shared_rate_domain_init);
+
+/* Release the reference a devlink instance holds on @rate_domain.
+ * A private domain is freed right away. A shared domain is reference
+ * counted: only the final put unlinks it from the global registry and
+ * frees it.
+ */
+static void devlink_rate_domain_put(struct devlink_rate_domain *rate_domain)
+{
+	if (rate_domain->shared) {
+		/* The final decrement and the unlink must happen atomically
+		 * with respect to devlink_rate_domains.lock. Otherwise
+		 * devlink_shared_rate_domain_init() could still find this
+		 * domain in the list after its refcount reached zero, take a
+		 * reference on it and end up using freed memory.
+		 */
+		if (!refcount_dec_and_mutex_lock(&rate_domain->refcount,
+						 &devlink_rate_domains.lock))
+			return;
+
+		list_del(&rate_domain->list);
+		mutex_unlock(&devlink_rate_domains.lock);
+
+		WARN_ON(!list_empty(&rate_domain->rate_list));
+		/* Pairs with mutex_init() done when the domain was created. */
+		mutex_destroy(&rate_domain->lock);
+	}
+	kvfree(rate_domain);
+}
+
 /**
  * devlink_try_get() - try to obtain a reference on a devlink instance
  * @devlink: instance to reference
@@ -314,7 +388,7 @@  static void devlink_release(struct work_struct *work)
 	mutex_destroy(&devlink->lock);
 	lockdep_unregister_key(&devlink->lock_key);
 	put_device(devlink->dev);
-	kvfree(devlink->rate_domain);
+	devlink_rate_domain_put(devlink->rate_domain);
 	kvfree(devlink);
 }
 
@@ -428,6 +502,7 @@  struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
 	devlink->rate_domain = kvzalloc(sizeof(*devlink->rate_domain), GFP_KERNEL);
 	if (!devlink->rate_domain)
 		goto err_rate_domain;
+	devlink->rate_domain->shared = false;
 	INIT_LIST_HEAD(&devlink->rate_domain->rate_list);
 
 	ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b,
@@ -484,7 +559,7 @@  void devlink_free(struct devlink *devlink)
 	WARN_ON(!list_empty(&devlink->resource_list));
 	WARN_ON(!list_empty(&devlink->dpipe_table_list));
 	WARN_ON(!list_empty(&devlink->sb_list));
-	WARN_ON(!list_empty(&devlink->rate_domain->rate_list));
+	WARN_ON(devlink_rates_check(devlink));
 	WARN_ON(!list_empty(&devlink->linecard_list));
 	WARN_ON(!xa_empty(&devlink->ports));
 
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index c926c75cc10d..84353a85e8fe 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -434,7 +434,7 @@  static void devlink_reload_reinit_sanity_check(struct devlink *devlink)
 	WARN_ON(!list_empty(&devlink->trap_list));
 	WARN_ON(!list_empty(&devlink->dpipe_table_list));
 	WARN_ON(!list_empty(&devlink->sb_list));
-	WARN_ON(!list_empty(&devlink->rate_domain->rate_list));
+	WARN_ON(devlink_rates_check(devlink));
 	WARN_ON(!list_empty(&devlink->linecard_list));
 	WARN_ON(!xa_empty(&devlink->ports));
 }
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index fae81dd6953f..7401aab274e5 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -30,9 +30,20 @@  struct devlink_dev_stats {
 	u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
 };
 
-/* Stores devlink rates associated with a rate domain. */
+/* Stores devlink rates associated with a rate domain.
+ * Multiple devlink objects may share the same domain (when 'shared' is true)
+ * and rate nodes can have members from multiple devices.
+ * A private domain (when 'shared' is false) belongs to one devlink object.
+ */
 struct devlink_rate_domain {
+	bool shared;
+	struct list_head list; /* Shared only: entry in the global registry. */
 	struct list_head rate_list;
+	/* Fields below are only used for shared rate domains.
+	 * 'ops' and 'id' together identify a shared domain.
+	 */
+	const struct devlink_ops *ops;
+	u64 id;
+	refcount_t refcount; /* One reference per devlink object using it. */
+	/* Serializes access to rates. Taken by devl_rate_domain_lock(). */
+	struct mutex lock;
 };
 
 struct devlink {
@@ -121,9 +132,17 @@  static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock)
 		device_unlock(devlink->dev);
 }
 
-static inline void devl_rate_domain_lock(struct devlink *devlink) { }
+/* Acquire the mutex of @devlink's rate domain if that domain is shared.
+ * Nothing is locked here for a private rate domain.
+ */
+static inline void devl_rate_domain_lock(struct devlink *devlink)
+{
+	struct devlink_rate_domain *rate_domain = devlink->rate_domain;
+
+	if (!rate_domain->shared)
+		return;
+
+	mutex_lock(&rate_domain->lock);
+}
 
-static inline void devl_rate_domain_unlock(struct devlink *devlink) { }
+/* Release the mutex of @devlink's rate domain if that domain is shared.
+ * Counterpart of devl_rate_domain_lock(); does nothing for a private domain.
+ */
+static inline void devl_rate_domain_unlock(struct devlink *devlink)
+{
+	struct devlink_rate_domain *rate_domain = devlink->rate_domain;
+
+	if (!rate_domain->shared)
+		return;
+
+	mutex_unlock(&rate_domain->lock);
+}
 
 typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index);
 typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index,
@@ -307,6 +326,7 @@  int devlink_resources_validate(struct devlink *devlink,
 
 /* Rates */
 int devlink_rate_nodes_check(struct devlink *devlink, struct netlink_ext_ack *extack);
+int devlink_rates_check(struct devlink *devlink);
 
 /* Linecards */
 unsigned int devlink_linecard_index(struct devlink_linecard *linecard);
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
index 54e6a9893e3d..38f18216eb80 100644
--- a/net/devlink/rate.c
+++ b/net/devlink/rate.c
@@ -621,6 +621,21 @@  int devlink_rate_nodes_check(struct devlink *devlink, struct netlink_ext_ack *ex
 	return err;
 }
 
+/* Check whether @devlink still owns any rate object in its rate domain.
+ * The domain's rate list is walked under the rate domain lock; rates
+ * belonging to other devlink instances are ignored.
+ *
+ * Returns -EBUSY if at least one rate belongs to @devlink, 0 otherwise.
+ */
+int devlink_rates_check(struct devlink *devlink)
+{
+	struct devlink_rate *rate;
+	int ret = 0;
+
+	devl_rate_domain_lock(devlink);
+	list_for_each_entry(rate, &devlink->rate_domain->rate_list, list) {
+		if (rate->devlink != devlink)
+			continue;
+		ret = -EBUSY;
+		break;
+	}
+	devl_rate_domain_unlock(devlink);
+
+	return ret;
+}
+
 /**
  * devl_rate_node_create - create devlink rate node
  * @devlink: devlink instance