diff mbox series

[05/10] devlink: Introduce shared rate domains

Message ID 20241113203317.2507537-6-cratiu@nvidia.com (mailing list archive)
State Not Applicable
Delegated to: Netdev Maintainers
Headers show
Series None | expand

Commit Message

Cosmin Ratiu Nov. 13, 2024, 8:30 p.m. UTC
The underlying idea is modeling a piece of hardware which:
1. Exposes multiple functions as separate devlink objects.
2. Is capable of instantiating a transmit scheduling tree spanning
   multiple functions.

Modeling this requires devlink rate nodes with parents across other
devlink objects. A naive approach that relies on the current
one-lock-per-devlink model is impossible, as it would require in some
cases acquiring multiple devlink locks in the correct order.

Based on the preliminary patches in this series, this commit introduces
the concept of a shared rate domain.

1. A shared rate domain stores rate nodes for a piece of hardware that
   has the properties described at the beginning.
2. A shared rate domain is identified by the devlink operations pointer
   (a proxy for the device type) and a unique u64 hardware identifier
   provided by the driver.
3. There is a global registry of reference counted shared rate domains.
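4. A devlink object starts out with a private rate domain, and can be
   switched once to use a shared rate domain with
   devlink_shared_rate_domain_init, as long as it has no rates yet.
   Further calls with the same id do nothing; a call with a different
   id is rejected with -EEXIST.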
5. Shared rate domains have an additional mutex serializing access to
   rate nodes, acquired and released by the previously introduced
   functions devl_rate_domain_lock and devl_rate_domain_unlock.

These new code paths are unused for now. A caller to
devlink_shared_rate_domain_init will be added in a subsequent patch.

Issue: 3645895
Change-Id: I045456c4d06fd7018ba0da345688ee43e1d0fd74
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
---
 include/net/devlink.h       |  8 ++++
 net/devlink/core.c          | 79 ++++++++++++++++++++++++++++++++++++-
 net/devlink/dev.c           |  2 +-
 net/devlink/devl_internal.h | 26 ++++++++++--
 net/devlink/rate.c          | 15 +++++++
 5 files changed, 124 insertions(+), 6 deletions(-)
diff mbox series

Patch

diff --git a/include/net/devlink.h b/include/net/devlink.h
index 917bc006a5a4..18da697f6607 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -1553,6 +1553,14 @@  void devlink_register(struct devlink *devlink);
 void devlink_unregister(struct devlink *devlink);
 void devlink_free(struct devlink *devlink);
 
+/* Switch a devlink object from its private rate domain to a shared one.
+ * Objects that can share rate nodes in hw (e.g. contain nodes with parents in
+ * other devlink objects) must pass the same id. Requires the devlink lock and
+ * no existing rates (-EINVAL). Repeat calls: same id is a no-op, else -EEXIST.
+ * Rate node relationships across different rate domains are not supported.
+ */
+int devlink_shared_rate_domain_init(struct devlink *devlink, u64 id);
+
 /**
  * struct devlink_port_ops - Port operations
  * @port_split: Callback used to split the port into multiple ones.
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 06a2e2dce558..eb96a97a9e44 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -289,6 +289,80 @@  void devl_unlock(struct devlink *devlink)
 }
 EXPORT_SYMBOL_GPL(devl_unlock);
 
+/* Global registry of shared rate domains, matched by (ops, id) on lookup. */
+static struct {
+	struct mutex lock;    /* Acquired AFTER the devlink lock. */
+	struct list_head rate_domains;    /* Walked and modified under lock. */
+} devlink_rate_domains = {
+	.lock = __MUTEX_INITIALIZER(devlink_rate_domains.lock),
+	.rate_domains = LIST_HEAD_INIT(devlink_rate_domains.rate_domains),
+};
+
+/* A shared rate domain is keyed by the devlink ops pointer plus a driver id. */
+static bool devlink_rate_domain_eq(struct devlink_rate_domain *rate_domain,
+				   const struct devlink_ops *ops, u64 id)
+{
+	return rate_domain->ops == ops && rate_domain->id == id;
+}
+
+int devlink_shared_rate_domain_init(struct devlink *devlink, u64 id)
+{
+	struct devlink_rate_domain *rate_domain;
+	int err = 0;
+
+	devl_assert_locked(devlink);
+
+	if (devlink->rate_domain->shared)
+		return devlink_rate_domain_eq(devlink->rate_domain,
+					      devlink->ops, id) ? 0 : -EEXIST;
+	/* Refuse to switch once the private domain already holds rates. */
+	if (!list_empty(&devlink->rate_domain->rate_list))
+		return -EINVAL;
+
+	mutex_lock(&devlink_rate_domains.lock);
+	list_for_each_entry(rate_domain, &devlink_rate_domains.rate_domains, list) {
+		/* A zero refcount means the domain is being freed: skip it. */
+		if (devlink_rate_domain_eq(rate_domain, devlink->ops, id) &&
+		    refcount_inc_not_zero(&rate_domain->refcount))
+			goto replace_domain;
+	}
+
+	/* Shared domain not found, create one. */
+	rate_domain = kvzalloc(sizeof(*rate_domain), GFP_KERNEL);
+	if (!rate_domain) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+	rate_domain->shared = true;
+	rate_domain->ops = devlink->ops;
+	rate_domain->id = id;
+	mutex_init(&rate_domain->lock);
+	INIT_LIST_HEAD(&rate_domain->rate_list);
+	refcount_set(&rate_domain->refcount, 1);
+	list_add_tail(&rate_domain->list, &devlink_rate_domains.rate_domains);
+replace_domain:
+	kvfree(devlink->rate_domain);
+	devlink->rate_domain = rate_domain;
+unlock:
+	mutex_unlock(&devlink_rate_domains.lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_shared_rate_domain_init);
+
+static void devlink_rate_domain_put(struct devlink_rate_domain *rate_domain)
+{
+	if (rate_domain->shared) {
+		/* Hit zero under the registry lock so lookups can't revive us. */
+		if (!refcount_dec_and_mutex_lock(&rate_domain->refcount,
+						 &devlink_rate_domains.lock))
+			return;
+		WARN_ON(!list_empty(&rate_domain->rate_list));
+		list_del(&rate_domain->list);
+		mutex_unlock(&devlink_rate_domains.lock);
+	}
+	kvfree(rate_domain);
+}
+
 /**
  * devlink_try_get() - try to obtain a reference on a devlink instance
  * @devlink: instance to reference
@@ -314,7 +388,7 @@  static void devlink_release(struct work_struct *work)
 	mutex_destroy(&devlink->lock);
 	lockdep_unregister_key(&devlink->lock_key);
 	put_device(devlink->dev);
-	kvfree(devlink->rate_domain);
+	devlink_rate_domain_put(devlink->rate_domain);
 	kvfree(devlink);
 }
 
@@ -428,6 +502,7 @@  struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
 	devlink->rate_domain = kvzalloc(sizeof(*devlink->rate_domain), GFP_KERNEL);
 	if (!devlink->rate_domain)
 		goto err_rate_domain;
+	devlink->rate_domain->shared = false;
 	INIT_LIST_HEAD(&devlink->rate_domain->rate_list);
 
 	ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b,
@@ -484,7 +559,7 @@  void devlink_free(struct devlink *devlink)
 	WARN_ON(!list_empty(&devlink->resource_list));
 	WARN_ON(!list_empty(&devlink->dpipe_table_list));
 	WARN_ON(!list_empty(&devlink->sb_list));
-	WARN_ON(!list_empty(&devlink->rate_domain->rate_list));
+	WARN_ON(devlink_rates_check(devlink));
 	WARN_ON(!list_empty(&devlink->linecard_list));
 	WARN_ON(!xa_empty(&devlink->ports));
 
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
index c926c75cc10d..84353a85e8fe 100644
--- a/net/devlink/dev.c
+++ b/net/devlink/dev.c
@@ -434,7 +434,7 @@  static void devlink_reload_reinit_sanity_check(struct devlink *devlink)
 	WARN_ON(!list_empty(&devlink->trap_list));
 	WARN_ON(!list_empty(&devlink->dpipe_table_list));
 	WARN_ON(!list_empty(&devlink->sb_list));
-	WARN_ON(!list_empty(&devlink->rate_domain->rate_list));
+	WARN_ON(devlink_rates_check(devlink));
 	WARN_ON(!list_empty(&devlink->linecard_list));
 	WARN_ON(!xa_empty(&devlink->ports));
 }
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
index fae81dd6953f..7401aab274e5 100644
--- a/net/devlink/devl_internal.h
+++ b/net/devlink/devl_internal.h
@@ -30,9 +30,20 @@  struct devlink_dev_stats {
 	u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
 };
 
-/* Stores devlink rates associated with a rate domain. */
+/* Stores devlink rates associated with a rate domain.
+ * Multiple devlink objects may share the same domain (when 'shared' is true)
+ * and rate nodes can have members from multiple devices.
+ */
 struct devlink_rate_domain {
+	bool shared;		/* False for the private per-devlink domain. */
+	struct list_head list;	/* Registry linkage, shared domains only. */
 	struct list_head rate_list;
+	/* Fields below are only used for shared rate domains. */
+	const struct devlink_ops *ops;	/* With id: the registry lookup key. */
+	u64 id;
+	refcount_t refcount;	/* One reference per devlink object using it. */
+	/* Serializes access to rates; see devl_rate_domain_lock(). */
+	struct mutex lock;
 };
 
 struct devlink {
@@ -121,9 +132,17 @@  static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock)
 		device_unlock(devlink->dev);
 }
 
-static inline void devl_rate_domain_lock(struct devlink *devlink) { }
+static inline void devl_rate_domain_lock(struct devlink *devlink)
+{
+	if (devlink->rate_domain->shared)	/* Only shared domains init a lock. */
+		mutex_lock(&devlink->rate_domain->lock);
+}
 
-static inline void devl_rate_domain_unlock(struct devlink *devlink) { }
+static inline void devl_rate_domain_unlock(struct devlink *devlink)
+{
+	if (devlink->rate_domain->shared)	/* Only shared domains init a lock. */
+		mutex_unlock(&devlink->rate_domain->lock);
+}
 
 typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index);
 typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index,
@@ -307,6 +326,7 @@  int devlink_resources_validate(struct devlink *devlink,
 
 /* Rates */
 int devlink_rate_nodes_check(struct devlink *devlink, struct netlink_ext_ack *extack);
+int devlink_rates_check(struct devlink *devlink);
 
 /* Linecards */
 unsigned int devlink_linecard_index(struct devlink_linecard *linecard);
diff --git a/net/devlink/rate.c b/net/devlink/rate.c
index b888a6ecdf96..daf366ca0575 100644
--- a/net/devlink/rate.c
+++ b/net/devlink/rate.c
@@ -675,6 +675,26 @@  int devlink_rate_nodes_check(struct devlink *devlink, struct netlink_ext_ack *ex
 	return err;
 }
 
+/* Report whether @devlink still owns rates: -EBUSY if so, 0 otherwise.
+ * The rate list of a shared domain may also hold rates of other devlink
+ * objects, so only entries pointing back at @devlink count.
+ */
+int devlink_rates_check(struct devlink *devlink)
+{
+	struct devlink_rate *devlink_rate;
+	int err = 0;
+
+	devl_rate_domain_lock(devlink);
+	list_for_each_entry(devlink_rate, &devlink->rate_domain->rate_list, list) {
+		if (devlink_rate->devlink == devlink) {
+			err = -EBUSY;
+			break;
+		}
+	}
+	devl_rate_domain_unlock(devlink);
+	return err;
+}
+
 /**
  * devl_rate_node_create - create devlink rate node
  * @devlink: devlink instance