@@ -1553,6 +1553,14 @@ void devlink_register(struct devlink *devlink);
void devlink_unregister(struct devlink *devlink);
void devlink_free(struct devlink *devlink);
+/* Can be used to tell devlink that shared rate domains are supported.
+ * The same id needs to be provided for devlink objects that can share
+ * rate nodes in hw (e.g. contain nodes with parents in other devlink objects).
+ * This requires holding the devlink lock and must be done while the devlink
+ * object has no rates yet. An object can only switch domains once: calling
+ * this again with the same id is a no-op, a different id fails with -EEXIST.
+ * Rate node relationships across different rate domains are not supported.
+ */
+int devlink_shared_rate_domain_init(struct devlink *devlink, u64 id);
+
/**
* struct devlink_port_ops - Port operations
* @port_split: Callback used to split the port into multiple ones.
@@ -289,6 +289,80 @@ void devl_unlock(struct devlink *devlink)
}
EXPORT_SYMBOL_GPL(devl_unlock);
+/* Global registry of all shared rate domains currently in existence. */
+static struct {
+	/* Guards rate_domains. Acquired AFTER the devlink lock. */
+	struct mutex lock;
+	struct list_head rate_domains;
+} devlink_rate_domains = {
+	.rate_domains = LIST_HEAD_INIT(devlink_rate_domains.rate_domains),
+	.lock = __MUTEX_INITIALIZER(devlink_rate_domains.lock),
+};
+
+/* Tell whether @rate_domain is the domain identified by the (@ops, @id) pair. */
+static bool devlink_rate_domain_eq(struct devlink_rate_domain *rate_domain,
+				   const struct devlink_ops *ops, u64 id)
+{
+	if (rate_domain->ops != ops)
+		return false;
+
+	return rate_domain->id == id;
+}
+
+/**
+ * devlink_shared_rate_domain_init - switch a devlink instance to a shared rate domain
+ * @devlink: devlink instance, with its instance lock held
+ * @id: driver-provided identifier of the shared rate domain
+ *
+ * Shared rate domains are keyed by the (@devlink->ops, @id) pair. A live
+ * domain with a matching key is reused and gains a reference, otherwise a new
+ * domain is allocated and added to the global registry. On success the
+ * private rate domain @devlink started out with is freed.
+ *
+ * Return: 0 on success or when @devlink already uses the requested domain,
+ * -EEXIST when it already uses a different shared domain, -EINVAL when its
+ * private domain already holds rates, -ENOMEM on allocation failure.
+ */
+int devlink_shared_rate_domain_init(struct devlink *devlink, u64 id)
+{
+	struct devlink_rate_domain *rate_domain;
+	int err = 0;
+
+	devl_assert_locked(devlink);
+
+	if (devlink->rate_domain->shared) {
+		if (devlink_rate_domain_eq(devlink->rate_domain, devlink->ops, id))
+			return 0;
+		return -EEXIST;
+	}
+	/* Existing rates cannot be migrated to another domain. */
+	if (!list_empty(&devlink->rate_domain->rate_list))
+		return -EINVAL;
+
+	mutex_lock(&devlink_rate_domains.lock);
+	list_for_each_entry(rate_domain, &devlink_rate_domains.rate_domains, list) {
+		/* An entry whose refcount already reached zero is about to be
+		 * unlinked and freed by its last user, which may be waiting
+		 * for the registry lock right now. It must not be revived, so
+		 * only take a reference if the count is still non-zero.
+		 */
+		if (devlink_rate_domain_eq(rate_domain, devlink->ops, id) &&
+		    refcount_inc_not_zero(&rate_domain->refcount))
+			goto replace_domain;
+	}
+
+	/* Shared domain not found, create one. */
+	rate_domain = kvzalloc(sizeof(*rate_domain), GFP_KERNEL);
+	if (!rate_domain) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+	rate_domain->shared = true;
+	rate_domain->ops = devlink->ops;
+	rate_domain->id = id;
+	mutex_init(&rate_domain->lock);
+	INIT_LIST_HEAD(&rate_domain->rate_list);
+	refcount_set(&rate_domain->refcount, 1);
+	list_add_tail(&rate_domain->list, &devlink_rate_domains.rate_domains);
+replace_domain:
+	/* The private domain is empty (checked above) and no longer needed. */
+	kvfree(devlink->rate_domain);
+	devlink->rate_domain = rate_domain;
+unlock:
+	mutex_unlock(&devlink_rate_domains.lock);
+	return err;
+}
+EXPORT_SYMBOL_GPL(devlink_shared_rate_domain_init);
+
+/* Release a devlink instance's hold on @rate_domain.
+ * A private domain is freed right away. A shared domain is unlinked from the
+ * global registry and freed only when its last reference is dropped.
+ */
+static void devlink_rate_domain_put(struct devlink_rate_domain *rate_domain)
+{
+	if (rate_domain->shared) {
+		/* The final decrement must happen with the registry lock held.
+		 * Otherwise a concurrent lookup could find this domain in the
+		 * list with a zero refcount, take a reference and keep using
+		 * it after it is freed below.
+		 */
+		if (!refcount_dec_and_mutex_lock(&rate_domain->refcount,
+						 &devlink_rate_domains.lock))
+			return;
+		list_del(&rate_domain->list);
+		mutex_unlock(&devlink_rate_domains.lock);
+
+		WARN_ON(!list_empty(&rate_domain->rate_list));
+		mutex_destroy(&rate_domain->lock);
+	}
+	kvfree(rate_domain);
+}
+
/**
* devlink_try_get() - try to obtain a reference on a devlink instance
* @devlink: instance to reference
@@ -314,7 +388,7 @@ static void devlink_release(struct work_struct *work)
mutex_destroy(&devlink->lock);
lockdep_unregister_key(&devlink->lock_key);
put_device(devlink->dev);
- kvfree(devlink->rate_domain);
+ devlink_rate_domain_put(devlink->rate_domain);
kvfree(devlink);
}
@@ -428,6 +502,7 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
devlink->rate_domain = kvzalloc(sizeof(*devlink->rate_domain), GFP_KERNEL);
if (!devlink->rate_domain)
goto err_rate_domain;
+ devlink->rate_domain->shared = false;
INIT_LIST_HEAD(&devlink->rate_domain->rate_list);
ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b,
@@ -484,7 +559,7 @@ void devlink_free(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->resource_list));
WARN_ON(!list_empty(&devlink->dpipe_table_list));
WARN_ON(!list_empty(&devlink->sb_list));
- WARN_ON(!list_empty(&devlink->rate_domain->rate_list));
+ WARN_ON(devlink_rates_check(devlink));
WARN_ON(!list_empty(&devlink->linecard_list));
WARN_ON(!xa_empty(&devlink->ports));
@@ -434,7 +434,7 @@ static void devlink_reload_reinit_sanity_check(struct devlink *devlink)
WARN_ON(!list_empty(&devlink->trap_list));
WARN_ON(!list_empty(&devlink->dpipe_table_list));
WARN_ON(!list_empty(&devlink->sb_list));
- WARN_ON(!list_empty(&devlink->rate_domain->rate_list));
+ WARN_ON(devlink_rates_check(devlink));
WARN_ON(!list_empty(&devlink->linecard_list));
WARN_ON(!xa_empty(&devlink->ports));
}
@@ -30,9 +30,20 @@ struct devlink_dev_stats {
u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
};
-/* Stores devlink rates associated with a rate domain. */
+/* Stores devlink rates associated with a rate domain.
+ * Multiple devlink objects may share the same domain (when 'shared' is true)
+ * and rate nodes can have members from multiple devices.
+ */
struct devlink_rate_domain {
+ bool shared;
+ struct list_head list;
struct list_head rate_list;
+ /* Fields below, as well as 'list' above, are only used for shared rate
+ * domains: 'list' links the domain into the global registry of shared
+ * domains, while 'ops' and 'id' together identify it.
+ */
+ const struct devlink_ops *ops;
+ u64 id;
+ refcount_t refcount;
+ /* Serializes access to rates. Only taken when 'shared' is true. */
+ struct mutex lock;
};
struct devlink {
@@ -121,9 +132,17 @@ static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock)
device_unlock(devlink->dev);
}
-static inline void devl_rate_domain_lock(struct devlink *devlink) { }
+/* Take the rate domain mutex of @devlink. Does nothing for a private domain. */
+static inline void devl_rate_domain_lock(struct devlink *devlink)
+{
+	struct devlink_rate_domain *domain = devlink->rate_domain;
+
+	if (!domain->shared)
+		return;
+
+	mutex_lock(&domain->lock);
+}
-static inline void devl_rate_domain_unlock(struct devlink *devlink) { }
+/* Release the rate domain mutex of @devlink. Does nothing for a private domain. */
+static inline void devl_rate_domain_unlock(struct devlink *devlink)
+{
+	struct devlink_rate_domain *domain = devlink->rate_domain;
+
+	if (!domain->shared)
+		return;
+
+	mutex_unlock(&domain->lock);
+}
typedef void devlink_rel_notify_cb_t(struct devlink *devlink, u32 obj_index);
typedef void devlink_rel_cleanup_cb_t(struct devlink *devlink, u32 obj_index,
@@ -307,6 +326,7 @@ int devlink_resources_validate(struct devlink *devlink,
/* Rates */
int devlink_rate_nodes_check(struct devlink *devlink, struct netlink_ext_ack *extack);
+int devlink_rates_check(struct devlink *devlink);
/* Linecards */
unsigned int devlink_linecard_index(struct devlink_linecard *linecard);
@@ -675,6 +675,21 @@ int devlink_rate_nodes_check(struct devlink *devlink, struct netlink_ext_ack *ex
return err;
}
+/* Check whether any rate in the rate domain of @devlink belongs to @devlink.
+ * The domain may be shared, so rates owned by other devlink instances are
+ * skipped. Returns -EBUSY if such a rate exists, 0 otherwise.
+ */
+int devlink_rates_check(struct devlink *devlink)
+{
+	struct devlink_rate *rate;
+	int ret = 0;
+
+	devl_rate_domain_lock(devlink);
+	list_for_each_entry(rate, &devlink->rate_domain->rate_list, list) {
+		if (rate->devlink != devlink)
+			continue;
+
+		ret = -EBUSY;
+		break;
+	}
+	devl_rate_domain_unlock(devlink);
+
+	return ret;
+}
+
/**
* devl_rate_node_create - create devlink rate node
* @devlink: devlink instance
The underlying idea is to model a piece of hardware which: 1. Exposes multiple functions as separate devlink objects. 2. Is capable of instantiating a transmit scheduling tree spanning multiple functions. Modeling this requires devlink rate nodes with parents across other devlink objects. A naive approach that relies on the current one-lock-per-devlink model does not work, as it would require in some cases acquiring multiple devlink locks in the correct order. Based on the preliminary patches in this series, this commit introduces the concept of a shared rate domain. 1. A shared rate domain stores rate nodes for a piece of hardware that has the properties described at the beginning. 2. A shared rate domain is identified by the devlink operations pointer (a proxy for the device type) and a unique u64 hardware identifier provided by the driver. 3. There is a global registry of reference counted shared rate domains. 4. A devlink object starts out with a private rate domain, and can be switched once to use a shared rate domain with devlink_shared_rate_domain_init. Further calls with the same id do nothing, while a call with a different id fails with -EEXIST. 5. Shared rate domains have an additional mutex serializing access to rate nodes, acquired by the previously introduced functions devl_rate_domain_lock and devl_rate_domain_unlock. These new code paths are unused for now. A caller of devlink_shared_rate_domain_init will be added in a subsequent patch. Issue: 3645895 Change-Id: I045456c4d06fd7018ba0da345688ee43e1d0fd74 Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com> --- include/net/devlink.h | 8 ++++ net/devlink/core.c | 79 ++++++++++++++++++++++++++++++++++++- net/devlink/dev.c | 2 +- net/devlink/devl_internal.h | 26 ++++++++++-- net/devlink/rate.c | 15 +++++++ 5 files changed, 124 insertions(+), 6 deletions(-)