@@ -220,6 +220,21 @@ config DEVFREQ_THERMAL
If you want this support, you should say Y here.
+config THERMAL_AGGREGATION
+ bool "Thermal zone aggregation support"
+ depends on THERMAL_OF
+ help
+ This provides support for the creation of virtual thermal zone
+ devices that serve as aggregation points for other devices.
+ Individual primary thermal devices remain accessible in sysfs
+ but all cooling device bindings are moved over to the thermal
+ aggregator whose temperature is the weighted average of its
+ primary devices.
+
+ When this is enabled, the aggregation is applied to all entries
+ found in the device tree's "thermal-sensors" list when it contains
+ more than one entry.
+
config THERMAL_EMULATION
bool "Thermal emulation mode support"
help
@@ -748,6 +748,13 @@ struct thermal_zone_device *thermal_zone_get_by_id(int id)
* binding, and unbinding.
*/
+/*
+ * Called from thermal_zone_device_unregister(). The real implementation
+ * lives further down in this file when CONFIG_THERMAL_AGGREGATION is set;
+ * otherwise it compiles to an empty inline stub.
+ */
+#ifdef CONFIG_THERMAL_AGGREGATION
+static void thermal_remove_tz_from_aggregator(struct thermal_zone_device *tz);
+#else
+static inline void thermal_remove_tz_from_aggregator(struct thermal_zone_device *tz)
+{}
+#endif /* CONFIG_THERMAL_AGGREGATION */
+
/**
* thermal_bind_cdev_to_trip - bind a cooling device to a thermal zone
* @tz: pointer to struct thermal_zone_device
@@ -1577,6 +1584,8 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz)
thermal_debug_tz_remove(tz);
+ thermal_remove_tz_from_aggregator(tz);
+
mutex_lock(&thermal_list_lock);
list_for_each_entry(pos, &thermal_tz_list, node)
if (pos == tz)
@@ -1654,6 +1663,413 @@ struct thermal_zone_device *thermal_zone_get_zone_by_name(const char *name)
}
EXPORT_SYMBOL_GPL(thermal_zone_get_zone_by_name);
+#ifdef CONFIG_THERMAL_AGGREGATION
+
+static LIST_HEAD(thermal_aggregator_list);
+
+struct thermal_zone_aggregator {
+ struct thermal_zone_device *tz;
+ struct list_head primary_tz_list;
+ struct mutex lock;
+ struct ida ida;
+ struct list_head node;
+};
+
+/* Tell whether @tz is currently enrolled as a primary zone of an aggregator. */
+static bool is_aggregated(struct thermal_zone_device *tz)
+{
+	return !!tz->aggregator;
+}
+
+/*
+ * .get_temp callback of an aggregator zone: report the weighted average
+ * temperature of its primary zones.
+ *
+ * Primary zones that are suspended or not enabled are skipped. Every other
+ * primary zone contributes its temperature weighted by its tzp->slope. The
+ * walk stops at the first primary zone whose temperature read fails and
+ * that error is returned. -ENODATA is returned when no primary zone was
+ * read at all.
+ *
+ * A "max temp" variant could also be provided but the end result on the
+ * cooling device would be the same as if no aggregation was done in the
+ * first place.
+ */
+static int thermal_aggr_get_average_temp(struct thermal_zone_device *aggr_tz,
+					 int *aggr_temp)
+{
+	struct thermal_zone_aggregator *aggr = aggr_tz->devdata;
+	struct thermal_zone_device *primary;
+	s64 weighted_sum = 0;
+	u32 total_weight = 0;
+	int ret = -ENODATA;
+	int temp;
+
+	mutex_lock(&aggr->lock);
+	list_for_each_entry(primary, &aggr->primary_tz_list, aggregated_node) {
+		bool active;
+
+		mutex_lock(&primary->lock);
+		active = !primary->suspended &&
+			 primary->mode == THERMAL_DEVICE_ENABLED;
+		if (active)
+			ret = __thermal_zone_get_temp(primary, &temp);
+		if (active && !ret) {
+			if (temp > THERMAL_TEMP_INVALID) {
+				int weight = primary->tzp->slope;
+
+				weighted_sum += (s64)temp * weight;
+				total_weight += weight;
+			} else {
+				/*
+				 * Invalid temp values are ignored, unless all
+				 * primary zones are invalid in which case it
+				 * is passed up to accommodate the special case
+				 * in __thermal_zone_device_update().
+				 */
+				*aggr_temp = temp;
+			}
+		}
+		mutex_unlock(&primary->lock);
+
+		/* a failed read on an active zone ends the walk */
+		if (active && ret)
+			break;
+	}
+	mutex_unlock(&aggr->lock);
+
+	if (total_weight)
+		*aggr_temp = div_s64(weighted_sum, total_weight);
+
+	return ret;
+}
+
+/*
+ * .set_trips callback of an aggregator zone: forward the [low, high]
+ * window to every primary zone that is neither suspended nor disabled.
+ * Always returns 0.
+ */
+static int thermal_aggr_set_trips(struct thermal_zone_device *aggr_tz,
+				  int low, int high)
+{
+	struct thermal_zone_aggregator *aggr = aggr_tz->devdata;
+	struct thermal_zone_device *primary;
+
+	mutex_lock(&aggr->lock);
+	list_for_each_entry(primary, &aggr->primary_tz_list, aggregated_node) {
+		mutex_lock(&primary->lock);
+		if (!(primary->suspended ||
+		      primary->mode != THERMAL_DEVICE_ENABLED))
+			thermal_zone_set_trips(primary, low, high);
+		mutex_unlock(&primary->lock);
+	}
+	mutex_unlock(&aggr->lock);
+
+	return 0;
+}
+
+/*
+ * .change_mode callback of an aggregator zone: apply @mode to all of its
+ * primary zones. Every primary zone is visited even when some of them
+ * fail; the error from the last failing zone (or 0) is returned.
+ */
+static int thermal_aggr_change_mode(struct thermal_zone_device *aggr_tz,
+				    enum thermal_device_mode mode)
+{
+	struct thermal_zone_aggregator *aggr = aggr_tz->devdata;
+	struct thermal_zone_device *primary;
+	int err = 0;
+
+	mutex_lock(&aggr->lock);
+	list_for_each_entry(primary, &aggr->primary_tz_list, aggregated_node) {
+		int ret = thermal_zone_device_set_mode(primary, mode);
+
+		/* remember the failure but keep cycling through the list */
+		if (ret)
+			err = ret;
+	}
+	mutex_unlock(&aggr->lock);
+
+	return err;
+}
+
+/*
+ * Zone operations installed on every aggregator zone by
+ * create_thermal_aggregator(). Each callback fans out to the primary
+ * zones listed in the aggregator's primary_tz_list.
+ */
+static const struct thermal_zone_device_ops thermal_aggr_tz_ops = {
+ .get_temp = thermal_aggr_get_average_temp,
+ .set_trips = thermal_aggr_set_trips,
+ .change_mode = thermal_aggr_change_mode,
+};
+
+/**
+ * create_thermal_aggregator - create a tz to be used as an aggregator
+ *
+ * @ref_tz: the tz from which parameters such as trip values are copied
+ * @name: name to identify this aggregator
+ *
+ * This creates a virtual thermal zone to be used as an aggregator for
+ * other zones called "primary" zones. Those primary zones must be added to
+ * the created aggregator with add_tz_to_aggregator().
+ *
+ * The trips, thermal zone parameters, polling delays and should_bind
+ * callback are taken from @ref_tz. On success the aggregator is enabled
+ * and linked on thermal_aggregator_list.
+ *
+ * Return: a pointer to the created struct thermal_zone_device or an ERR_PTR
+ * value in case of errors.
+ */
+static struct thermal_zone_device *
+create_thermal_aggregator(struct thermal_zone_device *ref_tz, const char *name)
+{
+	struct thermal_zone_aggregator *aggr;
+	struct thermal_zone_device *aggr_tz;
+	struct thermal_trip *trips;
+	int ntrips = ref_tz->num_trips;
+	int ret;
+
+	/* temporary copy of the reference trips, only needed for registration */
+	trips = kcalloc(ntrips, sizeof(*trips), GFP_KERNEL);
+	if (!trips)
+		return ERR_PTR(-ENOMEM);
+	for (int i = 0; i < ntrips; i++)
+		trips[i] = ref_tz->trips[i].trip;
+
+	aggr = kzalloc(sizeof(*aggr), GFP_KERNEL);
+	if (!aggr) {
+		ret = -ENOMEM;
+		goto err_free_trips;
+	}
+	mutex_init(&aggr->lock);
+	INIT_LIST_HEAD(&aggr->primary_tz_list);
+	ida_init(&aggr->ida);
+
+	aggr_tz = thermal_zone_device_register_with_trips(name, trips, ntrips,
+							  aggr,
+							  &thermal_aggr_tz_ops,
+							  ref_tz->tzp, 0, 0);
+	if (IS_ERR(aggr_tz)) {
+		ret = PTR_ERR(aggr_tz);
+		pr_err("Failed to register thermal aggregator zone: %d\n", ret);
+		goto err_free_aggr;
+	}
+	aggr_tz->polling_delay_jiffies = ref_tz->polling_delay_jiffies;
+	aggr_tz->passive_delay_jiffies = ref_tz->passive_delay_jiffies;
+	aggr_tz->ops.should_bind = ref_tz->ops.should_bind;
+	aggr->tz = aggr_tz;
+
+	ret = thermal_zone_device_enable(aggr_tz);
+	if (ret) {
+		pr_err("Failed to enable thermal aggregator zone: %d\n", ret);
+		goto err_unregister_tz;
+	}
+
+	mutex_lock(&thermal_list_lock);
+	list_add_tail(&aggr->node, &thermal_aggregator_list);
+	mutex_unlock(&thermal_list_lock);
+
+	/*
+	 * The trips copy is released in exactly one place per path: here on
+	 * success, or under err_free_trips on failure. Freeing it before the
+	 * enable step would make the unwind path below free it a second time.
+	 */
+	kfree(trips);
+	return aggr_tz;
+
+err_unregister_tz:
+	thermal_zone_device_unregister(aggr_tz);
+err_free_aggr:
+	ida_destroy(&aggr->ida);
+	kfree(aggr);
+err_free_trips:
+	kfree(trips);
+	return ERR_PTR(ret);
+}
+
+/**
+ * add_tz_to_aggregator() - add a primary zone to an aggregator
+ *
+ * @aggr_tz: the aggregator tz to use, as returned by create_thermal_aggregator()
+ * @tz: a primary tz to be added
+ *
+ * This enrolls a primary tz with an aggregator tz. Thermal instances
+ * (bindings) from the primary tz are moved to the aggregator. It is assumed
+ * that all primary tz's added to a given aggregator have the same set of
+ * trip points. This can be called with an already enrolled tz in which case
+ * only new thermal instances from the primary, if any, will be moved over
+ * to the aggregator. Duplicate thermal instances are simply dropped.
+ *
+ * An "aggregator" symlink is created within the primary tz's sysfs directory
+ * to the aggregator tz directory. And a list of "primary_zone_<n>" symlinks
+ * in the aggregator's directory point back to all primary tz's it owns.
+ *
+ * Locks are taken in the order aggr->lock, aggr_tz->lock, tz->lock and
+ * released in reverse order.
+ *
+ * A bind failure other than -EEXIST is logged and aborts the operation:
+ * nothing is removed from @tz and its enrollment state is left unchanged,
+ * while instances already duplicated onto the aggregator stay in place.
+ * Failures to create the sysfs links are only logged and @tz remains
+ * enrolled; tz->aggregated_id is left negative when the "primary_zone_<n>"
+ * link could not be set up. No status is returned, so callers check
+ * tz->aggregator to find out whether the enrollment happened.
+ */
+static void add_tz_to_aggregator(struct thermal_zone_device *aggr_tz,
+ struct thermal_zone_device *tz)
+{
+ struct thermal_zone_aggregator *aggr = aggr_tz->devdata;
+ struct thermal_instance *ti, *next;
+ int ret;
+
+ mutex_lock(&aggr->lock);
+ mutex_lock(&aggr_tz->lock);
+ mutex_lock(&tz->lock);
+
+ /* duplicate thermal instances onto the aggregator */
+ list_for_each_entry(ti, &tz->thermal_instances, tz_node) {
+ int i = thermal_zone_trip_id(tz, ti->trip);
+ struct thermal_trip *aggr_trip = &aggr_tz->trips[i].trip;
+ struct thermal_cooling_device *cdev = ti->cdev;
+ struct cooling_spec c = {
+ .upper = ti->upper_no_limit ? THERMAL_NO_LIMIT : ti->upper,
+ .lower = ti->lower,
+ .weight = ti->weight,
+ };
+ ret = thermal_bind_cdev_to_trip(aggr_tz, aggr_trip, cdev, &c);
+ if (ret == -EEXIST)
+ ret = 0;
+ if (ret) {
+ print_bind_err_msg(aggr_tz, aggr_trip, cdev, ret);
+ goto out;
+ }
+ }
+
+ /* remove thermal instances from the primary tz */
+ list_for_each_entry_safe(ti, next, &tz->thermal_instances, tz_node) {
+ thermal_unbind_cdev_from_trip(tz, ti->trip, ti->cdev);
+ }
+
+ if (!tz->aggregator) {
+ list_add_tail(&tz->aggregated_node, &aggr->primary_tz_list);
+ tz->aggregator = aggr_tz;
+
+ /* add a link from the primary tz to its aggregator */
+ ret = sysfs_create_link(&tz->device.kobj,
+ &aggr_tz->device.kobj,
+ "aggregator");
+ if (ret)
+ dev_err(&tz->device, "linking to aggregator failed: %d\n", ret);
+
+ /* add a link from the aggregator to this primary tz */
+ tz->aggregated_id = ret = ida_alloc(&aggr->ida, GFP_KERNEL);
+ if (ret >= 0) {
+ char name[sizeof("primary_zone_000")];
+
+ snprintf(name, sizeof(name), "primary_zone_%d", ret);
+ ret = sysfs_create_link(&aggr_tz->device.kobj,
+ &tz->device.kobj,
+ name);
+ if (ret) {
+ ida_free(&aggr->ida, tz->aggregated_id);
+ tz->aggregated_id = -1;
+ }
+ }
+ if (ret)
+ dev_err(&aggr_tz->device, "linking to primary failed: %d\n", ret);
+ }
+
+out:
+ mutex_unlock(&tz->lock);
+ mutex_unlock(&aggr_tz->lock);
+ mutex_unlock(&aggr->lock);
+}
+
+/**
+ * free_thermal_aggregator_unlock - unregister and free an aggregator tz
+ *
+ * @aggr_tz: the aggregator to free, as returned by create_thermal_aggregator()
+ *
+ * This unregisters the tz used as an aggregator and frees its associated
+ * memory. This is called by thermal_remove_tz_from_aggregator() when the
+ * last primary tz is removed from the aggregator, or if the aggregator was
+ * created and is no longer needed before any primary tz's have been added
+ * to it.
+ *
+ * If the aggregator unexpectedly still owns primary zones, a warning is
+ * emitted and the aggregator is left registered rather than taking the
+ * whole system down.
+ *
+ * Note: To avoid race issues, this is expected to be called with
+ * thermal_list_lock held, but it will be released before returning.
+ */
+static void free_thermal_aggregator_unlock(struct thermal_zone_device *aggr_tz)
+{
+	struct thermal_zone_aggregator *aggr = aggr_tz->devdata;
+
+	lockdep_assert_held(&thermal_list_lock);
+
+	/* still honour the "returns unlocked" contract on the bail-out path */
+	if (WARN_ON(!list_empty(&aggr->primary_tz_list))) {
+		mutex_unlock(&thermal_list_lock);
+		return;
+	}
+
+	list_del(&aggr->node);
+	mutex_unlock(&thermal_list_lock);
+
+	thermal_zone_device_disable(aggr_tz);
+	thermal_zone_device_unregister(aggr_tz);
+	ida_destroy(&aggr->ida);
+	kfree(aggr);
+}
+
+/**
+ * thermal_remove_tz_from_aggregator - remove a primary tz from an aggregator
+ *
+ * @tz: the thermal zone to remove from its aggregator
+ *
+ * This retires a primary tz from its aggregator. If not aggregated, this
+ * is a no-op. Thermal instance bindings are re-established with the primary
+ * tz.
+ *
+ * If given tz was the last one owned by the aggregator, then the aggregator
+ * is destroyed.
+ *
+ * This is invoked for every zone being unregistered, including zones that
+ * were never aggregated and the aggregator zones themselves, so nothing may
+ * be dereferenced through tz->aggregator before it is known to be set.
+ */
+static void thermal_remove_tz_from_aggregator(struct thermal_zone_device *tz)
+{
+	struct thermal_zone_device *aggr_tz;
+	struct thermal_zone_aggregator *aggr;
+	struct thermal_cooling_device *cdev;
+
+	if (!is_aggregated(tz))
+		return;
+
+	/* only safe to follow the aggregator pointer past this point */
+	aggr_tz = tz->aggregator;
+	aggr = aggr_tz->devdata;
+
+	mutex_lock(&thermal_list_lock);
+
+	/* Bind cooling devices back to this zone */
+	tz->ops.should_bind = aggr_tz->ops.should_bind;
+	list_for_each_entry(cdev, &thermal_cdev_list, node)
+		thermal_zone_cdev_bind(tz, cdev);
+
+	mutex_lock(&aggr->lock);
+	list_del(&tz->aggregated_node);
+	tz->aggregator = NULL;
+	sysfs_remove_link(&tz->device.kobj, "aggregator");
+	if (tz->aggregated_id >= 0) {
+		char name[sizeof("primary_zone_000")];
+
+		snprintf(name, sizeof(name), "primary_zone_%d", tz->aggregated_id);
+		sysfs_remove_link(&aggr_tz->device.kobj, name);
+		ida_free(&aggr->ida, tz->aggregated_id);
+	}
+	mutex_unlock(&aggr->lock);
+
+	if (list_empty(&aggr->primary_tz_list)) {
+		/* no more tz tied to this aggregator; this drops thermal_list_lock */
+		free_thermal_aggregator_unlock(aggr_tz);
+	} else {
+		mutex_unlock(&thermal_list_lock);
+	}
+}
+
+/**
+ * thermal_zone_device_aggregate - aggregate provided thermal zone device
+ *
+ * @tz: the thermal zone to aggregate
+ * @name: the aggregator's name to use
+ *
+ * Enroll @tz with the thermal aggregator whose zone type equals @name,
+ * creating that aggregator from @tz's parameters when none exists yet.
+ * An existing aggregator is only used if its should_bind callback is the
+ * same as the one of @tz. Once enrolled, @tz has its own should_bind
+ * callback cleared.
+ *
+ * Nothing is returned: on any error a message is logged and @tz simply
+ * stays a freestanding zone, since a suboptimal thermal configuration is
+ * better than having the caller revert the registration of @tz.
+ */
+void thermal_zone_device_aggregate(struct thermal_zone_device *tz,
+				   const char *name)
+{
+	struct thermal_zone_device *aggr_tz = NULL;
+	struct thermal_zone_aggregator *pos;
+
+	mutex_lock(&thermal_list_lock);
+
+	/* the first aggregator carrying the requested name wins */
+	list_for_each_entry(pos, &thermal_aggregator_list, node) {
+		if (strcmp(pos->tz->type, name) == 0) {
+			aggr_tz = pos->tz;
+			break;
+		}
+	}
+
+	if (aggr_tz) {
+		if (aggr_tz->ops.should_bind != tz->ops.should_bind) {
+			pr_err("%s: same name but ops.should_bind differs\n",
+			       __func__);
+			goto out;
+		}
+	} else {
+		/* creation takes thermal_list_lock itself, so drop it first */
+		mutex_unlock(&thermal_list_lock);
+
+		aggr_tz = create_thermal_aggregator(tz, name);
+		if (IS_ERR(aggr_tz)) {
+			pr_err("unable to create thermal aggregator (%ld)\n",
+			       PTR_ERR(aggr_tz));
+			return;
+		}
+
+		mutex_lock(&thermal_list_lock);
+	}
+
+	add_tz_to_aggregator(aggr_tz, tz);
+	if (is_aggregated(tz))
+		tz->ops.should_bind = NULL;
+out:
+	mutex_unlock(&thermal_list_lock);
+}
+
+#endif /* CONFIG_THERMAL_AGGREGATION */
+
static void thermal_zone_device_resume(struct work_struct *work)
{
struct thermal_zone_device *tz;
@@ -136,6 +136,11 @@ struct thermal_zone_device {
enum thermal_notify_event notify_event;
bool suspended;
bool resuming;
+#ifdef CONFIG_THERMAL_AGGREGATION
+ struct thermal_zone_device *aggregator;
+ struct list_head aggregated_node;
+ int aggregated_id;
+#endif
#ifdef CONFIG_THERMAL_DEBUGFS
struct thermal_debugfs *debugfs;
#endif
@@ -287,4 +292,13 @@ thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
unsigned long new_state) {}
#endif /* CONFIG_THERMAL_STATISTICS */
+/*
+ * Enroll @tz with the aggregator named @aggr_name. Compiles to an empty
+ * inline stub when CONFIG_THERMAL_AGGREGATION is not set. No status is
+ * returned in either case.
+ */
+#ifdef CONFIG_THERMAL_AGGREGATION
+void thermal_zone_device_aggregate(struct thermal_zone_device *tz,
+ const char *aggr_name);
+#else
+static inline void
+thermal_zone_device_aggregate(struct thermal_zone_device *tz, const char *aggr_name)
+{}
+#endif /* CONFIG_THERMAL_AGGREGATION */
+
#endif /* __THERMAL_CORE_H__ */
@@ -451,7 +451,25 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
if (!of_ops.critical && !strcasecmp(action, "reboot"))
of_ops.critical = thermal_zone_device_critical_reboot;
- tz = thermal_zone_device_register_with_trips(np->name, trips, ntrips,
+ /*
+ * The device tree node name is used with single-sensor entries.
+ * When multiple-sensor entries are aggregated, the node name is used
+ * for the aggregator and primary sensors use their compatible alias
+ * name with the id value.
+ */
+ bool multi = (index != -1);
+ char namebuf[THERMAL_NAME_LENGTH];
+ const char *tz_name = namebuf;
+
+ if (!multi)
+ tz_name = np->name;
+ else if (of_alias_from_compatible(sensor, namebuf, sizeof(namebuf)) == 0)
+ snprintf(namebuf + strlen(namebuf), sizeof(namebuf) - strlen(namebuf),
+ "(%d)", id);
+ else
+ snprintf(namebuf, sizeof(namebuf), "%s(%d)", sensor->name, id);
+
+ tz = thermal_zone_device_register_with_trips(tz_name, trips, ntrips,
data, &of_ops, &tzp,
pdelay, delay);
if (IS_ERR(tz)) {
@@ -460,6 +478,9 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
goto out_kfree_trips;
}
+ if (multi)
+ thermal_zone_device_aggregate(tz, np->name);
+
of_node_put(np);
kfree(trips);