Message ID | 9334403.CDJkKcVGEf@rjwysocki.net (mailing list archive) |
---|---|
State | Mainlined, archived |
Headers | show |
Series | thermal: Rework binding cooling devices to trip points | expand |
On Mon, 2024-08-19 at 18:00 +0200, Rafael J. Wysocki wrote: > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > The current design of the code binding cooling devices to trip points > in > thermal zones is convoluted and hard to follow. > > Namely, a driver that registers a thermal zone can provide .bind() > and .unbind() operations for it, which are required to call either > thermal_bind_cdev_to_trip() and thermal_unbind_cdev_from_trip(), > respectively, or thermal_zone_bind_cooling_device() and > thermal_zone_unbind_cooling_device(), respectively, for every > relevant > trip point and the given cooling device. Moreover, if .bind() is > provided and .unbind() is not, the cleanup necessary during the > removal > of a thermal zone or a cooling device may not be carried out. > > In other words, the core relies on the thermal zone owners to do the > right thing, which is error prone and far from obvious, even though > all > of that is not really necessary. Specifically, if the core could ask > the thermal zone owner, through a special thermal zone callback, > whether > or not a given cooling device should be bound to a given trip point > in > the given thermal zone, it might as well carry out all of the binding > and unbinding by itself. In particular, the unbinding can be done > automatically without involving the thermal zone owner at all because > all of the thermal instances associated with a thermal zone or > cooling > device going away must be deleted regardless. > > Accordingly, introduce a new thermal zone operation, .should_bind(), > that can be invoked by the thermal core for a given thermal zone, > trip point and cooling device combination in order to check whether > or not the cooling device should be bound to the trip point at hand. > It takes an additional cooling_spec argument allowing the thermal > zone owner to specify the highest and lowest cooling states of the > cooling device and its weight for the given trip point binding. > > Make the thermal core use this operation, if present, in the absence > of > .bind() and .unbind(). Note that .should_bind() will be called under > the thermal zone lock. > > No intentional functional impact. > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Zhang Rui <rui.zhang@intel.com> thanks, rui > --- > > v1 -> v3: No changes (previously [08/17]) > > --- > drivers/thermal/thermal_core.c | 106 > +++++++++++++++++++++++++++++++---------- > include/linux/thermal.h | 10 +++ > 2 files changed, 92 insertions(+), 24 deletions(-) > > Index: linux-pm/include/linux/thermal.h > =================================================================== > --- linux-pm.orig/include/linux/thermal.h > +++ linux-pm/include/linux/thermal.h > @@ -85,11 +85,21 @@ struct thermal_trip { > > struct thermal_zone_device; > > +struct cooling_spec { > + unsigned long upper; /* Highest cooling state */ > + unsigned long lower; /* Lowest cooling state */ > + unsigned int weight; /* Cooling device weight */ > +}; > + > struct thermal_zone_device_ops { > int (*bind) (struct thermal_zone_device *, > struct thermal_cooling_device *); > int (*unbind) (struct thermal_zone_device *, > struct thermal_cooling_device *); > + bool (*should_bind) (struct thermal_zone_device *, > + const struct thermal_trip *, > + struct thermal_cooling_device *, > + struct cooling_spec *); > int (*get_temp) (struct thermal_zone_device *, int *); > int (*set_trips) (struct thermal_zone_device *, int, int); > int (*change_mode) (struct thermal_zone_device *, > Index: linux-pm/drivers/thermal/thermal_core.c > =================================================================== > --- linux-pm.orig/drivers/thermal/thermal_core.c > +++ linux-pm/drivers/thermal/thermal_core.c > @@ -991,12 +991,61 @@ static struct class *thermal_class; > > static inline > void print_bind_err_msg(struct thermal_zone_device *tz, > + const struct thermal_trip *trip, > struct thermal_cooling_device *cdev, int ret) > { > + if (trip) { > + dev_err(&tz->device, "binding cdev %s to trip %d > failed: %d\n", > + cdev->type, thermal_zone_trip_id(tz, trip), > ret); > + return; > + } > + > dev_err(&tz->device, "binding zone %s with cdev %s > failed:%d\n", > tz->type, cdev->type, ret); > } > > +static void thermal_zone_cdev_binding(struct thermal_zone_device > *tz, > + struct thermal_cooling_device > *cdev) > +{ > + struct thermal_trip_desc *td; > + int ret; > + > + /* > + * Old-style binding. The .bind() callback is expected to > call > + * thermal_bind_cdev_to_trip() under the thermal zone lock. > + */ > + if (tz->ops.bind) { > + ret = tz->ops.bind(tz, cdev); > + if (ret) > + print_bind_err_msg(tz, NULL, cdev, ret); > + > + return; > + } > + > + if (!tz->ops.should_bind) > + return; > + > + mutex_lock(&tz->lock); > + > + for_each_trip_desc(tz, td) { > + struct thermal_trip *trip = &td->trip; > + struct cooling_spec c = { > + .upper = THERMAL_NO_LIMIT, > + .lower = THERMAL_NO_LIMIT, > + .weight = THERMAL_WEIGHT_DEFAULT > + }; > + > + if (tz->ops.should_bind(tz, trip, cdev, &c)) { > + ret = thermal_bind_cdev_to_trip(tz, trip, > cdev, c.upper, > + c.lower, > c.weight); > + if (ret) > + print_bind_err_msg(tz, trip, cdev, > ret); > + } > + } > + > + mutex_unlock(&tz->lock); > +} > + > /** > * __thermal_cooling_device_register() - register a new thermal > cooling device > * @np: a pointer to a device tree node. > @@ -1092,13 +1141,8 @@ __thermal_cooling_device_register(struct > list_add(&cdev->node, &thermal_cdev_list); > > /* Update binding information for 'this' new cdev */ > - list_for_each_entry(pos, &thermal_tz_list, node) { > - if (pos->ops.bind) { > - ret = pos->ops.bind(pos, cdev); > - if (ret) > - print_bind_err_msg(pos, cdev, ret); > - } > - } > + list_for_each_entry(pos, &thermal_tz_list, node) > + thermal_zone_cdev_binding(pos, cdev); > > list_for_each_entry(pos, &thermal_tz_list, node) > if (atomic_cmpxchg(&pos->need_update, 1, 0)) > @@ -1299,6 +1343,28 @@ unlock_list: > } > EXPORT_SYMBOL_GPL(thermal_cooling_device_update); > > +static void thermal_zone_cdev_unbinding(struct thermal_zone_device > *tz, > + struct thermal_cooling_device > *cdev) > +{ > + struct thermal_trip_desc *td; > + > + /* > + * Old-style unbinding. The .unbind callback is expected to > call > + * thermal_unbind_cdev_from_trip() under the thermal zone > lock. > + */ > + if (tz->ops.unbind) { > + tz->ops.unbind(tz, cdev); > + return; > + } > + > + mutex_lock(&tz->lock); > + > + for_each_trip_desc(tz, td) > + thermal_unbind_cdev_from_trip(tz, &td->trip, cdev); > + > + mutex_unlock(&tz->lock); > +} > + > /** > * thermal_cooling_device_unregister - removes a thermal cooling > device > * @cdev: the thermal cooling device to remove. > @@ -1325,10 +1391,8 @@ void thermal_cooling_device_unregister(s > list_del(&cdev->node); > > /* Unbind all thermal zones associated with 'this' cdev */ > - list_for_each_entry(tz, &thermal_tz_list, node) { > - if (tz->ops.unbind) > - tz->ops.unbind(tz, cdev); > - } > + list_for_each_entry(tz, &thermal_tz_list, node) > + thermal_zone_cdev_unbinding(tz, cdev); > > mutex_unlock(&thermal_list_lock); > > @@ -1403,6 +1467,7 @@ thermal_zone_device_register_with_trips( > unsigned int polling_delay) > { > const struct thermal_trip *trip = trips; > + struct thermal_cooling_device *cdev; > struct thermal_zone_device *tz; > struct thermal_trip_desc *td; > int id; > @@ -1425,8 +1490,9 @@ thermal_zone_device_register_with_trips( > return ERR_PTR(-EINVAL); > } > > - if (!ops || !ops->get_temp) { > - pr_err("Thermal zone device ops not defined\n"); > + if (!ops || !ops->get_temp || (ops->should_bind && ops->bind) > || > + (ops->should_bind && ops->unbind)) { > + pr_err("Thermal zone device ops not defined or > invalid\n"); > return ERR_PTR(-EINVAL); > } > > @@ -1539,15 +1605,8 @@ thermal_zone_device_register_with_trips( > mutex_unlock(&tz->lock); > > /* Bind cooling devices for this zone */ > - if (tz->ops.bind) { > - struct thermal_cooling_device *cdev; > - > - list_for_each_entry(cdev, &thermal_cdev_list, node) { > - result = tz->ops.bind(tz, cdev); > - if (result) > - print_bind_err_msg(tz, cdev, result); > - } > - } > + list_for_each_entry(cdev, &thermal_cdev_list, node) > + thermal_zone_cdev_binding(tz, cdev); > > mutex_unlock(&thermal_list_lock); > > @@ -1641,8 +1700,7 @@ void thermal_zone_device_unregister(stru > > /* Unbind all cdevs associated with 'this' thermal zone */ > list_for_each_entry(cdev, &thermal_cdev_list, node) > - if (tz->ops.unbind) > - tz->ops.unbind(tz, cdev); > + thermal_zone_cdev_unbinding(tz, cdev); > > mutex_unlock(&thermal_list_lock); > > > >
在 2024/8/20 0:00, Rafael J. Wysocki 写道: > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > The current design of the code binding cooling devices to trip points in > thermal zones is convoluted and hard to follow. > > Namely, a driver that registers a thermal zone can provide .bind() > and .unbind() operations for it, which are required to call either > thermal_bind_cdev_to_trip() and thermal_unbind_cdev_from_trip(), > respectively, or thermal_zone_bind_cooling_device() and > thermal_zone_unbind_cooling_device(), respectively, for every relevant > trip point and the given cooling device. Moreover, if .bind() is > provided and .unbind() is not, the cleanup necessary during the removal > of a thermal zone or a cooling device may not be carried out. > > In other words, the core relies on the thermal zone owners to do the > right thing, which is error prone and far from obvious, even though all > of that is not really necessary. Specifically, if the core could ask > the thermal zone owner, through a special thermal zone callback, whether > or not a given cooling device should be bound to a given trip point in > the given thermal zone, it might as well carry out all of the binding > and unbinding by itself. In particular, the unbinding can be done > automatically without involving the thermal zone owner at all because > all of the thermal instances associated with a thermal zone or cooling > device going away must be deleted regardless. > > Accordingly, introduce a new thermal zone operation, .should_bind(), > that can be invoked by the thermal core for a given thermal zone, > trip point and cooling device combination in order to check whether > or not the cooling device should be bound to the trip point at hand. > It takes an additional cooling_spec argument allowing the thermal > zone owner to specify the highest and lowest cooling states of the > cooling device and its weight for the given trip point binding. > > Make the thermal core use this operation, if present, in the absence of > .bind() and .unbind(). Note that .should_bind() will be called under > the thermal zone lock. > > No intentional functional impact. > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > --- all thermal zone is linked to thermal_tz_list and cooling devices is linked to thermal_cdev_list. But if one cooling device should bind to a trip in thermal zone is determined by thermal driver. Introducing should_bind() looks good to me. Acked-by: Huisong Li <lihuisong@huawei.com> > > > > > .
On 19/08/2024 18:00, Rafael J. Wysocki wrote: > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > The current design of the code binding cooling devices to trip points in > thermal zones is convoluted and hard to follow. > > Namely, a driver that registers a thermal zone can provide .bind() > and .unbind() operations for it, which are required to call either > thermal_bind_cdev_to_trip() and thermal_unbind_cdev_from_trip(), > respectively, or thermal_zone_bind_cooling_device() and > thermal_zone_unbind_cooling_device(), respectively, for every relevant > trip point and the given cooling device. Moreover, if .bind() is > provided and .unbind() is not, the cleanup necessary during the removal > of a thermal zone or a cooling device may not be carried out. > > In other words, the core relies on the thermal zone owners to do the > right thing, which is error prone and far from obvious, even though all > of that is not really necessary. Specifically, if the core could ask > the thermal zone owner, through a special thermal zone callback, whether > or not a given cooling device should be bound to a given trip point in > the given thermal zone, it might as well carry out all of the binding > and unbinding by itself. In particular, the unbinding can be done > automatically without involving the thermal zone owner at all because > all of the thermal instances associated with a thermal zone or cooling > device going away must be deleted regardless. > > Accordingly, introduce a new thermal zone operation, .should_bind(), > that can be invoked by the thermal core for a given thermal zone, > trip point and cooling device combination in order to check whether > or not the cooling device should be bound to the trip point at hand. > It takes an additional cooling_spec argument allowing the thermal > zone owner to specify the highest and lowest cooling states of the > cooling device and its weight for the given trip point binding. > > Make the thermal core use this operation, if present, in the absence of > .bind() and .unbind(). Note that .should_bind() will be called under > the thermal zone lock. > > No intentional functional impact. > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > --- > > v1 -> v3: No changes (previously [08/17]) > > --- > drivers/thermal/thermal_core.c | 106 +++++++++++++++++++++++++++++++---------- > include/linux/thermal.h | 10 +++ > 2 files changed, 92 insertions(+), 24 deletions(-) > > Index: linux-pm/include/linux/thermal.h > =================================================================== > --- linux-pm.orig/include/linux/thermal.h > +++ linux-pm/include/linux/thermal.h > @@ -85,11 +85,21 @@ struct thermal_trip { > > struct thermal_zone_device; > > +struct cooling_spec { > + unsigned long upper; /* Highest cooling state */ > + unsigned long lower; /* Lowest cooling state */ > + unsigned int weight; /* Cooling device weight */ > +}; > + > struct thermal_zone_device_ops { > int (*bind) (struct thermal_zone_device *, > struct thermal_cooling_device *); > int (*unbind) (struct thermal_zone_device *, > struct thermal_cooling_device *); > + bool (*should_bind) (struct thermal_zone_device *, > + const struct thermal_trip *, > + struct thermal_cooling_device *, > + struct cooling_spec *); > int (*get_temp) (struct thermal_zone_device *, int *); > int (*set_trips) (struct thermal_zone_device *, int, int); > int (*change_mode) (struct thermal_zone_device *, > Index: linux-pm/drivers/thermal/thermal_core.c > =================================================================== > --- linux-pm.orig/drivers/thermal/thermal_core.c > +++ linux-pm/drivers/thermal/thermal_core.c > @@ -991,12 +991,61 @@ static struct class *thermal_class; > > static inline > void print_bind_err_msg(struct thermal_zone_device *tz, > + const struct thermal_trip *trip, > struct thermal_cooling_device *cdev, int ret) > { > + if (trip) { > + dev_err(&tz->device, "binding cdev %s to trip %d failed: %d\n", > + cdev->type, thermal_zone_trip_id(tz, trip), ret); > + return; > + } > + > dev_err(&tz->device, "binding zone %s with cdev %s failed:%d\n", > tz->type, cdev->type, ret); > } > > +static void thermal_zone_cdev_binding(struct thermal_zone_device *tz, > + struct thermal_cooling_device *cdev) nit picking: is there a reason to use 'binding' instead of 'bind' ? IMO it would appear more consistent to keep the same wording than the ops. The present participle is usually used to describe an action which is happening, usually to report back an event. Here it is more an action to be done (feel free to send a separate patch for the renaming). Other than that Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Index: linux-pm/include/linux/thermal.h =================================================================== --- linux-pm.orig/include/linux/thermal.h +++ linux-pm/include/linux/thermal.h @@ -85,11 +85,21 @@ struct thermal_trip { struct thermal_zone_device; +struct cooling_spec { + unsigned long upper; /* Highest cooling state */ + unsigned long lower; /* Lowest cooling state */ + unsigned int weight; /* Cooling device weight */ +}; + struct thermal_zone_device_ops { int (*bind) (struct thermal_zone_device *, struct thermal_cooling_device *); int (*unbind) (struct thermal_zone_device *, struct thermal_cooling_device *); + bool (*should_bind) (struct thermal_zone_device *, + const struct thermal_trip *, + struct thermal_cooling_device *, + struct cooling_spec *); int (*get_temp) (struct thermal_zone_device *, int *); int (*set_trips) (struct thermal_zone_device *, int, int); int (*change_mode) (struct thermal_zone_device *, Index: linux-pm/drivers/thermal/thermal_core.c =================================================================== --- linux-pm.orig/drivers/thermal/thermal_core.c +++ linux-pm/drivers/thermal/thermal_core.c @@ -991,12 +991,61 @@ static struct class *thermal_class; static inline void print_bind_err_msg(struct thermal_zone_device *tz, + const struct thermal_trip *trip, struct thermal_cooling_device *cdev, int ret) { + if (trip) { + dev_err(&tz->device, "binding cdev %s to trip %d failed: %d\n", + cdev->type, thermal_zone_trip_id(tz, trip), ret); + return; + } + dev_err(&tz->device, "binding zone %s with cdev %s failed:%d\n", tz->type, cdev->type, ret); } +static void thermal_zone_cdev_binding(struct thermal_zone_device *tz, + struct thermal_cooling_device *cdev) +{ + struct thermal_trip_desc *td; + int ret; + + /* + * Old-style binding. The .bind() callback is expected to call + * thermal_bind_cdev_to_trip() under the thermal zone lock. + */ + if (tz->ops.bind) { + ret = tz->ops.bind(tz, cdev); + if (ret) + print_bind_err_msg(tz, NULL, cdev, ret); + + return; + } + + if (!tz->ops.should_bind) + return; + + mutex_lock(&tz->lock); + + for_each_trip_desc(tz, td) { + struct thermal_trip *trip = &td->trip; + struct cooling_spec c = { + .upper = THERMAL_NO_LIMIT, + .lower = THERMAL_NO_LIMIT, + .weight = THERMAL_WEIGHT_DEFAULT + }; + + if (tz->ops.should_bind(tz, trip, cdev, &c)) { + ret = thermal_bind_cdev_to_trip(tz, trip, cdev, c.upper, + c.lower, c.weight); + if (ret) + print_bind_err_msg(tz, trip, cdev, ret); + } + } + + mutex_unlock(&tz->lock); +} + /** * __thermal_cooling_device_register() - register a new thermal cooling device * @np: a pointer to a device tree node. @@ -1092,13 +1141,8 @@ __thermal_cooling_device_register(struct list_add(&cdev->node, &thermal_cdev_list); /* Update binding information for 'this' new cdev */ - list_for_each_entry(pos, &thermal_tz_list, node) { - if (pos->ops.bind) { - ret = pos->ops.bind(pos, cdev); - if (ret) - print_bind_err_msg(pos, cdev, ret); - } - } + list_for_each_entry(pos, &thermal_tz_list, node) + thermal_zone_cdev_binding(pos, cdev); list_for_each_entry(pos, &thermal_tz_list, node) if (atomic_cmpxchg(&pos->need_update, 1, 0)) @@ -1299,6 +1343,28 @@ unlock_list: } EXPORT_SYMBOL_GPL(thermal_cooling_device_update); +static void thermal_zone_cdev_unbinding(struct thermal_zone_device *tz, + struct thermal_cooling_device *cdev) +{ + struct thermal_trip_desc *td; + + /* + * Old-style unbinding. The .unbind callback is expected to call + * thermal_unbind_cdev_from_trip() under the thermal zone lock. + */ + if (tz->ops.unbind) { + tz->ops.unbind(tz, cdev); + return; + } + + mutex_lock(&tz->lock); + + for_each_trip_desc(tz, td) + thermal_unbind_cdev_from_trip(tz, &td->trip, cdev); + + mutex_unlock(&tz->lock); +} + /** * thermal_cooling_device_unregister - removes a thermal cooling device * @cdev: the thermal cooling device to remove. @@ -1325,10 +1391,8 @@ void thermal_cooling_device_unregister(s list_del(&cdev->node); /* Unbind all thermal zones associated with 'this' cdev */ - list_for_each_entry(tz, &thermal_tz_list, node) { - if (tz->ops.unbind) - tz->ops.unbind(tz, cdev); - } + list_for_each_entry(tz, &thermal_tz_list, node) + thermal_zone_cdev_unbinding(tz, cdev); mutex_unlock(&thermal_list_lock); @@ -1403,6 +1467,7 @@ thermal_zone_device_register_with_trips( unsigned int polling_delay) { const struct thermal_trip *trip = trips; + struct thermal_cooling_device *cdev; struct thermal_zone_device *tz; struct thermal_trip_desc *td; int id; @@ -1425,8 +1490,9 @@ thermal_zone_device_register_with_trips( return ERR_PTR(-EINVAL); } - if (!ops || !ops->get_temp) { - pr_err("Thermal zone device ops not defined\n"); + if (!ops || !ops->get_temp || (ops->should_bind && ops->bind) || + (ops->should_bind && ops->unbind)) { + pr_err("Thermal zone device ops not defined or invalid\n"); return ERR_PTR(-EINVAL); } @@ -1539,15 +1605,8 @@ thermal_zone_device_register_with_trips( mutex_unlock(&tz->lock); /* Bind cooling devices for this zone */ - if (tz->ops.bind) { - struct thermal_cooling_device *cdev; - - list_for_each_entry(cdev, &thermal_cdev_list, node) { - result = tz->ops.bind(tz, cdev); - if (result) - print_bind_err_msg(tz, cdev, result); - } - } + list_for_each_entry(cdev, &thermal_cdev_list, node) + thermal_zone_cdev_binding(tz, cdev); mutex_unlock(&thermal_list_lock); @@ -1641,8 +1700,7 @@ void thermal_zone_device_unregister(stru /* Unbind all cdevs associated with 'this' thermal zone */ list_for_each_entry(cdev, &thermal_cdev_list, node) - if (tz->ops.unbind) - tz->ops.unbind(tz, cdev); + thermal_zone_cdev_unbinding(tz, cdev); mutex_unlock(&thermal_list_lock);