Message ID | 20200116151219.v2.1.I146403d05b9ec82f48b807efd416a57f545b447a@changeid (mailing list archive) |
---|---|
State | New, archived |
Delegated to: | Daniel Lezcano |
Headers | show |
Series | [v2] thermal: devfreq_cooling: Use PM QoS to set frequency limits | expand |
On 1/17/20 8:12 AM, Matthias Kaehlcke wrote: > Now that devfreq supports limiting the frequency range of a device > through PM QoS make use of it instead of disabling OPPs that should > not be used. > > The switch from disabling OPPs to PM QoS introduces a subtle behavioral > change in case of conflicting requests (min > max): PM QoS gives > precedence to the MIN_FREQUENCY request, while higher OPPs disabled > with dev_pm_opp_disable() would override MIN_FREQUENCY. > > Signed-off-by: Matthias Kaehlcke <mka@chromium.org> > --- > > Changes in v2: > - added documentation for 'req_max_freq' > - fixed jumps in of_devfreq_cooling_register_power() unwind > - added comment about behavioral change to the commit message > > drivers/thermal/devfreq_cooling.c | 70 ++++++++++--------------------- > 1 file changed, 23 insertions(+), 47 deletions(-) > > diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c > index ef59256887ff63..cbbaf5bc425d1a 100644 > --- a/drivers/thermal/devfreq_cooling.c > +++ b/drivers/thermal/devfreq_cooling.c > @@ -24,11 +24,13 @@ > #include <linux/idr.h> > #include <linux/slab.h> > #include <linux/pm_opp.h> > +#include <linux/pm_qos.h> > #include <linux/thermal.h> > > #include <trace/events/thermal.h> > > -#define SCALE_ERROR_MITIGATION 100 > +#define HZ_PER_KHZ 1000 > +#define SCALE_ERROR_MITIGATION 100 > > static DEFINE_IDA(devfreq_ida); > > @@ -53,6 +55,8 @@ static DEFINE_IDA(devfreq_ida); > * 'utilization' (which is 'busy_time / 'total_time'). > * The 'res_util' range is from 100 to (power_table[state] * 100) > * for the corresponding 'state'. > + * @req_max_freq: PM QoS request for limiting the maximum frequency > + * of the devfreq device. > */ > struct devfreq_cooling_device { > int id; > @@ -65,49 +69,9 @@ struct devfreq_cooling_device { > struct devfreq_cooling_power *power_ops; > u32 res_util; > int capped_state; > + struct dev_pm_qos_request req_max_freq; > }; > > -/** > - * partition_enable_opps() - disable all opps above a given state > - * @dfc: Pointer to devfreq we are operating on > - * @cdev_state: cooling device state we're setting > - * > - * Go through the OPPs of the device, enabling all OPPs until > - * @cdev_state and disabling those frequencies above it. > - */ > -static int partition_enable_opps(struct devfreq_cooling_device *dfc, > - unsigned long cdev_state) > -{ > - int i; > - struct device *dev = dfc->devfreq->dev.parent; > - > - for (i = 0; i < dfc->freq_table_size; i++) { > - struct dev_pm_opp *opp; > - int ret = 0; > - unsigned int freq = dfc->freq_table[i]; > - bool want_enable = i >= cdev_state ? true : false; > - > - opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable); > - > - if (PTR_ERR(opp) == -ERANGE) > - continue; > - else if (IS_ERR(opp)) > - return PTR_ERR(opp); > - > - dev_pm_opp_put(opp); > - > - if (want_enable) > - ret = dev_pm_opp_enable(dev, freq); > - else > - ret = dev_pm_opp_disable(dev, freq); > - > - if (ret) > - return ret; > - } > - > - return 0; > -} > - > static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev, > unsigned long *state) > { > @@ -134,7 +98,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, > struct devfreq_cooling_device *dfc = cdev->devdata; > struct devfreq *df = dfc->devfreq; > struct device *dev = df->dev.parent; > - int ret; > + unsigned long freq; > > if (state == dfc->cooling_state) > return 0; > @@ -144,9 +108,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, > if (state >= dfc->freq_table_size) > return -EINVAL; > > - ret = partition_enable_opps(dfc, state); > - if (ret) > - return ret; > + freq = dfc->freq_table[state]; > + > + dev_pm_qos_update_request(&dfc->req_max_freq, > + DIV_ROUND_UP(freq, HZ_PER_KHZ)); > > dfc->cooling_state = state; > > @@ -529,9 +494,15 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, > if (err) > goto free_dfc; > > - err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); > + err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq, > + DEV_PM_QOS_MAX_FREQUENCY, > + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); > if (err < 0) > goto free_tables; > + > + err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); > + if (err < 0) > + goto remove_qos_req; > dfc->id = err; > > snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id); > @@ -552,6 +523,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, > > release_ida: > ida_simple_remove(&devfreq_ida, dfc->id); > + > +remove_qos_req: > + dev_pm_qos_remove_request(&dfc->req_max_freq); > + > free_tables: > kfree(dfc->power_table); > kfree(dfc->freq_table); > @@ -600,6 +575,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) > > thermal_cooling_device_unregister(dfc->cdev); > ida_simple_remove(&devfreq_ida, dfc->id); > + dev_pm_qos_remove_request(&dfc->req_max_freq); > kfree(dfc->power_table); > kfree(dfc->freq_table); > > Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Is any further action needed from my side or can this land? Thanks Matthias On Fri, Jan 17, 2020 at 02:22:02PM +0900, Chanwoo Choi wrote: > On 1/17/20 8:12 AM, Matthias Kaehlcke wrote: > > Now that devfreq supports limiting the frequency range of a device > > through PM QoS make use of it instead of disabling OPPs that should > > not be used. > > > > The switch from disabling OPPs to PM QoS introduces a subtle behavioral > > change in case of conflicting requests (min > max): PM QoS gives > > precedence to the MIN_FREQUENCY request, while higher OPPs disabled > > with dev_pm_opp_disable() would override MIN_FREQUENCY. > > > > Signed-off-by: Matthias Kaehlcke <mka@chromium.org> > > --- > > > > Changes in v2: > > - added documentation for 'req_max_freq' > > - fixed jumps in of_devfreq_cooling_register_power() unwind > > - added comment about behavioral change to the commit message > > > > drivers/thermal/devfreq_cooling.c | 70 ++++++++++--------------------- > > 1 file changed, 23 insertions(+), 47 deletions(-) > > > > diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c > > index ef59256887ff63..cbbaf5bc425d1a 100644 > > --- a/drivers/thermal/devfreq_cooling.c > > +++ b/drivers/thermal/devfreq_cooling.c > > @@ -24,11 +24,13 @@ > > #include <linux/idr.h> > > #include <linux/slab.h> > > #include <linux/pm_opp.h> > > +#include <linux/pm_qos.h> > > #include <linux/thermal.h> > > > > #include <trace/events/thermal.h> > > > > -#define SCALE_ERROR_MITIGATION 100 > > +#define HZ_PER_KHZ 1000 > > +#define SCALE_ERROR_MITIGATION 100 > > > > static DEFINE_IDA(devfreq_ida); > > > > @@ -53,6 +55,8 @@ static DEFINE_IDA(devfreq_ida); > > * 'utilization' (which is 'busy_time / 'total_time'). > > * The 'res_util' range is from 100 to (power_table[state] * 100) > > * for the corresponding 'state'. > > + * @req_max_freq: PM QoS request for limiting the maximum frequency > > + * of the devfreq device. > > */ > > struct devfreq_cooling_device { > > int id; > > @@ -65,49 +69,9 @@ struct devfreq_cooling_device { > > struct devfreq_cooling_power *power_ops; > > u32 res_util; > > int capped_state; > > + struct dev_pm_qos_request req_max_freq; > > }; > > > > -/** > > - * partition_enable_opps() - disable all opps above a given state > > - * @dfc: Pointer to devfreq we are operating on > > - * @cdev_state: cooling device state we're setting > > - * > > - * Go through the OPPs of the device, enabling all OPPs until > > - * @cdev_state and disabling those frequencies above it. > > - */ > > -static int partition_enable_opps(struct devfreq_cooling_device *dfc, > > - unsigned long cdev_state) > > -{ > > - int i; > > - struct device *dev = dfc->devfreq->dev.parent; > > - > > - for (i = 0; i < dfc->freq_table_size; i++) { > > - struct dev_pm_opp *opp; > > - int ret = 0; > > - unsigned int freq = dfc->freq_table[i]; > > - bool want_enable = i >= cdev_state ? true : false; > > - > > - opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable); > > - > > - if (PTR_ERR(opp) == -ERANGE) > > - continue; > > - else if (IS_ERR(opp)) > > - return PTR_ERR(opp); > > - > > - dev_pm_opp_put(opp); > > - > > - if (want_enable) > > - ret = dev_pm_opp_enable(dev, freq); > > - else > > - ret = dev_pm_opp_disable(dev, freq); > > - > > - if (ret) > > - return ret; > > - } > > - > > - return 0; > > -} > > - > > static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev, > > unsigned long *state) > > { > > @@ -134,7 +98,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, > > struct devfreq_cooling_device *dfc = cdev->devdata; > > struct devfreq *df = dfc->devfreq; > > struct device *dev = df->dev.parent; > > - int ret; > > + unsigned long freq; > > > > if (state == dfc->cooling_state) > > return 0; > > @@ -144,9 +108,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, > > if (state >= dfc->freq_table_size) > > return -EINVAL; > > > > - ret = partition_enable_opps(dfc, state); > > - if (ret) > > - return ret; > > + freq = dfc->freq_table[state]; > > + > > + dev_pm_qos_update_request(&dfc->req_max_freq, > > + DIV_ROUND_UP(freq, HZ_PER_KHZ)); > > > > dfc->cooling_state = state; > > > > @@ -529,9 +494,15 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, > > if (err) > > goto free_dfc; > > > > - err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); > > + err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq, > > + DEV_PM_QOS_MAX_FREQUENCY, > > + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); > > if (err < 0) > > goto free_tables; > > + > > + err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); > > + if (err < 0) > > + goto remove_qos_req; > > dfc->id = err; > > > > snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id); > > @@ -552,6 +523,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, > > > > release_ida: > > ida_simple_remove(&devfreq_ida, dfc->id); > > + > > +remove_qos_req: > > + dev_pm_qos_remove_request(&dfc->req_max_freq); > > + > > free_tables: > > kfree(dfc->power_table); > > kfree(dfc->freq_table); > > @@ -600,6 +575,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) > > > > thermal_cooling_device_unregister(dfc->cdev); > > ida_simple_remove(&devfreq_ida, dfc->id); > > + dev_pm_qos_remove_request(&dfc->req_max_freq); > > kfree(dfc->power_table); > > kfree(dfc->freq_table); > > > > > > Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com> > > -- > Best Regards, > Chanwoo Choi > Samsung Electronics
Hi Matthias, I just saw this email below the patch. I wasn't aware that you are working on this. I will have to update my changes... It looks good to me. Unfortunately, it does not apply on top of Amit's commit 1b5cb9570670a6277cc0 thermal: devfreq_cooling: Appease the kernel-doc deity Could you check this? Other then that Reviewed-by: Lukasz Luba <lukasz.luba@arm.com> Regards, Lukasz On 3/12/20 12:35 AM, Matthias Kaehlcke wrote: > Is any further action needed from my side or can this land? > > Thanks > > Matthias > > On Fri, Jan 17, 2020 at 02:22:02PM +0900, Chanwoo Choi wrote: >> On 1/17/20 8:12 AM, Matthias Kaehlcke wrote: >>> Now that devfreq supports limiting the frequency range of a device >>> through PM QoS make use of it instead of disabling OPPs that should >>> not be used. >>> >>> The switch from disabling OPPs to PM QoS introduces a subtle behavioral >>> change in case of conflicting requests (min > max): PM QoS gives >>> precedence to the MIN_FREQUENCY request, while higher OPPs disabled >>> with dev_pm_opp_disable() would override MIN_FREQUENCY. >>> >>> Signed-off-by: Matthias Kaehlcke <mka@chromium.org> >>> --- >>> >>> Changes in v2: >>> - added documentation for 'req_max_freq' >>> - fixed jumps in of_devfreq_cooling_register_power() unwind >>> - added comment about behavioral change to the commit message >>> >>> drivers/thermal/devfreq_cooling.c | 70 ++++++++++--------------------- >>> 1 file changed, 23 insertions(+), 47 deletions(-) >>> >>> diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c >>> index ef59256887ff63..cbbaf5bc425d1a 100644 >>> --- a/drivers/thermal/devfreq_cooling.c >>> +++ b/drivers/thermal/devfreq_cooling.c >>> @@ -24,11 +24,13 @@ >>> #include <linux/idr.h> >>> #include <linux/slab.h> >>> #include <linux/pm_opp.h> >>> +#include <linux/pm_qos.h> >>> #include <linux/thermal.h> >>> >>> #include <trace/events/thermal.h> >>> >>> -#define SCALE_ERROR_MITIGATION 100 >>> +#define HZ_PER_KHZ 1000 >>> +#define SCALE_ERROR_MITIGATION 100 >>> >>> static DEFINE_IDA(devfreq_ida); >>> >>> @@ -53,6 +55,8 @@ static DEFINE_IDA(devfreq_ida); >>> * 'utilization' (which is 'busy_time / 'total_time'). >>> * The 'res_util' range is from 100 to (power_table[state] * 100) >>> * for the corresponding 'state'. >>> + * @req_max_freq: PM QoS request for limiting the maximum frequency >>> + * of the devfreq device. >>> */ >>> struct devfreq_cooling_device { >>> int id; >>> @@ -65,49 +69,9 @@ struct devfreq_cooling_device { >>> struct devfreq_cooling_power *power_ops; >>> u32 res_util; >>> int capped_state; >>> + struct dev_pm_qos_request req_max_freq; >>> }; >>> >>> -/** >>> - * partition_enable_opps() - disable all opps above a given state >>> - * @dfc: Pointer to devfreq we are operating on >>> - * @cdev_state: cooling device state we're setting >>> - * >>> - * Go through the OPPs of the device, enabling all OPPs until >>> - * @cdev_state and disabling those frequencies above it. >>> - */ >>> -static int partition_enable_opps(struct devfreq_cooling_device *dfc, >>> - unsigned long cdev_state) >>> -{ >>> - int i; >>> - struct device *dev = dfc->devfreq->dev.parent; >>> - >>> - for (i = 0; i < dfc->freq_table_size; i++) { >>> - struct dev_pm_opp *opp; >>> - int ret = 0; >>> - unsigned int freq = dfc->freq_table[i]; >>> - bool want_enable = i >= cdev_state ? true : false; >>> - >>> - opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable); >>> - >>> - if (PTR_ERR(opp) == -ERANGE) >>> - continue; >>> - else if (IS_ERR(opp)) >>> - return PTR_ERR(opp); >>> - >>> - dev_pm_opp_put(opp); >>> - >>> - if (want_enable) >>> - ret = dev_pm_opp_enable(dev, freq); >>> - else >>> - ret = dev_pm_opp_disable(dev, freq); >>> - >>> - if (ret) >>> - return ret; >>> - } >>> - >>> - return 0; >>> -} >>> - >>> static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev, >>> unsigned long *state) >>> { >>> @@ -134,7 +98,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, >>> struct devfreq_cooling_device *dfc = cdev->devdata; >>> struct devfreq *df = dfc->devfreq; >>> struct device *dev = df->dev.parent; >>> - int ret; >>> + unsigned long freq; >>> >>> if (state == dfc->cooling_state) >>> return 0; >>> @@ -144,9 +108,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, >>> if (state >= dfc->freq_table_size) >>> return -EINVAL; >>> >>> - ret = partition_enable_opps(dfc, state); >>> - if (ret) >>> - return ret; >>> + freq = dfc->freq_table[state]; >>> + >>> + dev_pm_qos_update_request(&dfc->req_max_freq, >>> + DIV_ROUND_UP(freq, HZ_PER_KHZ)); >>> >>> dfc->cooling_state = state; >>> >>> @@ -529,9 +494,15 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, >>> if (err) >>> goto free_dfc; >>> >>> - err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); >>> + err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq, >>> + DEV_PM_QOS_MAX_FREQUENCY, >>> + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); >>> if (err < 0) >>> goto free_tables; >>> + >>> + err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); >>> + if (err < 0) >>> + goto remove_qos_req; >>> dfc->id = err; >>> >>> snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id); >>> @@ -552,6 +523,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, >>> >>> release_ida: >>> ida_simple_remove(&devfreq_ida, dfc->id); >>> + >>> +remove_qos_req: >>> + dev_pm_qos_remove_request(&dfc->req_max_freq); >>> + >>> free_tables: >>> kfree(dfc->power_table); >>> kfree(dfc->freq_table); >>> @@ -600,6 +575,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) >>> >>> thermal_cooling_device_unregister(dfc->cdev); >>> ida_simple_remove(&devfreq_ida, dfc->id); >>> + dev_pm_qos_remove_request(&dfc->req_max_freq); >>> kfree(dfc->power_table); >>> kfree(dfc->freq_table); >>> >>> >> >> Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com> >> >> -- >> Best Regards, >> Chanwoo Choi >> Samsung Electronics
Hi Lukasz, thanks for the review! I'll rebase and send v3. Hopefully it doesn't cause too much extra work for your changes. Thanks Matthias On Thu, Mar 12, 2020 at 11:39:56AM +0000, Lukasz Luba wrote: > Hi Matthias, > > I just saw this email below the patch. I wasn't aware that you > are working on this. I will have to update my changes... > > It looks good to me. > Unfortunately, it does not apply on top of Amit's commit > 1b5cb9570670a6277cc0 thermal: devfreq_cooling: Appease the kernel-doc deity > > Could you check this? > > Other then that > > Reviewed-by: Lukasz Luba <lukasz.luba@arm.com> > > Regards, > Lukasz > > On 3/12/20 12:35 AM, Matthias Kaehlcke wrote: > > Is any further action needed from my side or can this land? > > > > Thanks > > > > Matthias > > > > On Fri, Jan 17, 2020 at 02:22:02PM +0900, Chanwoo Choi wrote: > > > On 1/17/20 8:12 AM, Matthias Kaehlcke wrote: > > > > Now that devfreq supports limiting the frequency range of a device > > > > through PM QoS make use of it instead of disabling OPPs that should > > > > not be used. > > > > > > > > The switch from disabling OPPs to PM QoS introduces a subtle behavioral > > > > change in case of conflicting requests (min > max): PM QoS gives > > > > precedence to the MIN_FREQUENCY request, while higher OPPs disabled > > > > with dev_pm_opp_disable() would override MIN_FREQUENCY. > > > > > > > > Signed-off-by: Matthias Kaehlcke <mka@chromium.org> > > > > --- > > > > > > > > Changes in v2: > > > > - added documentation for 'req_max_freq' > > > > - fixed jumps in of_devfreq_cooling_register_power() unwind > > > > - added comment about behavioral change to the commit message > > > > > > > > drivers/thermal/devfreq_cooling.c | 70 ++++++++++--------------------- > > > > 1 file changed, 23 insertions(+), 47 deletions(-) > > > > > > > > diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c > > > > index ef59256887ff63..cbbaf5bc425d1a 100644 > > > > --- a/drivers/thermal/devfreq_cooling.c > > > > +++ b/drivers/thermal/devfreq_cooling.c > > > > @@ -24,11 +24,13 @@ > > > > #include <linux/idr.h> > > > > #include <linux/slab.h> > > > > #include <linux/pm_opp.h> > > > > +#include <linux/pm_qos.h> > > > > #include <linux/thermal.h> > > > > #include <trace/events/thermal.h> > > > > -#define SCALE_ERROR_MITIGATION 100 > > > > +#define HZ_PER_KHZ 1000 > > > > +#define SCALE_ERROR_MITIGATION 100 > > > > static DEFINE_IDA(devfreq_ida); > > > > @@ -53,6 +55,8 @@ static DEFINE_IDA(devfreq_ida); > > > > * 'utilization' (which is 'busy_time / 'total_time'). > > > > * The 'res_util' range is from 100 to (power_table[state] * 100) > > > > * for the corresponding 'state'. > > > > + * @req_max_freq: PM QoS request for limiting the maximum frequency > > > > + * of the devfreq device. > > > > */ > > > > struct devfreq_cooling_device { > > > > int id; > > > > @@ -65,49 +69,9 @@ struct devfreq_cooling_device { > > > > struct devfreq_cooling_power *power_ops; > > > > u32 res_util; > > > > int capped_state; > > > > + struct dev_pm_qos_request req_max_freq; > > > > }; > > > > -/** > > > > - * partition_enable_opps() - disable all opps above a given state > > > > - * @dfc: Pointer to devfreq we are operating on > > > > - * @cdev_state: cooling device state we're setting > > > > - * > > > > - * Go through the OPPs of the device, enabling all OPPs until > > > > - * @cdev_state and disabling those frequencies above it. > > > > - */ > > > > -static int partition_enable_opps(struct devfreq_cooling_device *dfc, > > > > - unsigned long cdev_state) > > > > -{ > > > > - int i; > > > > - struct device *dev = dfc->devfreq->dev.parent; > > > > - > > > > - for (i = 0; i < dfc->freq_table_size; i++) { > > > > - struct dev_pm_opp *opp; > > > > - int ret = 0; > > > > - unsigned int freq = dfc->freq_table[i]; > > > > - bool want_enable = i >= cdev_state ? true : false; > > > > - > > > > - opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable); > > > > - > > > > - if (PTR_ERR(opp) == -ERANGE) > > > > - continue; > > > > - else if (IS_ERR(opp)) > > > > - return PTR_ERR(opp); > > > > - > > > > - dev_pm_opp_put(opp); > > > > - > > > > - if (want_enable) > > > > - ret = dev_pm_opp_enable(dev, freq); > > > > - else > > > > - ret = dev_pm_opp_disable(dev, freq); > > > > - > > > > - if (ret) > > > > - return ret; > > > > - } > > > > - > > > > - return 0; > > > > -} > > > > - > > > > static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev, > > > > unsigned long *state) > > > > { > > > > @@ -134,7 +98,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, > > > > struct devfreq_cooling_device *dfc = cdev->devdata; > > > > struct devfreq *df = dfc->devfreq; > > > > struct device *dev = df->dev.parent; > > > > - int ret; > > > > + unsigned long freq; > > > > if (state == dfc->cooling_state) > > > > return 0; > > > > @@ -144,9 +108,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, > > > > if (state >= dfc->freq_table_size) > > > > return -EINVAL; > > > > - ret = partition_enable_opps(dfc, state); > > > > - if (ret) > > > > - return ret; > > > > + freq = dfc->freq_table[state]; > > > > + > > > > + dev_pm_qos_update_request(&dfc->req_max_freq, > > > > + DIV_ROUND_UP(freq, HZ_PER_KHZ)); > > > > dfc->cooling_state = state; > > > > @@ -529,9 +494,15 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, > > > > if (err) > > > > goto free_dfc; > > > > - err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); > > > > + err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq, > > > > + DEV_PM_QOS_MAX_FREQUENCY, > > > > + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); > > > > if (err < 0) > > > > goto free_tables; > > > > + > > > > + err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); > > > > + if (err < 0) > > > > + goto remove_qos_req; > > > > dfc->id = err; > > > > snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id); > > > > @@ -552,6 +523,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, > > > > release_ida: > > > > ida_simple_remove(&devfreq_ida, dfc->id); > > > > + > > > > +remove_qos_req: > > > > + dev_pm_qos_remove_request(&dfc->req_max_freq); > > > > + > > > > free_tables: > > > > kfree(dfc->power_table); > > > > kfree(dfc->freq_table); > > > > @@ -600,6 +575,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) > > > > thermal_cooling_device_unregister(dfc->cdev); > > > > ida_simple_remove(&devfreq_ida, dfc->id); > > > > + dev_pm_qos_remove_request(&dfc->req_max_freq); > > > > kfree(dfc->power_table); > > > > kfree(dfc->freq_table); > > > > > > > > > > Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com> > > > > > > -- > > > Best Regards, > > > Chanwoo Choi > > > Samsung Electronics
On 3/12/20 5:57 PM, Matthias Kaehlcke wrote: > Hi Lukasz, > > thanks for the review! > > I'll rebase and send v3. Hopefully it doesn't cause too much extra > work for your changes. No worries, your change is really needed. It will simply the code that I am working on (can be found here [1]). I don't have deal with the race with devfreq governor now. Thank you for working on it. Lukasz [1] https://lkml.org/lkml/2020/3/9/475
diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c index ef59256887ff63..cbbaf5bc425d1a 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -24,11 +24,13 @@ #include <linux/idr.h> #include <linux/slab.h> #include <linux/pm_opp.h> +#include <linux/pm_qos.h> #include <linux/thermal.h> #include <trace/events/thermal.h> -#define SCALE_ERROR_MITIGATION 100 +#define HZ_PER_KHZ 1000 +#define SCALE_ERROR_MITIGATION 100 static DEFINE_IDA(devfreq_ida); @@ -53,6 +55,8 @@ static DEFINE_IDA(devfreq_ida); * 'utilization' (which is 'busy_time / 'total_time'). * The 'res_util' range is from 100 to (power_table[state] * 100) * for the corresponding 'state'. + * @req_max_freq: PM QoS request for limiting the maximum frequency + * of the devfreq device. */ struct devfreq_cooling_device { int id; @@ -65,49 +69,9 @@ struct devfreq_cooling_device { struct devfreq_cooling_power *power_ops; u32 res_util; int capped_state; + struct dev_pm_qos_request req_max_freq; }; -/** - * partition_enable_opps() - disable all opps above a given state - * @dfc: Pointer to devfreq we are operating on - * @cdev_state: cooling device state we're setting - * - * Go through the OPPs of the device, enabling all OPPs until - * @cdev_state and disabling those frequencies above it. - */ -static int partition_enable_opps(struct devfreq_cooling_device *dfc, - unsigned long cdev_state) -{ - int i; - struct device *dev = dfc->devfreq->dev.parent; - - for (i = 0; i < dfc->freq_table_size; i++) { - struct dev_pm_opp *opp; - int ret = 0; - unsigned int freq = dfc->freq_table[i]; - bool want_enable = i >= cdev_state ? true : false; - - opp = dev_pm_opp_find_freq_exact(dev, freq, !want_enable); - - if (PTR_ERR(opp) == -ERANGE) - continue; - else if (IS_ERR(opp)) - return PTR_ERR(opp); - - dev_pm_opp_put(opp); - - if (want_enable) - ret = dev_pm_opp_enable(dev, freq); - else - ret = dev_pm_opp_disable(dev, freq); - - if (ret) - return ret; - } - - return 0; -} - static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev, unsigned long *state) { @@ -134,7 +98,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, struct devfreq_cooling_device *dfc = cdev->devdata; struct devfreq *df = dfc->devfreq; struct device *dev = df->dev.parent; - int ret; + unsigned long freq; if (state == dfc->cooling_state) return 0; @@ -144,9 +108,10 @@ static int devfreq_cooling_set_cur_state(struct thermal_cooling_device *cdev, if (state >= dfc->freq_table_size) return -EINVAL; - ret = partition_enable_opps(dfc, state); - if (ret) - return ret; + freq = dfc->freq_table[state]; + + dev_pm_qos_update_request(&dfc->req_max_freq, + DIV_ROUND_UP(freq, HZ_PER_KHZ)); dfc->cooling_state = state; @@ -529,9 +494,15 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, if (err) goto free_dfc; - err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); + err = dev_pm_qos_add_request(df->dev.parent, &dfc->req_max_freq, + DEV_PM_QOS_MAX_FREQUENCY, + PM_QOS_MAX_FREQUENCY_DEFAULT_VALUE); if (err < 0) goto free_tables; + + err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL); + if (err < 0) + goto remove_qos_req; dfc->id = err; snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id); @@ -552,6 +523,10 @@ of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, release_ida: ida_simple_remove(&devfreq_ida, dfc->id); + +remove_qos_req: + dev_pm_qos_remove_request(&dfc->req_max_freq); + free_tables: kfree(dfc->power_table); kfree(dfc->freq_table); @@ -600,6 +575,7 @@ void devfreq_cooling_unregister(struct thermal_cooling_device *cdev) thermal_cooling_device_unregister(dfc->cdev); ida_simple_remove(&devfreq_ida, dfc->id); + dev_pm_qos_remove_request(&dfc->req_max_freq); kfree(dfc->power_table); kfree(dfc->freq_table);
Now that devfreq supports limiting the frequency range of a device through PM QoS make use of it instead of disabling OPPs that should not be used. The switch from disabling OPPs to PM QoS introduces a subtle behavioral change in case of conflicting requests (min > max): PM QoS gives precedence to the MIN_FREQUENCY request, while higher OPPs disabled with dev_pm_opp_disable() would override MIN_FREQUENCY. Signed-off-by: Matthias Kaehlcke <mka@chromium.org> --- Changes in v2: - added documentation for 'req_max_freq' - fixed jumps in of_devfreq_cooling_register_power() unwind - added comment about behavioral change to the commit message drivers/thermal/devfreq_cooling.c | 70 ++++++++++--------------------- 1 file changed, 23 insertions(+), 47 deletions(-)