Message ID | af14021b98254032e856397b54329756c1cc59c0.1566314535.git.leonard.crestez@nxp.com (mailing list archive) |
---|---|
State | Superseded |
Headers | show |
Series | PM / devfreq: Add dev_pm_qos support | expand |
On 19. 8. 21. 오전 12:24, Leonard Crestez wrote: > Now that devfreq supports dev_pm_qos requests we can use them to handle > the min/max_freq values set by userspace in sysfs, similar to cpufreq. > > Since dev_pm_qos handles frequencies as kHz this change reduces the > precision of min_freq and max_freq. This shouldn't introduce problems > because frequencies which are not an integer number of kHz are likely > not an integer number of Hz either. > > Try to ensure compatibilitity by rounding min values down and rounding > max values up. > > Simplify the {min,max}_freq_store code by setting "null" values of 0 and > MAX_S32 respectively instead of clamping to what freq tables are > actually supported. Values are already automatically clamped on > readback. > > Also simplify by droping the limitation that userspace min_freq must be > lower than userspace max_freq, it is already documented that max_freq > takes precedence. > > Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com> > --- > drivers/devfreq/devfreq.c | 79 ++++++++++++++++----------------------- > include/linux/devfreq.h | 9 +++-- > 2 files changed, 38 insertions(+), 50 deletions(-) > > diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c > index 58deffa52a37..687deadd08ed 100644 > --- a/drivers/devfreq/devfreq.c > +++ b/drivers/devfreq/devfreq.c > @@ -101,21 +101,21 @@ static unsigned long find_available_max_freq(struct devfreq *devfreq) > > static unsigned long get_effective_min_freq(struct devfreq *devfreq) > { > lockdep_assert_held(&devfreq->lock); > > - return max3(devfreq->scaling_min_freq, devfreq->min_freq, > + return max(devfreq->scaling_min_freq, > 1000 * (unsigned long)dev_pm_qos_read_value( > devfreq->dev.parent, > DEV_PM_QOS_MIN_FREQUENCY)); > } > > static unsigned long get_effective_max_freq(struct devfreq *devfreq) > { > lockdep_assert_held(&devfreq->lock); > > - return min3(devfreq->scaling_max_freq, devfreq->max_freq, > + return min(devfreq->scaling_max_freq, > 1000 * (unsigned long)dev_pm_qos_read_value( > devfreq->dev.parent, > DEV_PM_QOS_MAX_FREQUENCY)); > } > > @@ -644,10 +644,12 @@ static void devfreq_dev_release(struct device *dev) > > dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max, > DEV_PM_QOS_MAX_FREQUENCY); > dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min, > DEV_PM_QOS_MIN_FREQUENCY); > + dev_pm_qos_remove_request(&devfreq->max_freq_req); > + dev_pm_qos_remove_request(&devfreq->min_freq_req); > mutex_destroy(&devfreq->lock); > kfree(devfreq); > } > > /** > @@ -698,10 +700,19 @@ struct devfreq *devfreq_add_device(struct device *dev, > devfreq->previous_freq = profile->initial_freq; > devfreq->last_status.current_frequency = profile->initial_freq; > devfreq->data = data; > devfreq->nb.notifier_call = devfreq_notifier_call; > > + err = dev_pm_qos_add_request(dev, &devfreq->min_freq_req, > + DEV_PM_QOS_MIN_FREQUENCY, 0); > + if (err < 0) > + goto err_dev; > + err = dev_pm_qos_add_request(dev, &devfreq->max_freq_req, > + DEV_PM_QOS_MAX_FREQUENCY, S32_MAX); > + if (err < 0) > + goto err_dev; > + Please move them under dev_pm_qos_add_notifier() because it seems that it request the qos without qos_notifier. > /* > * notifier from pm_qos > * > * initialized outside of devfreq->lock to avoid circular warning > * between devfreq->lock and dev_pm_qos_mtx > @@ -732,19 +743,17 @@ struct devfreq *devfreq_add_device(struct device *dev, > if (!devfreq->scaling_min_freq) { > mutex_unlock(&devfreq->lock); > err = -EINVAL; > goto err_dev; > } > - devfreq->min_freq = devfreq->scaling_min_freq; > > devfreq->scaling_max_freq = find_available_max_freq(devfreq); > if (!devfreq->scaling_max_freq) { > mutex_unlock(&devfreq->lock); > err = -EINVAL; > goto err_dev; > } > - devfreq->max_freq = devfreq->scaling_max_freq; > > devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev); > atomic_set(&devfreq->suspend_count, 0); > > dev_set_name(&devfreq->dev, "devfreq%d", > @@ -816,10 +825,14 @@ struct devfreq *devfreq_add_device(struct device *dev, > err_dev: > dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max, > DEV_PM_QOS_MAX_FREQUENCY); > dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min, > DEV_PM_QOS_MIN_FREQUENCY); > + if (dev_pm_qos_request_active(&devfreq->max_freq_req)) > + dev_pm_qos_remove_request(&devfreq->max_freq_req); > + if (dev_pm_qos_request_active(&devfreq->min_freq_req)) > + dev_pm_qos_remove_request(&devfreq->min_freq_req); > kfree(devfreq); > err_out: > return ERR_PTR(err); > } > EXPORT_SYMBOL(devfreq_add_device); > @@ -1358,33 +1371,20 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, > > ret = sscanf(buf, "%lu", &value); > if (ret != 1) > return -EINVAL; > > - mutex_lock(&df->lock); > - > - if (value) { > - if (value > df->max_freq) { > - ret = -EINVAL; > - goto unlock; > - } Actually, the user can input the value they want. So, the above code is not necessary because the devfreq->scaling_max_freq is never overflow from supported maximum frequency. The devfreq->scaling_max_freq is based on OPP entries from DT. But, if we replace the existing request way of devfreq-cooling.c with dev_pm_qos, devfreq->scaling_max_freq doesn't guarantee the supported maximum frequency. We need to keep the supported min_freq/max_freq value without dev_pm_qos requirement because the dev_pm_qos requirement might have the overflow value. the dev_pm_qos doesn't know the supported minimum and maximum frequency of devfreq device. > - } else { > - unsigned long *freq_table = df->profile->freq_table; > + if (value) > + value = value / 1000; Better to use KHZ definition instead of 1000 as I commented on patch1. > + else > + value = 0; > > - /* Get minimum frequency according to sorting order */ > - if (freq_table[0] < freq_table[df->profile->max_state - 1]) > - value = freq_table[0]; > - else > - value = freq_table[df->profile->max_state - 1]; > - } > + ret = dev_pm_qos_update_request(&df->min_freq_req, value); > + if (ret < 0) > + return ret; > > - df->min_freq = value; > - update_devfreq(df); > - ret = count; > -unlock: > - mutex_unlock(&df->lock); > - return ret; > + return count; > } > > static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, > char *buf) > { > @@ -1407,33 +1407,20 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, > > ret = sscanf(buf, "%lu", &value); > if (ret != 1) > return -EINVAL; > > - mutex_lock(&df->lock); > - > - if (value) { > - if (value < df->min_freq) { > - ret = -EINVAL; > - goto unlock; > - } > - } else { > - unsigned long *freq_table = df->profile->freq_table; > + if (value) > + value = DIV_ROUND_UP(value, 1000); > + else > + value = S32_MAX; > > - /* Get maximum frequency according to sorting order */ > - if (freq_table[0] < freq_table[df->profile->max_state - 1]) > - value = freq_table[df->profile->max_state - 1]; > - else > - value = freq_table[0]; > - } > + ret = dev_pm_qos_update_request(&df->max_freq_req, value); > + if (ret < 0) > + return ret; > > - df->max_freq = value; > - update_devfreq(df); > - ret = count; > -unlock: > - mutex_unlock(&df->lock); > - return ret; > + return count; > } > static DEVICE_ATTR_RW(min_freq); > > static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, > char *buf) > diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h > index 8b92ccbd1962..d2c5bb7add0a 100644 > --- a/include/linux/devfreq.h > +++ b/include/linux/devfreq.h > @@ -11,10 +11,11 @@ > #define __LINUX_DEVFREQ_H__ > > #include <linux/device.h> > #include <linux/notifier.h> > #include <linux/pm_opp.h> > +#include <linux/pm_qos.h> > > #define DEVFREQ_NAME_LEN 16 > > /* DEVFREQ governor name */ > #define DEVFREQ_GOV_SIMPLE_ONDEMAND "simple_ondemand" > @@ -121,12 +122,12 @@ struct devfreq_dev_profile { > * devfreq.nb to the corresponding register notifier call chain. > * @work: delayed work for load monitoring. > * @previous_freq: previously configured frequency value. > * @data: Private data of the governor. The devfreq framework does not > * touch this. > - * @min_freq: Limit minimum frequency requested by user (0: none) > - * @max_freq: Limit maximum frequency requested by user (0: none) > + * @min_freq_req: Limit minimum frequency requested by user (0: none) > + * @max_freq_req: Limit maximum frequency requested by user (0: none) > * @scaling_min_freq: Limit minimum frequency requested by OPP interface > * @scaling_max_freq: Limit maximum frequency requested by OPP interface > * @stop_polling: devfreq polling status of a device. > * @suspend_freq: frequency of a device set during suspend phase. > * @resume_freq: frequency of a device set in resume phase. > @@ -161,12 +162,12 @@ struct devfreq { > unsigned long previous_freq; > struct devfreq_dev_status last_status; > > void *data; /* private data for governors */ > > - unsigned long min_freq; > - unsigned long max_freq; > + struct dev_pm_qos_request min_freq_req; > + struct dev_pm_qos_request max_freq_req; > unsigned long scaling_min_freq; > unsigned long scaling_max_freq; > bool stop_polling; > > unsigned long suspend_freq; >
On 21.08.2019 05:02, Chanwoo Choi wrote: > On 19. 8. 21. 오전 12:24, Leonard Crestez wrote: >> Now that devfreq supports dev_pm_qos requests we can use them to handle >> the min/max_freq values set by userspace in sysfs, similar to cpufreq. >> >> Since dev_pm_qos handles frequencies as kHz this change reduces the >> precision of min_freq and max_freq. This shouldn't introduce problems >> because frequencies which are not an integer number of kHz are likely >> not an integer number of Hz either. >> >> Try to ensure compatibilitity by rounding min values down and rounding >> max values up. >> >> Simplify the {min,max}_freq_store code by setting "null" values of 0 and >> MAX_S32 respectively instead of clamping to what freq tables are >> actually supported. Values are already automatically clamped on >> readback. >> >> Also simplify by droping the limitation that userspace min_freq must be >> lower than userspace max_freq, it is already documented that max_freq >> takes precedence. >> >> @@ -1358,33 +1371,20 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, >> >> ret = sscanf(buf, "%lu", &value); >> if (ret != 1) >> return -EINVAL; >> >> - mutex_lock(&df->lock); >> - >> - if (value) { >> - if (value > df->max_freq) { >> - ret = -EINVAL; >> - goto unlock; >> - } > > Actually, the user can input the value they want. > So, the above code is not necessary because the devfreq->scaling_max_freq > is never overflow from supported maximum frequency. The devfreq->scaling_max_freq > is based on OPP entries from DT. > > But, if we replace the existing request way of devfreq-cooling.c > with dev_pm_qos, devfreq->scaling_max_freq doesn't guarantee > the supported maximum frequency. > > We need to keep the supported min_freq/max_freq value without dev_pm_qos > requirement because the dev_pm_qos requirement might have the overflow value. > the dev_pm_qos doesn't know the supported minimum and maximum frequency > of devfreq device. I'm not sure I understand what you mean. My patch allows user to set entirely arbitrary min/max rates and this is good because we already have a well-defined way to handle this: max overrides min. The scaling_min_freq and scaling_max_freq variables can just be kept around indefinitely no matter what happens to thermal. They're just a cache for dev_pm_opp_find_freq_ceil and dev_pm_opp_find_freq_floor. BTW: I noticed that scaling_min_freq and scaling_max_freq are updated in devfreq_notifier_call but devfreq->nb is not registered by default, only when a driver requests it explicitly. Is there a reason for this? Because I'd call dev_pm_opp_register_notifier inside devfreq_add_device and remove all the devfreq_register/unregister_notifier APIs. -- Regards, Leonard
On 19. 8. 21. 오후 10:03, Leonard Crestez wrote: > On 21.08.2019 05:02, Chanwoo Choi wrote: >> On 19. 8. 21. 오전 12:24, Leonard Crestez wrote: >>> Now that devfreq supports dev_pm_qos requests we can use them to handle >>> the min/max_freq values set by userspace in sysfs, similar to cpufreq. >>> >>> Since dev_pm_qos handles frequencies as kHz this change reduces the >>> precision of min_freq and max_freq. This shouldn't introduce problems >>> because frequencies which are not an integer number of kHz are likely >>> not an integer number of Hz either. >>> >>> Try to ensure compatibilitity by rounding min values down and rounding >>> max values up. >>> >>> Simplify the {min,max}_freq_store code by setting "null" values of 0 and >>> MAX_S32 respectively instead of clamping to what freq tables are >>> actually supported. Values are already automatically clamped on >>> readback. >>> >>> Also simplify by droping the limitation that userspace min_freq must be >>> lower than userspace max_freq, it is already documented that max_freq >>> takes precedence. >>> >>> @@ -1358,33 +1371,20 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, >>> >>> ret = sscanf(buf, "%lu", &value); >>> if (ret != 1) >>> return -EINVAL; >>> >>> - mutex_lock(&df->lock); >>> - >>> - if (value) { >>> - if (value > df->max_freq) { >>> - ret = -EINVAL; >>> - goto unlock; >>> - } >> >> Actually, the user can input the value they want. >> So, the above code is not necessary because the devfreq->scaling_max_freq >> is never overflow from supported maximum frequency. The devfreq->scaling_max_freq >> is based on OPP entries from DT. >> >> But, if we replace the existing request way of devfreq-cooling.c >> with dev_pm_qos, devfreq->scaling_max_freq doesn't guarantee >> the supported maximum frequency. > >> We need to keep the supported min_freq/max_freq value without dev_pm_qos >> requirement because the dev_pm_qos requirement might have the overflow value. >> the dev_pm_qos doesn't know the supported minimum and maximum frequency >> of devfreq device. > > I'm not sure I understand what you mean. My patch allows user to set > entirely arbitrary min/max rates and this is good because we already > have a well-defined way to handle this: max overrides min. > > The scaling_min_freq and scaling_max_freq variables can just be kept > around indefinitely no matter what happens to thermal. They're just a > cache for dev_pm_opp_find_freq_ceil and dev_pm_opp_find_freq_floor. This patch doesn't check the range of input value with the supported frequencies of devfreq device. For example, The devfreq0 device has the following available frequencies 100000000 200000000 300000000 and then user enters the 500000000 as following: echo 500000000 > /sys/class/devfreq/devfreq0/min_freq In result, get_effective_min_freq() will return the 500000000. It is wrong value. it show the unsupported frequency through min_freq sysfs path. - devfreq->scaling_min_freq is 100000000, - 1000 * (unsigned long)dev_pm_qos_read_value(devfreq->dev.parent, DEV_PM_QOS_MIN_FREQUENCY)); is 500000000 > > BTW: I noticed that scaling_min_freq and scaling_max_freq are updated in > devfreq_notifier_call but devfreq->nb is not registered by default, only > when a driver requests it explicitly. Is there a reason for this? Initial version of devfreq has not forced to use the OPP interface as the mandatory. So, it just provides the external function devfreq_register_opp_notifier. But, We can call 'devfreq_register_opp_notifier' during devfreq_add_device() because the OPP interface is mandatory for devfreq. > Because I'd call dev_pm_opp_register_notifier inside devfreq_add_device > and remove all the devfreq_register/unregister_notifier APIs. OK. > > -- > Regards, > Leonard >
On 8/22/2019 1:03 PM, Chanwoo Choi wrote: > On 19. 8. 21. 오후 10:03, Leonard Crestez wrote: >> On 21.08.2019 05:02, Chanwoo Choi wrote: >>> On 19. 8. 21. 오전 12:24, Leonard Crestez wrote: >>>> Now that devfreq supports dev_pm_qos requests we can use them to handle >>>> the min/max_freq values set by userspace in sysfs, similar to cpufreq. >>>> >>>> Since dev_pm_qos handles frequencies as kHz this change reduces the >>>> precision of min_freq and max_freq. This shouldn't introduce problems >>>> because frequencies which are not an integer number of kHz are likely >>>> not an integer number of Hz either. >>>> >>>> Try to ensure compatibilitity by rounding min values down and rounding >>>> max values up. >>>> >>>> Simplify the {min,max}_freq_store code by setting "null" values of 0 and >>>> MAX_S32 respectively instead of clamping to what freq tables are >>>> actually supported. Values are already automatically clamped on >>>> readback. >>>> >>>> Also simplify by droping the limitation that userspace min_freq must be >>>> lower than userspace max_freq, it is already documented that max_freq >>>> takes precedence. >>>> >>>> @@ -1358,33 +1371,20 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, >>>> >>>> ret = sscanf(buf, "%lu", &value); >>>> if (ret != 1) >>>> return -EINVAL; >>>> >>>> - mutex_lock(&df->lock); >>>> - >>>> - if (value) { >>>> - if (value > df->max_freq) { >>>> - ret = -EINVAL; >>>> - goto unlock; >>>> - } >>> >>> Actually, the user can input the value they want. >>> So, the above code is not necessary because the devfreq->scaling_max_freq >>> is never overflow from supported maximum frequency. The devfreq->scaling_max_freq >>> is based on OPP entries from DT. >>> >>> But, if we replace the existing request way of devfreq-cooling.c >>> with dev_pm_qos, devfreq->scaling_max_freq doesn't guarantee >>> the supported maximum frequency. > >>> We need to keep the supported min_freq/max_freq value without dev_pm_qos >>> requirement because the dev_pm_qos requirement might have the overflow value. >>> the dev_pm_qos doesn't know the supported minimum and maximum frequency >>> of devfreq device. >> >> I'm not sure I understand what you mean. My patch allows user to set >> entirely arbitrary min/max rates and this is good because we already >> have a well-defined way to handle this: max overrides min. >> >> The scaling_min_freq and scaling_max_freq variables can just be kept >> around indefinitely no matter what happens to thermal. They're just a >> cache for dev_pm_opp_find_freq_ceil and dev_pm_opp_find_freq_floor. > > This patch doesn't check the range of input value > with the supported frequencies of devfreq device. > > For example, > The devfreq0 device has the following available frequencies > 100000000 200000000 300000000 > > and then user enters the 500000000 as following: > echo 500000000 > /sys/class/devfreq/devfreq0/min_freq > > In result, get_effective_min_freq() will return the 500000000. > It is wrong value. it show the unsupported frequency through > min_freq sysfs path. Through dev_pm_qos devices can also ask for a freq higher than the maximum OPP and unlike sysfs there is no way to reject such requests, instead PM qos claims it's based on "best effort". There are many requests in the kernel for "PM_QOS_CPU_DMA_LATENCY 0" and I think that DEV_PM_QOS_MIN_FREQUENCY, MAX_S32 is a reasonable way for a device to request "max performance" from devfreq. Rejecting min > max is complicated (what happens to rejected requests when max goes back up?) and I think it's better to just stick with a "max overrides min" policy since it can already deal with conflicts. Do you have a usecase for rejecting out-of-range min_freq from userspace? cpufreq also accepts arbitrary min/max values and handles them. In theory pm_qos could be extended to differentiate between "soft" requests (current behavior) and "hard" requests which return errors if they can't be guaranteed but this is far out of scope. -- Regards, Leonard
On 19. 8. 22. 오후 7:58, Leonard Crestez wrote: > On 8/22/2019 1:03 PM, Chanwoo Choi wrote: >> On 19. 8. 21. 오후 10:03, Leonard Crestez wrote: >>> On 21.08.2019 05:02, Chanwoo Choi wrote: >>>> On 19. 8. 21. 오전 12:24, Leonard Crestez wrote: >>>>> Now that devfreq supports dev_pm_qos requests we can use them to handle >>>>> the min/max_freq values set by userspace in sysfs, similar to cpufreq. >>>>> >>>>> Since dev_pm_qos handles frequencies as kHz this change reduces the >>>>> precision of min_freq and max_freq. This shouldn't introduce problems >>>>> because frequencies which are not an integer number of kHz are likely >>>>> not an integer number of Hz either. >>>>> >>>>> Try to ensure compatibilitity by rounding min values down and rounding >>>>> max values up. >>>>> >>>>> Simplify the {min,max}_freq_store code by setting "null" values of 0 and >>>>> MAX_S32 respectively instead of clamping to what freq tables are >>>>> actually supported. Values are already automatically clamped on >>>>> readback. >>>>> >>>>> Also simplify by droping the limitation that userspace min_freq must be >>>>> lower than userspace max_freq, it is already documented that max_freq >>>>> takes precedence. >>>>> >>>>> @@ -1358,33 +1371,20 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, >>>>> >>>>> ret = sscanf(buf, "%lu", &value); >>>>> if (ret != 1) >>>>> return -EINVAL; >>>>> >>>>> - mutex_lock(&df->lock); >>>>> - >>>>> - if (value) { >>>>> - if (value > df->max_freq) { >>>>> - ret = -EINVAL; >>>>> - goto unlock; >>>>> - } >>>> >>>> Actually, the user can input the value they want. >>>> So, the above code is not necessary because the devfreq->scaling_max_freq >>>> is never overflow from supported maximum frequency. The devfreq->scaling_max_freq >>>> is based on OPP entries from DT. >>>> >>>> But, if we replace the existing request way of devfreq-cooling.c >>>> with dev_pm_qos, devfreq->scaling_max_freq doesn't guarantee >>>> the supported maximum frequency. > >>>> We need to keep the supported min_freq/max_freq value without dev_pm_qos >>>> requirement because the dev_pm_qos requirement might have the overflow value. >>>> the dev_pm_qos doesn't know the supported minimum and maximum frequency >>>> of devfreq device. >>> >>> I'm not sure I understand what you mean. My patch allows user to set >>> entirely arbitrary min/max rates and this is good because we already >>> have a well-defined way to handle this: max overrides min. >>> >>> The scaling_min_freq and scaling_max_freq variables can just be kept >>> around indefinitely no matter what happens to thermal. They're just a >>> cache for dev_pm_opp_find_freq_ceil and dev_pm_opp_find_freq_floor. >> >> This patch doesn't check the range of input value >> with the supported frequencies of devfreq device. >> >> For example, >> The devfreq0 device has the following available frequencies >> 100000000 200000000 300000000 >> >> and then user enters the 500000000 as following: >> echo 500000000 > /sys/class/devfreq/devfreq0/min_freq >> >> In result, get_effective_min_freq() will return the 500000000. >> It is wrong value. it show the unsupported frequency through >> min_freq sysfs path. > > Through dev_pm_qos devices can also ask for a freq higher than the > maximum OPP and unlike sysfs there is no way to reject such requests, > instead PM qos claims it's based on "best effort". > > There are many requests in the kernel for "PM_QOS_CPU_DMA_LATENCY 0" and > I think that DEV_PM_QOS_MIN_FREQUENCY, MAX_S32 is a reasonable way for a > device to request "max performance" from devfreq. > > Rejecting min > max is complicated (what happens to rejected requests > when max goes back up?) and I think it's better to just stick with a > "max overrides min" policy since it can already deal with conflicts. > > Do you have a usecase for rejecting out-of-range min_freq from > userspace? cpufreq also accepts arbitrary min/max values and handles them. I don't mean the rejecting when user enter the out-of-range frequency. As I commented, user can enter the value they want. But, we should check the range of input because devfreq have to show the correct supported frequency through sysfs. > > In theory pm_qos could be extended to differentiate between "soft" > requests (current behavior) and "hard" requests which return errors if > they can't be guaranteed but this is far out of scope. I think that you agreed the limitation of dev_pm_qos when entering or requesting the unsupported frequency. Until fixing it on dev_pm_qos, it have to be handled on consumer like devfreq. I think that get_min_freq() and get_max_freq() have to check the range of return value of dev_pm_qos_read_value().
On 22.08.2019 14:06, Chanwoo Choi wrote: > On 19. 8. 22. 오후 7:58, Leonard Crestez wrote: >> On 8/22/2019 1:03 PM, Chanwoo Choi wrote: >>> This patch doesn't check the range of input value >>> with the supported frequencies of devfreq device. >>> >>> For example, >>> The devfreq0 device has the following available frequencies >>> 100000000 200000000 300000000 >>> >>> and then user enters the 500000000 as following: >>> echo 500000000 > /sys/class/devfreq/devfreq0/min_freq >>> >>> In result, get_effective_min_freq() will return the 500000000. >>> It is wrong value. it show the unsupported frequency through >>> min_freq sysfs path. >> >> Through dev_pm_qos devices can also ask for a freq higher than the >> maximum OPP and unlike sysfs there is no way to reject such requests, >> instead PM qos claims it's based on "best effort". >> >> There are many requests in the kernel for "PM_QOS_CPU_DMA_LATENCY 0" and >> I think that DEV_PM_QOS_MIN_FREQUENCY, MAX_S32 is a reasonable way for a >> device to request "max performance" from devfreq. >> >> Rejecting min > max is complicated (what happens to rejected requests >> when max goes back up?) and I think it's better to just stick with a >> "max overrides min" policy since it can already deal with conflicts. >> >> Do you have a usecase for rejecting out-of-range min_freq from >> userspace? cpufreq also accepts arbitrary min/max values and handles them. > > I don't mean the rejecting when user enter the out-of-range frequency. > As I commented, user can enter the value they want. But, we should > check the range of input because devfreq have to show the correct supported > frequency through sysfs. We can avoid showing an out-of-range value in min_freq by printing min(min_freq, max_freq). The max_freq value from pm_qos can still be between OPPs so maybe print devfreq_recommended_opp(max_freq, DEVFREQ_FLAG_LEAST_UPPER_BOUND). >> In theory pm_qos could be extended to differentiate between "soft" >> requests (current behavior) and "hard" requests which return errors if >> they can't be guaranteed but this is far out of scope. > > I think that you agreed the limitation of dev_pm_qos when entering > or requesting the unsupported frequency. Yes, "best effort qos" is acceptable for now.
On 19. 8. 22. 오후 9:24, Leonard Crestez wrote: > On 22.08.2019 14:06, Chanwoo Choi wrote: >> On 19. 8. 22. 오후 7:58, Leonard Crestez wrote: >>> On 8/22/2019 1:03 PM, Chanwoo Choi wrote: >>>> This patch doesn't check the range of input value >>>> with the supported frequencies of devfreq device. >>>> >>>> For example, >>>> The devfreq0 device has the following available frequencies >>>> 100000000 200000000 300000000 >>>> >>>> and then user enters the 500000000 as following: >>>> echo 500000000 > /sys/class/devfreq/devfreq0/min_freq >>>> >>>> In result, get_effective_min_freq() will return the 500000000. >>>> It is wrong value. it show the unsupported frequency through >>>> min_freq sysfs path. >>> >>> Through dev_pm_qos devices can also ask for a freq higher than the >>> maximum OPP and unlike sysfs there is no way to reject such requests, >>> instead PM qos claims it's based on "best effort". >>> >>> There are many requests in the kernel for "PM_QOS_CPU_DMA_LATENCY 0" and >>> I think that DEV_PM_QOS_MIN_FREQUENCY, MAX_S32 is a reasonable way for a >>> device to request "max performance" from devfreq. >>> >>> Rejecting min > max is complicated (what happens to rejected requests >>> when max goes back up?) and I think it's better to just stick with a >>> "max overrides min" policy since it can already deal with conflicts. >>> >>> Do you have a usecase for rejecting out-of-range min_freq from >>> userspace? cpufreq also accepts arbitrary min/max values and handles them. >> >> I don't mean the rejecting when user enter the out-of-range frequency. >> As I commented, user can enter the value they want. But, we should >> check the range of input because devfreq have to show the correct supported >> frequency through sysfs. > > We can avoid showing an out-of-range value in min_freq by printing > min(min_freq, max_freq). You can check the range of frequency in the get_min_freq()/get_max_freq(). I want to return the meaningful frequency from get_min_freq()/get_max_freq(). Everyone expects get_min_freq()/get_max_freq() functions will retunrn the supported min/max frequency. If get_min_freq()/get_max_freq() return the our-of-range frequency, it is not nice and cause the confusion why these functions return the out-of-range frequency.. Also, if we don't check the return value of dev_pm_qos_read_value(), the out-of-range frequency of dev_pm_qos_read_value() would be used on multiple point of devfreq.c. I think that it is not good. > > The max_freq value from pm_qos can still be between OPPs so maybe print > devfreq_recommended_opp(max_freq, DEVFREQ_FLAG_LEAST_UPPER_BOUND). > >>> In theory pm_qos could be extended to differentiate between "soft" >>> requests (current behavior) and "hard" requests which return errors if >>> they can't be guaranteed but this is far out of scope. >> >> I think that you agreed the limitation of dev_pm_qos when entering >> or requesting the unsupported frequency. > > Yes, "best effort qos" is acceptable for now. > And please don't remove the my previous comment. Just reply your opinion without any removal.
On 23.08.2019 11:36, Chanwoo Choi wrote: > On 19. 8. 22. 오후 9:24, Leonard Crestez wrote: >> On 22.08.2019 14:06, Chanwoo Choi wrote: >>> On 19. 8. 22. 오후 7:58, Leonard Crestez wrote: >>>> On 8/22/2019 1:03 PM, Chanwoo Choi wrote: >>>>> This patch doesn't check the range of input value >>>>> with the supported frequencies of devfreq device. >>>>> >>>>> For example, >>>>> The devfreq0 device has the following available frequencies >>>>> 100000000 200000000 300000000 >>>>> >>>>> and then user enters the 500000000 as following: >>>>> echo 500000000 > /sys/class/devfreq/devfreq0/min_freq >>>>> >>>>> In result, get_effective_min_freq() will return the 500000000. >>>>> It is wrong value. it show the unsupported frequency through >>>>> min_freq sysfs path. >>>> >>>> Through dev_pm_qos devices can also ask for a freq higher than the >>>> maximum OPP and unlike sysfs there is no way to reject such requests, >>>> instead PM qos claims it's based on "best effort". >>>> >>>> There are many requests in the kernel for "PM_QOS_CPU_DMA_LATENCY 0" and >>>> I think that DEV_PM_QOS_MIN_FREQUENCY, MAX_S32 is a reasonable way for a >>>> device to request "max performance" from devfreq. >>>> >>>> Rejecting min > max is complicated (what happens to rejected requests >>>> when max goes back up?) and I think it's better to just stick with a >>>> "max overrides min" policy since it can already deal with conflicts. >>>> >>>> Do you have a usecase for rejecting out-of-range min_freq from >>>> userspace? cpufreq also accepts arbitrary min/max values and handles them. >>> >>> I don't mean the rejecting when user enter the out-of-range frequency. >>> As I commented, user can enter the value they want. But, we should >>> check the range of input because devfreq have to show the correct supported >>> frequency through sysfs. >> >> We can avoid showing an out-of-range value in min_freq by printing >> min(min_freq, max_freq). > > You can check the range of frequency in the get_min_freq()/get_max_freq(). > I want to return the meaningful frequency from get_min_freq()/get_max_freq(). > > Everyone expects get_min_freq()/get_max_freq() functions will retunrn > the supported min/max frequency. If get_min_freq()/get_max_freq() > return the our-of-range frequency, it is not nice and cause the confusion > why these functions return the out-of-range frequency.. > > Also, if we don't check the return value of dev_pm_qos_read_value(), > the out-of-range frequency of dev_pm_qos_read_value() would be used > on multiple point of devfreq.c. I think that it is not good. OK, I will try to partially move the min/max conflict logic from "update_devfreq" to a "get_freq_range" function and call that from show_min_freq/show_max_freq. >> The max_freq value from pm_qos can still be between OPPs so maybe print >> devfreq_recommended_opp(max_freq, DEVFREQ_FLAG_LEAST_UPPER_BOUND). >> >>>> In theory pm_qos could be extended to differentiate between "soft" >>>> requests (current behavior) and "hard" requests which return errors if >>>> they can't be guaranteed but this is far out of scope. >>> >>> I think that you agreed the limitation of dev_pm_qos when entering >>> or requesting the unsupported frequency. >> >> Yes, "best effort qos" is acceptable for now. > > And please don't remove the my previous comment. > Just reply your opinion without any removal. Sorry, when responding I usually trim sections which are very old or where there appears to be broad agreement. -- Regards, Leonard
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c index 58deffa52a37..687deadd08ed 100644 --- a/drivers/devfreq/devfreq.c +++ b/drivers/devfreq/devfreq.c @@ -101,21 +101,21 @@ static unsigned long find_available_max_freq(struct devfreq *devfreq) static unsigned long get_effective_min_freq(struct devfreq *devfreq) { lockdep_assert_held(&devfreq->lock); - return max3(devfreq->scaling_min_freq, devfreq->min_freq, + return max(devfreq->scaling_min_freq, 1000 * (unsigned long)dev_pm_qos_read_value( devfreq->dev.parent, DEV_PM_QOS_MIN_FREQUENCY)); } static unsigned long get_effective_max_freq(struct devfreq *devfreq) { lockdep_assert_held(&devfreq->lock); - return min3(devfreq->scaling_max_freq, devfreq->max_freq, + return min(devfreq->scaling_max_freq, 1000 * (unsigned long)dev_pm_qos_read_value( devfreq->dev.parent, DEV_PM_QOS_MAX_FREQUENCY)); } @@ -644,10 +644,12 @@ static void devfreq_dev_release(struct device *dev) dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max, DEV_PM_QOS_MAX_FREQUENCY); dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min, DEV_PM_QOS_MIN_FREQUENCY); + dev_pm_qos_remove_request(&devfreq->max_freq_req); + dev_pm_qos_remove_request(&devfreq->min_freq_req); mutex_destroy(&devfreq->lock); kfree(devfreq); } /** @@ -698,10 +700,19 @@ struct devfreq *devfreq_add_device(struct device *dev, devfreq->previous_freq = profile->initial_freq; devfreq->last_status.current_frequency = profile->initial_freq; devfreq->data = data; devfreq->nb.notifier_call = devfreq_notifier_call; + err = dev_pm_qos_add_request(dev, &devfreq->min_freq_req, + DEV_PM_QOS_MIN_FREQUENCY, 0); + if (err < 0) + goto err_dev; + err = dev_pm_qos_add_request(dev, &devfreq->max_freq_req, + DEV_PM_QOS_MAX_FREQUENCY, S32_MAX); + if (err < 0) + goto err_dev; + /* * notifier from pm_qos * * initialized outside of devfreq->lock to avoid circular warning * between devfreq->lock and dev_pm_qos_mtx @@ -732,19 +743,17 @@ struct devfreq *devfreq_add_device(struct device *dev, if (!devfreq->scaling_min_freq) { mutex_unlock(&devfreq->lock); err = -EINVAL; goto err_dev; } - devfreq->min_freq = devfreq->scaling_min_freq; devfreq->scaling_max_freq = find_available_max_freq(devfreq); if (!devfreq->scaling_max_freq) { mutex_unlock(&devfreq->lock); err = -EINVAL; goto err_dev; } - devfreq->max_freq = devfreq->scaling_max_freq; devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev); atomic_set(&devfreq->suspend_count, 0); dev_set_name(&devfreq->dev, "devfreq%d", @@ -816,10 +825,14 @@ struct devfreq *devfreq_add_device(struct device *dev, err_dev: dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max, DEV_PM_QOS_MAX_FREQUENCY); dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min, DEV_PM_QOS_MIN_FREQUENCY); + if (dev_pm_qos_request_active(&devfreq->max_freq_req)) + dev_pm_qos_remove_request(&devfreq->max_freq_req); + if (dev_pm_qos_request_active(&devfreq->min_freq_req)) + dev_pm_qos_remove_request(&devfreq->min_freq_req); kfree(devfreq); err_out: return ERR_PTR(err); } EXPORT_SYMBOL(devfreq_add_device); @@ -1358,33 +1371,20 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr, ret = sscanf(buf, "%lu", &value); if (ret != 1) return -EINVAL; - mutex_lock(&df->lock); - - if (value) { - if (value > df->max_freq) { - ret = -EINVAL; - goto unlock; - } - } else { - unsigned long *freq_table = df->profile->freq_table; + if (value) + value = value / 1000; + else + value = 0; - /* Get minimum frequency according to sorting order */ - if (freq_table[0] < freq_table[df->profile->max_state - 1]) - value = freq_table[0]; - else - value = freq_table[df->profile->max_state - 1]; - } + ret = dev_pm_qos_update_request(&df->min_freq_req, value); + if (ret < 0) + return ret; - df->min_freq = value; - update_devfreq(df); - ret = count; -unlock: - mutex_unlock(&df->lock); - return ret; + return count; } static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1407,33 +1407,20 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr, ret = sscanf(buf, "%lu", &value); if (ret != 1) return -EINVAL; - mutex_lock(&df->lock); - - if (value) { - if (value < df->min_freq) { - ret = -EINVAL; - goto unlock; - } - } else { - unsigned long *freq_table = df->profile->freq_table; + if (value) + value = DIV_ROUND_UP(value, 1000); + else + value = S32_MAX; - /* Get maximum frequency according to sorting order */ - if (freq_table[0] < freq_table[df->profile->max_state - 1]) - value = freq_table[df->profile->max_state - 1]; - else - value = freq_table[0]; - } + ret = dev_pm_qos_update_request(&df->max_freq_req, value); + if (ret < 0) + return ret; - df->max_freq = value; - update_devfreq(df); - ret = count; -unlock: - mutex_unlock(&df->lock); - return ret; + return count; } static DEVICE_ATTR_RW(min_freq); static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 8b92ccbd1962..d2c5bb7add0a 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -11,10 +11,11 @@ #define __LINUX_DEVFREQ_H__ #include <linux/device.h> #include <linux/notifier.h> #include <linux/pm_opp.h> +#include <linux/pm_qos.h> #define DEVFREQ_NAME_LEN 16 /* DEVFREQ governor name */ #define DEVFREQ_GOV_SIMPLE_ONDEMAND "simple_ondemand" @@ -121,12 +122,12 @@ struct devfreq_dev_profile { * devfreq.nb to the corresponding register notifier call chain. * @work: delayed work for load monitoring. * @previous_freq: previously configured frequency value. * @data: Private data of the governor. The devfreq framework does not * touch this. - * @min_freq: Limit minimum frequency requested by user (0: none) - * @max_freq: Limit maximum frequency requested by user (0: none) + * @min_freq_req: Limit minimum frequency requested by user (0: none) + * @max_freq_req: Limit maximum frequency requested by user (0: none) * @scaling_min_freq: Limit minimum frequency requested by OPP interface * @scaling_max_freq: Limit maximum frequency requested by OPP interface * @stop_polling: devfreq polling status of a device. * @suspend_freq: frequency of a device set during suspend phase. * @resume_freq: frequency of a device set in resume phase. @@ -161,12 +162,12 @@ struct devfreq { unsigned long previous_freq; struct devfreq_dev_status last_status; void *data; /* private data for governors */ - unsigned long min_freq; - unsigned long max_freq; + struct dev_pm_qos_request min_freq_req; + struct dev_pm_qos_request max_freq_req; unsigned long scaling_min_freq; unsigned long scaling_max_freq; bool stop_polling; unsigned long suspend_freq;
Now that devfreq supports dev_pm_qos requests we can use them to handle the min/max_freq values set by userspace in sysfs, similar to cpufreq. Since dev_pm_qos handles frequencies as kHz this change reduces the precision of min_freq and max_freq. This shouldn't introduce problems because frequencies which are not an integer number of kHz are likely not an integer number of Hz either. Try to ensure compatibilitity by rounding min values down and rounding max values up. Simplify the {min,max}_freq_store code by setting "null" values of 0 and MAX_S32 respectively instead of clamping to what freq tables are actually supported. Values are already automatically clamped on readback. Also simplify by droping the limitation that userspace min_freq must be lower than userspace max_freq, it is already documented that max_freq takes precedence. Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com> --- drivers/devfreq/devfreq.c | 79 ++++++++++++++++----------------------- include/linux/devfreq.h | 9 +++-- 2 files changed, 38 insertions(+), 50 deletions(-)