diff mbox series

[v3,2/2] PM / devfreq: Use dev_pm_qos for sysfs min/max_freq

Message ID af14021b98254032e856397b54329756c1cc59c0.1566314535.git.leonard.crestez@nxp.com (mailing list archive)
State Superseded
Headers show
Series PM / devfreq: Add dev_pm_qos support | expand

Commit Message

Leonard Crestez Aug. 20, 2019, 3:24 p.m. UTC
Now that devfreq supports dev_pm_qos requests we can use them to handle
the min/max_freq values set by userspace in sysfs, similar to cpufreq.

Since dev_pm_qos handles frequencies as kHz this change reduces the
precision of min_freq and max_freq. This shouldn't introduce problems
because frequencies which are not an integer number of kHz are likely
not an integer number of Hz either.

Try to ensure compatibilitity by rounding min values down and rounding
max values up.

Simplify the {min,max}_freq_store code by setting "null" values of 0 and
MAX_S32 respectively instead of clamping to what freq tables are
actually supported. Values are already automatically clamped on
readback.

Also simplify by droping the limitation that userspace min_freq must be
lower than userspace max_freq, it is already documented that max_freq
takes precedence.

Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
---
 drivers/devfreq/devfreq.c | 79 ++++++++++++++++-----------------------
 include/linux/devfreq.h   |  9 +++--
 2 files changed, 38 insertions(+), 50 deletions(-)

Comments

Chanwoo Choi Aug. 21, 2019, 2:04 a.m. UTC | #1
On 19. 8. 21. 오전 12:24, Leonard Crestez wrote:
> Now that devfreq supports dev_pm_qos requests we can use them to handle
> the min/max_freq values set by userspace in sysfs, similar to cpufreq.
> 
> Since dev_pm_qos handles frequencies as kHz this change reduces the
> precision of min_freq and max_freq. This shouldn't introduce problems
> because frequencies which are not an integer number of kHz are likely
> not an integer number of Hz either.
> 
> Try to ensure compatibilitity by rounding min values down and rounding
> max values up.
> 
> Simplify the {min,max}_freq_store code by setting "null" values of 0 and
> MAX_S32 respectively instead of clamping to what freq tables are
> actually supported. Values are already automatically clamped on
> readback.
> 
> Also simplify by droping the limitation that userspace min_freq must be
> lower than userspace max_freq, it is already documented that max_freq
> takes precedence.
> 
> Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
> ---
>  drivers/devfreq/devfreq.c | 79 ++++++++++++++++-----------------------
>  include/linux/devfreq.h   |  9 +++--
>  2 files changed, 38 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 58deffa52a37..687deadd08ed 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -101,21 +101,21 @@ static unsigned long find_available_max_freq(struct devfreq *devfreq)
>  
>  static unsigned long get_effective_min_freq(struct devfreq *devfreq)
>  {
>  	lockdep_assert_held(&devfreq->lock);
>  
> -	return max3(devfreq->scaling_min_freq, devfreq->min_freq,
> +	return max(devfreq->scaling_min_freq,
>  		1000 * (unsigned long)dev_pm_qos_read_value(
>  				devfreq->dev.parent,
>  				DEV_PM_QOS_MIN_FREQUENCY));
>  }
>  
>  static unsigned long get_effective_max_freq(struct devfreq *devfreq)
>  {
>  	lockdep_assert_held(&devfreq->lock);
>  
> -	return min3(devfreq->scaling_max_freq, devfreq->max_freq,
> +	return min(devfreq->scaling_max_freq,
>  		1000 * (unsigned long)dev_pm_qos_read_value(
>  				devfreq->dev.parent,
>  				DEV_PM_QOS_MAX_FREQUENCY));
>  }
>  
> @@ -644,10 +644,12 @@ static void devfreq_dev_release(struct device *dev)
>  
>  	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max,
>  			DEV_PM_QOS_MAX_FREQUENCY);
>  	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min,
>  			DEV_PM_QOS_MIN_FREQUENCY);
> +	dev_pm_qos_remove_request(&devfreq->max_freq_req);
> +	dev_pm_qos_remove_request(&devfreq->min_freq_req);
>  	mutex_destroy(&devfreq->lock);
>  	kfree(devfreq);
>  }
>  
>  /**
> @@ -698,10 +700,19 @@ struct devfreq *devfreq_add_device(struct device *dev,
>  	devfreq->previous_freq = profile->initial_freq;
>  	devfreq->last_status.current_frequency = profile->initial_freq;
>  	devfreq->data = data;
>  	devfreq->nb.notifier_call = devfreq_notifier_call;
>  
> +	err = dev_pm_qos_add_request(dev, &devfreq->min_freq_req,
> +				     DEV_PM_QOS_MIN_FREQUENCY, 0);
> +	if (err < 0)
> +		goto err_dev;
> +	err = dev_pm_qos_add_request(dev, &devfreq->max_freq_req,
> +				     DEV_PM_QOS_MAX_FREQUENCY, S32_MAX);
> +	if (err < 0)
> +		goto err_dev;
> +

Please move them under dev_pm_qos_add_notifier()
because it seems that it request the qos without qos_notifier.

>  	/*
>  	 * notifier from pm_qos
>  	 *
>  	 * initialized outside of devfreq->lock to avoid circular warning
>  	 * between devfreq->lock and dev_pm_qos_mtx
> @@ -732,19 +743,17 @@ struct devfreq *devfreq_add_device(struct device *dev,
>  	if (!devfreq->scaling_min_freq) {
>  		mutex_unlock(&devfreq->lock);
>  		err = -EINVAL;
>  		goto err_dev;
>  	}
> -	devfreq->min_freq = devfreq->scaling_min_freq;
>  
>  	devfreq->scaling_max_freq = find_available_max_freq(devfreq);
>  	if (!devfreq->scaling_max_freq) {
>  		mutex_unlock(&devfreq->lock);
>  		err = -EINVAL;
>  		goto err_dev;
>  	}
> -	devfreq->max_freq = devfreq->scaling_max_freq;
>  
>  	devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
>  	atomic_set(&devfreq->suspend_count, 0);
>  
>  	dev_set_name(&devfreq->dev, "devfreq%d",
> @@ -816,10 +825,14 @@ struct devfreq *devfreq_add_device(struct device *dev,
>  err_dev:
>  	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max,
>  				   DEV_PM_QOS_MAX_FREQUENCY);
>  	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min,
>  				   DEV_PM_QOS_MIN_FREQUENCY);
> +	if (dev_pm_qos_request_active(&devfreq->max_freq_req))
> +		dev_pm_qos_remove_request(&devfreq->max_freq_req);
> +	if (dev_pm_qos_request_active(&devfreq->min_freq_req))
> +		dev_pm_qos_remove_request(&devfreq->min_freq_req);
>  	kfree(devfreq);
>  err_out:
>  	return ERR_PTR(err);
>  }
>  EXPORT_SYMBOL(devfreq_add_device);
> @@ -1358,33 +1371,20 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
>  
>  	ret = sscanf(buf, "%lu", &value);
>  	if (ret != 1)
>  		return -EINVAL;
>  
> -	mutex_lock(&df->lock);
> -
> -	if (value) {
> -		if (value > df->max_freq) {
> -			ret = -EINVAL;
> -			goto unlock;
> -		}

Actually, the user can input the value they want.
So, the above code is not necessary because the devfreq->scaling_max_freq
is never overflow from supported maximum frequency. The devfreq->scaling_max_freq
is based on OPP entries from DT.

But, if we replace the existing request way of devfreq-cooling.c
with dev_pm_qos, devfreq->scaling_max_freq doesn't guarantee
the supported maximum frequency.

We need to keep the supported min_freq/max_freq value without dev_pm_qos
requirement because the dev_pm_qos requirement might have the overflow value.
the dev_pm_qos doesn't know the supported minimum and maximum frequency
of devfreq device.


> -	} else {
> -		unsigned long *freq_table = df->profile->freq_table;
> +	if (value)
> +		value = value / 1000;

Better to use KHZ definition instead of 1000 as I commented on patch1.

> +	else
> +		value = 0;
>  
> -		/* Get minimum frequency according to sorting order */
> -		if (freq_table[0] < freq_table[df->profile->max_state - 1])
> -			value = freq_table[0];
> -		else
> -			value = freq_table[df->profile->max_state - 1];
> -	}
> +	ret = dev_pm_qos_update_request(&df->min_freq_req, value);
> +	if (ret < 0)
> +		return ret;
>  
> -	df->min_freq = value;
> -	update_devfreq(df);
> -	ret = count;
> -unlock:
> -	mutex_unlock(&df->lock);
> -	return ret;
> +	return count;
>  }
>  
>  static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
>  			     char *buf)
>  {
> @@ -1407,33 +1407,20 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
>  
>  	ret = sscanf(buf, "%lu", &value);
>  	if (ret != 1)
>  		return -EINVAL;
>  
> -	mutex_lock(&df->lock);
> -
> -	if (value) {
> -		if (value < df->min_freq) {
> -			ret = -EINVAL;
> -			goto unlock;

> -		}
> -	} else {
> -		unsigned long *freq_table = df->profile->freq_table;
> +	if (value)
> +		value = DIV_ROUND_UP(value, 1000);
> +	else
> +		value = S32_MAX;
>  
> -		/* Get maximum frequency according to sorting order */
> -		if (freq_table[0] < freq_table[df->profile->max_state - 1])
> -			value = freq_table[df->profile->max_state - 1];
> -		else
> -			value = freq_table[0];
> -	}
> +	ret = dev_pm_qos_update_request(&df->max_freq_req, value);
> +	if (ret < 0)
> +		return ret;
>  
> -	df->max_freq = value;
> -	update_devfreq(df);
> -	ret = count;
> -unlock:
> -	mutex_unlock(&df->lock);
> -	return ret;
> +	return count;
>  }
>  static DEVICE_ATTR_RW(min_freq);
>  
>  static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
>  			     char *buf)
> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> index 8b92ccbd1962..d2c5bb7add0a 100644
> --- a/include/linux/devfreq.h
> +++ b/include/linux/devfreq.h
> @@ -11,10 +11,11 @@
>  #define __LINUX_DEVFREQ_H__
>  
>  #include <linux/device.h>
>  #include <linux/notifier.h>
>  #include <linux/pm_opp.h>
> +#include <linux/pm_qos.h>
>  
>  #define DEVFREQ_NAME_LEN 16
>  
>  /* DEVFREQ governor name */
>  #define DEVFREQ_GOV_SIMPLE_ONDEMAND	"simple_ondemand"
> @@ -121,12 +122,12 @@ struct devfreq_dev_profile {
>   *		devfreq.nb to the corresponding register notifier call chain.
>   * @work:	delayed work for load monitoring.
>   * @previous_freq:	previously configured frequency value.
>   * @data:	Private data of the governor. The devfreq framework does not
>   *		touch this.
> - * @min_freq:	Limit minimum frequency requested by user (0: none)
> - * @max_freq:	Limit maximum frequency requested by user (0: none)
> + * @min_freq_req:	Limit minimum frequency requested by user (0: none)
> + * @max_freq_req:	Limit maximum frequency requested by user (0: none)
>   * @scaling_min_freq:	Limit minimum frequency requested by OPP interface
>   * @scaling_max_freq:	Limit maximum frequency requested by OPP interface
>   * @stop_polling:	 devfreq polling status of a device.
>   * @suspend_freq:	 frequency of a device set during suspend phase.
>   * @resume_freq:	 frequency of a device set in resume phase.
> @@ -161,12 +162,12 @@ struct devfreq {
>  	unsigned long previous_freq;
>  	struct devfreq_dev_status last_status;
>  
>  	void *data; /* private data for governors */
>  
> -	unsigned long min_freq;
> -	unsigned long max_freq;
> +	struct dev_pm_qos_request min_freq_req;
> +	struct dev_pm_qos_request max_freq_req;
>  	unsigned long scaling_min_freq;
>  	unsigned long scaling_max_freq;
>  	bool stop_polling;
>  
>  	unsigned long suspend_freq;
>
Leonard Crestez Aug. 21, 2019, 1:03 p.m. UTC | #2
On 21.08.2019 05:02, Chanwoo Choi wrote:
> On 19. 8. 21. 오전 12:24, Leonard Crestez wrote:
>> Now that devfreq supports dev_pm_qos requests we can use them to handle
>> the min/max_freq values set by userspace in sysfs, similar to cpufreq.
>>
>> Since dev_pm_qos handles frequencies as kHz this change reduces the
>> precision of min_freq and max_freq. This shouldn't introduce problems
>> because frequencies which are not an integer number of kHz are likely
>> not an integer number of Hz either.
>>
>> Try to ensure compatibilitity by rounding min values down and rounding
>> max values up.
>>
>> Simplify the {min,max}_freq_store code by setting "null" values of 0 and
>> MAX_S32 respectively instead of clamping to what freq tables are
>> actually supported. Values are already automatically clamped on
>> readback.
>>
>> Also simplify by droping the limitation that userspace min_freq must be
>> lower than userspace max_freq, it is already documented that max_freq
>> takes precedence.
>>
>> @@ -1358,33 +1371,20 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
>>   
>>   	ret = sscanf(buf, "%lu", &value);
>>   	if (ret != 1)
>>   		return -EINVAL;
>>   
>> -	mutex_lock(&df->lock);
>> -
>> -	if (value) {
>> -		if (value > df->max_freq) {
>> -			ret = -EINVAL;
>> -			goto unlock;
>> -		}
> 
> Actually, the user can input the value they want.
> So, the above code is not necessary because the devfreq->scaling_max_freq
> is never overflow from supported maximum frequency. The devfreq->scaling_max_freq
> is based on OPP entries from DT.
> 
> But, if we replace the existing request way of devfreq-cooling.c
> with dev_pm_qos, devfreq->scaling_max_freq doesn't guarantee
> the supported maximum frequency. >
> We need to keep the supported min_freq/max_freq value without dev_pm_qos
> requirement because the dev_pm_qos requirement might have the overflow value.
> the dev_pm_qos doesn't know the supported minimum and maximum frequency
> of devfreq device.

I'm not sure I understand what you mean. My patch allows user to set 
entirely arbitrary min/max rates and this is good because we already 
have a well-defined way to handle this: max overrides min.

The scaling_min_freq and scaling_max_freq variables can just be kept 
around indefinitely no matter what happens to thermal. They're just a 
cache for dev_pm_opp_find_freq_ceil and dev_pm_opp_find_freq_floor.

BTW: I noticed that scaling_min_freq and scaling_max_freq are updated in 
devfreq_notifier_call but devfreq->nb is not registered by default, only 
when a driver requests it explicitly. Is there a reason for this?
Because I'd call dev_pm_opp_register_notifier inside devfreq_add_device 
and remove all the devfreq_register/unregister_notifier APIs.

--
Regards,
Leonard
Chanwoo Choi Aug. 22, 2019, 10:07 a.m. UTC | #3
On 19. 8. 21. 오후 10:03, Leonard Crestez wrote:
> On 21.08.2019 05:02, Chanwoo Choi wrote:
>> On 19. 8. 21. 오전 12:24, Leonard Crestez wrote:
>>> Now that devfreq supports dev_pm_qos requests we can use them to handle
>>> the min/max_freq values set by userspace in sysfs, similar to cpufreq.
>>>
>>> Since dev_pm_qos handles frequencies as kHz this change reduces the
>>> precision of min_freq and max_freq. This shouldn't introduce problems
>>> because frequencies which are not an integer number of kHz are likely
>>> not an integer number of Hz either.
>>>
>>> Try to ensure compatibilitity by rounding min values down and rounding
>>> max values up.
>>>
>>> Simplify the {min,max}_freq_store code by setting "null" values of 0 and
>>> MAX_S32 respectively instead of clamping to what freq tables are
>>> actually supported. Values are already automatically clamped on
>>> readback.
>>>
>>> Also simplify by droping the limitation that userspace min_freq must be
>>> lower than userspace max_freq, it is already documented that max_freq
>>> takes precedence.
>>>
>>> @@ -1358,33 +1371,20 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
>>>   
>>>   	ret = sscanf(buf, "%lu", &value);
>>>   	if (ret != 1)
>>>   		return -EINVAL;
>>>   
>>> -	mutex_lock(&df->lock);
>>> -
>>> -	if (value) {
>>> -		if (value > df->max_freq) {
>>> -			ret = -EINVAL;
>>> -			goto unlock;
>>> -		}
>>
>> Actually, the user can input the value they want.
>> So, the above code is not necessary because the devfreq->scaling_max_freq
>> is never overflow from supported maximum frequency. The devfreq->scaling_max_freq
>> is based on OPP entries from DT.
>>
>> But, if we replace the existing request way of devfreq-cooling.c
>> with dev_pm_qos, devfreq->scaling_max_freq doesn't guarantee
>> the supported maximum frequency. >
>> We need to keep the supported min_freq/max_freq value without dev_pm_qos
>> requirement because the dev_pm_qos requirement might have the overflow value.
>> the dev_pm_qos doesn't know the supported minimum and maximum frequency
>> of devfreq device.
> 
> I'm not sure I understand what you mean. My patch allows user to set 
> entirely arbitrary min/max rates and this is good because we already 
> have a well-defined way to handle this: max overrides min.
> 
> The scaling_min_freq and scaling_max_freq variables can just be kept 
> around indefinitely no matter what happens to thermal. They're just a 
> cache for dev_pm_opp_find_freq_ceil and dev_pm_opp_find_freq_floor.

This patch doesn't check the range of input value
with the supported frequencies of devfreq device.

For example,
The devfreq0 device has the following available frequencies
100000000 200000000 300000000

and then user enters the 500000000 as following:
echo 500000000 > /sys/class/devfreq/devfreq0/min_freq

In result, get_effective_min_freq() will return the 500000000.
It is wrong value. it show the unsupported frequency through
min_freq sysfs path.

- devfreq->scaling_min_freq is 100000000,                                   
- 1000 * (unsigned long)dev_pm_qos_read_value(devfreq->dev.parent,
  DEV_PM_QOS_MIN_FREQUENCY)); is 500000000


> 
> BTW: I noticed that scaling_min_freq and scaling_max_freq are updated in 
> devfreq_notifier_call but devfreq->nb is not registered by default, only 
> when a driver requests it explicitly. Is there a reason for this?

Initial version of devfreq has not forced to use the OPP interface 
as the mandatory. So, it just provides the external function
devfreq_register_opp_notifier.

But,
We can call 'devfreq_register_opp_notifier' during devfreq_add_device()
because the OPP interface is mandatory for devfreq.

> Because I'd call dev_pm_opp_register_notifier inside devfreq_add_device 
> and remove all the devfreq_register/unregister_notifier APIs.

OK.

> 
> --
> Regards,
> Leonard
>
Leonard Crestez Aug. 22, 2019, 10:58 a.m. UTC | #4
On 8/22/2019 1:03 PM, Chanwoo Choi wrote:
> On 19. 8. 21. 오후 10:03, Leonard Crestez wrote:
>> On 21.08.2019 05:02, Chanwoo Choi wrote:
>>> On 19. 8. 21. 오전 12:24, Leonard Crestez wrote:
>>>> Now that devfreq supports dev_pm_qos requests we can use them to handle
>>>> the min/max_freq values set by userspace in sysfs, similar to cpufreq.
>>>>
>>>> Since dev_pm_qos handles frequencies as kHz this change reduces the
>>>> precision of min_freq and max_freq. This shouldn't introduce problems
>>>> because frequencies which are not an integer number of kHz are likely
>>>> not an integer number of Hz either.
>>>>
>>>> Try to ensure compatibilitity by rounding min values down and rounding
>>>> max values up.
>>>>
>>>> Simplify the {min,max}_freq_store code by setting "null" values of 0 and
>>>> MAX_S32 respectively instead of clamping to what freq tables are
>>>> actually supported. Values are already automatically clamped on
>>>> readback.
>>>>
>>>> Also simplify by droping the limitation that userspace min_freq must be
>>>> lower than userspace max_freq, it is already documented that max_freq
>>>> takes precedence.
>>>>
>>>> @@ -1358,33 +1371,20 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
>>>>    
>>>>    	ret = sscanf(buf, "%lu", &value);
>>>>    	if (ret != 1)
>>>>    		return -EINVAL;
>>>>    
>>>> -	mutex_lock(&df->lock);
>>>> -
>>>> -	if (value) {
>>>> -		if (value > df->max_freq) {
>>>> -			ret = -EINVAL;
>>>> -			goto unlock;
>>>> -		}
>>>
>>> Actually, the user can input the value they want.
>>> So, the above code is not necessary because the devfreq->scaling_max_freq
>>> is never overflow from supported maximum frequency. The devfreq->scaling_max_freq
>>> is based on OPP entries from DT.
>>>
>>> But, if we replace the existing request way of devfreq-cooling.c
>>> with dev_pm_qos, devfreq->scaling_max_freq doesn't guarantee
>>> the supported maximum frequency. >
>>> We need to keep the supported min_freq/max_freq value without dev_pm_qos
>>> requirement because the dev_pm_qos requirement might have the overflow value.
>>> the dev_pm_qos doesn't know the supported minimum and maximum frequency
>>> of devfreq device.
>>
>> I'm not sure I understand what you mean. My patch allows user to set
>> entirely arbitrary min/max rates and this is good because we already
>> have a well-defined way to handle this: max overrides min.
>>
>> The scaling_min_freq and scaling_max_freq variables can just be kept
>> around indefinitely no matter what happens to thermal. They're just a
>> cache for dev_pm_opp_find_freq_ceil and dev_pm_opp_find_freq_floor.
> 
> This patch doesn't check the range of input value
> with the supported frequencies of devfreq device.
> 
> For example,
> The devfreq0 device has the following available frequencies
> 100000000 200000000 300000000
> 
> and then user enters the 500000000 as following:
> echo 500000000 > /sys/class/devfreq/devfreq0/min_freq
> 
> In result, get_effective_min_freq() will return the 500000000.
> It is wrong value. it show the unsupported frequency through
> min_freq sysfs path.

Through dev_pm_qos devices can also ask for a freq higher than the 
maximum OPP and unlike sysfs there is no way to reject such requests, 
instead PM qos claims it's based on "best effort".

There are many requests in the kernel for "PM_QOS_CPU_DMA_LATENCY 0" and 
I think that DEV_PM_QOS_MIN_FREQUENCY, MAX_S32 is a reasonable way for a 
device to request "max performance" from devfreq.

Rejecting min > max is complicated (what happens to rejected requests 
when max goes back up?) and I think it's better to just stick with a 
"max overrides min" policy since it can already deal with conflicts.

Do you have a usecase for rejecting out-of-range min_freq from 
userspace? cpufreq also accepts arbitrary min/max values and handles them.

In theory pm_qos could be extended to differentiate between "soft" 
requests (current behavior) and "hard" requests which return errors if 
they can't be guaranteed but this is far out of scope.

--
Regards,
Leonard
Chanwoo Choi Aug. 22, 2019, 11:10 a.m. UTC | #5
On 19. 8. 22. 오후 7:58, Leonard Crestez wrote:
> On 8/22/2019 1:03 PM, Chanwoo Choi wrote:
>> On 19. 8. 21. 오후 10:03, Leonard Crestez wrote:
>>> On 21.08.2019 05:02, Chanwoo Choi wrote:
>>>> On 19. 8. 21. 오전 12:24, Leonard Crestez wrote:
>>>>> Now that devfreq supports dev_pm_qos requests we can use them to handle
>>>>> the min/max_freq values set by userspace in sysfs, similar to cpufreq.
>>>>>
>>>>> Since dev_pm_qos handles frequencies as kHz this change reduces the
>>>>> precision of min_freq and max_freq. This shouldn't introduce problems
>>>>> because frequencies which are not an integer number of kHz are likely
>>>>> not an integer number of Hz either.
>>>>>
>>>>> Try to ensure compatibilitity by rounding min values down and rounding
>>>>> max values up.
>>>>>
>>>>> Simplify the {min,max}_freq_store code by setting "null" values of 0 and
>>>>> MAX_S32 respectively instead of clamping to what freq tables are
>>>>> actually supported. Values are already automatically clamped on
>>>>> readback.
>>>>>
>>>>> Also simplify by droping the limitation that userspace min_freq must be
>>>>> lower than userspace max_freq, it is already documented that max_freq
>>>>> takes precedence.
>>>>>
>>>>> @@ -1358,33 +1371,20 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
>>>>>    
>>>>>    	ret = sscanf(buf, "%lu", &value);
>>>>>    	if (ret != 1)
>>>>>    		return -EINVAL;
>>>>>    
>>>>> -	mutex_lock(&df->lock);
>>>>> -
>>>>> -	if (value) {
>>>>> -		if (value > df->max_freq) {
>>>>> -			ret = -EINVAL;
>>>>> -			goto unlock;
>>>>> -		}
>>>>
>>>> Actually, the user can input the value they want.
>>>> So, the above code is not necessary because the devfreq->scaling_max_freq
>>>> is never overflow from supported maximum frequency. The devfreq->scaling_max_freq
>>>> is based on OPP entries from DT.
>>>>
>>>> But, if we replace the existing request way of devfreq-cooling.c
>>>> with dev_pm_qos, devfreq->scaling_max_freq doesn't guarantee
>>>> the supported maximum frequency. >
>>>> We need to keep the supported min_freq/max_freq value without dev_pm_qos
>>>> requirement because the dev_pm_qos requirement might have the overflow value.
>>>> the dev_pm_qos doesn't know the supported minimum and maximum frequency
>>>> of devfreq device.
>>>
>>> I'm not sure I understand what you mean. My patch allows user to set
>>> entirely arbitrary min/max rates and this is good because we already
>>> have a well-defined way to handle this: max overrides min.
>>>
>>> The scaling_min_freq and scaling_max_freq variables can just be kept
>>> around indefinitely no matter what happens to thermal. They're just a
>>> cache for dev_pm_opp_find_freq_ceil and dev_pm_opp_find_freq_floor.
>>
>> This patch doesn't check the range of input value
>> with the supported frequencies of devfreq device.
>>
>> For example,
>> The devfreq0 device has the following available frequencies
>> 100000000 200000000 300000000
>>
>> and then user enters the 500000000 as following:
>> echo 500000000 > /sys/class/devfreq/devfreq0/min_freq
>>
>> In result, get_effective_min_freq() will return the 500000000.
>> It is wrong value. it show the unsupported frequency through
>> min_freq sysfs path.
> 
> Through dev_pm_qos devices can also ask for a freq higher than the 
> maximum OPP and unlike sysfs there is no way to reject such requests, 
> instead PM qos claims it's based on "best effort".
> 
> There are many requests in the kernel for "PM_QOS_CPU_DMA_LATENCY 0" and 
> I think that DEV_PM_QOS_MIN_FREQUENCY, MAX_S32 is a reasonable way for a 
> device to request "max performance" from devfreq.
> 
> Rejecting min > max is complicated (what happens to rejected requests 
> when max goes back up?) and I think it's better to just stick with a 
> "max overrides min" policy since it can already deal with conflicts.
> 
> Do you have a usecase for rejecting out-of-range min_freq from 
> userspace? cpufreq also accepts arbitrary min/max values and handles them.

I don't mean the rejecting when user enter the out-of-range frequency.
As I commented, user can enter the value they want. But, we should
check the range of input because devfreq have to show the correct supported
frequency through sysfs.

> 
> In theory pm_qos could be extended to differentiate between "soft" 
> requests (current behavior) and "hard" requests which return errors if 
> they can't be guaranteed but this is far out of scope.

I think that you agreed the limitation of dev_pm_qos when entering
or requesting the unsupported frequency.

Until fixing it on dev_pm_qos, it have to be handled on consumer
like devfreq. I think that get_min_freq() and get_max_freq() have
to check the range of return value of dev_pm_qos_read_value().
Leonard Crestez Aug. 22, 2019, 12:24 p.m. UTC | #6
On 22.08.2019 14:06, Chanwoo Choi wrote:
> On 19. 8. 22. 오후 7:58, Leonard Crestez wrote:
>> On 8/22/2019 1:03 PM, Chanwoo Choi wrote:
>>> This patch doesn't check the range of input value
>>> with the supported frequencies of devfreq device.
>>>
>>> For example,
>>> The devfreq0 device has the following available frequencies
>>> 100000000 200000000 300000000
>>>
>>> and then user enters the 500000000 as following:
>>> echo 500000000 > /sys/class/devfreq/devfreq0/min_freq
>>>
>>> In result, get_effective_min_freq() will return the 500000000.
>>> It is wrong value. it show the unsupported frequency through
>>> min_freq sysfs path.
>>
>> Through dev_pm_qos devices can also ask for a freq higher than the
>> maximum OPP and unlike sysfs there is no way to reject such requests,
>> instead PM qos claims it's based on "best effort".
>>
>> There are many requests in the kernel for "PM_QOS_CPU_DMA_LATENCY 0" and
>> I think that DEV_PM_QOS_MIN_FREQUENCY, MAX_S32 is a reasonable way for a
>> device to request "max performance" from devfreq.
>>
>> Rejecting min > max is complicated (what happens to rejected requests
>> when max goes back up?) and I think it's better to just stick with a
>> "max overrides min" policy since it can already deal with conflicts.
>>
>> Do you have a usecase for rejecting out-of-range min_freq from
>> userspace? cpufreq also accepts arbitrary min/max values and handles them.
> 
> I don't mean the rejecting when user enter the out-of-range frequency.
> As I commented, user can enter the value they want. But, we should
> check the range of input because devfreq have to show the correct supported
> frequency through sysfs.

We can avoid showing an out-of-range value in min_freq by printing 
min(min_freq, max_freq).

The max_freq value from pm_qos can still be between OPPs so maybe print 
devfreq_recommended_opp(max_freq, DEVFREQ_FLAG_LEAST_UPPER_BOUND).

>> In theory pm_qos could be extended to differentiate between "soft"
>> requests (current behavior) and "hard" requests which return errors if
>> they can't be guaranteed but this is far out of scope.
> 
> I think that you agreed the limitation of dev_pm_qos when entering
> or requesting the unsupported frequency.

Yes, "best effort qos" is acceptable for now.
Chanwoo Choi Aug. 23, 2019, 8:40 a.m. UTC | #7
On 19. 8. 22. 오후 9:24, Leonard Crestez wrote:
> On 22.08.2019 14:06, Chanwoo Choi wrote:
>> On 19. 8. 22. 오후 7:58, Leonard Crestez wrote:
>>> On 8/22/2019 1:03 PM, Chanwoo Choi wrote:
>>>> This patch doesn't check the range of input value
>>>> with the supported frequencies of devfreq device.
>>>>
>>>> For example,
>>>> The devfreq0 device has the following available frequencies
>>>> 100000000 200000000 300000000
>>>>
>>>> and then user enters the 500000000 as following:
>>>> echo 500000000 > /sys/class/devfreq/devfreq0/min_freq
>>>>
>>>> In result, get_effective_min_freq() will return the 500000000.
>>>> It is wrong value. it show the unsupported frequency through
>>>> min_freq sysfs path.
>>>
>>> Through dev_pm_qos devices can also ask for a freq higher than the
>>> maximum OPP and unlike sysfs there is no way to reject such requests,
>>> instead PM qos claims it's based on "best effort".
>>>
>>> There are many requests in the kernel for "PM_QOS_CPU_DMA_LATENCY 0" and
>>> I think that DEV_PM_QOS_MIN_FREQUENCY, MAX_S32 is a reasonable way for a
>>> device to request "max performance" from devfreq.
>>>
>>> Rejecting min > max is complicated (what happens to rejected requests
>>> when max goes back up?) and I think it's better to just stick with a
>>> "max overrides min" policy since it can already deal with conflicts.
>>>
>>> Do you have a usecase for rejecting out-of-range min_freq from
>>> userspace? cpufreq also accepts arbitrary min/max values and handles them.
>>
>> I don't mean the rejecting when user enter the out-of-range frequency.
>> As I commented, user can enter the value they want. But, we should
>> check the range of input because devfreq have to show the correct supported
>> frequency through sysfs.
> 
> We can avoid showing an out-of-range value in min_freq by printing 
> min(min_freq, max_freq).

You can check the range of frequency in the get_min_freq()/get_max_freq().
I want to return the meaningful frequency from get_min_freq()/get_max_freq().

Everyone expects get_min_freq()/get_max_freq() functions will retunrn
the supported min/max frequency. If get_min_freq()/get_max_freq()
return the our-of-range frequency, it is not nice and cause the confusion
why these functions return the out-of-range frequency..

Also, if we don't check the return value of dev_pm_qos_read_value(),
the out-of-range frequency of dev_pm_qos_read_value() would be used
on multiple point of devfreq.c. I think that it is not good.

> 
> The max_freq value from pm_qos can still be between OPPs so maybe print 
> devfreq_recommended_opp(max_freq, DEVFREQ_FLAG_LEAST_UPPER_BOUND).
> 
>>> In theory pm_qos could be extended to differentiate between "soft"
>>> requests (current behavior) and "hard" requests which return errors if
>>> they can't be guaranteed but this is far out of scope.
>>
>> I think that you agreed the limitation of dev_pm_qos when entering
>> or requesting the unsupported frequency.
> 
> Yes, "best effort qos" is acceptable for now.
> 

And please don't remove the my previous comment.
Just reply your opinion without any removal.
Leonard Crestez Aug. 23, 2019, 11:47 a.m. UTC | #8
On 23.08.2019 11:36, Chanwoo Choi wrote:
> On 19. 8. 22. 오후 9:24, Leonard Crestez wrote:
>> On 22.08.2019 14:06, Chanwoo Choi wrote:
>>> On 19. 8. 22. 오후 7:58, Leonard Crestez wrote:
>>>> On 8/22/2019 1:03 PM, Chanwoo Choi wrote:
>>>>> This patch doesn't check the range of input value
>>>>> with the supported frequencies of devfreq device.
>>>>>
>>>>> For example,
>>>>> The devfreq0 device has the following available frequencies
>>>>> 100000000 200000000 300000000
>>>>>
>>>>> and then user enters the 500000000 as following:
>>>>> echo 500000000 > /sys/class/devfreq/devfreq0/min_freq
>>>>>
>>>>> In result, get_effective_min_freq() will return the 500000000.
>>>>> It is wrong value. it show the unsupported frequency through
>>>>> min_freq sysfs path.
>>>>
>>>> Through dev_pm_qos devices can also ask for a freq higher than the
>>>> maximum OPP and unlike sysfs there is no way to reject such requests,
>>>> instead PM qos claims it's based on "best effort".
>>>>
>>>> There are many requests in the kernel for "PM_QOS_CPU_DMA_LATENCY 0" and
>>>> I think that DEV_PM_QOS_MIN_FREQUENCY, MAX_S32 is a reasonable way for a
>>>> device to request "max performance" from devfreq.
>>>>
>>>> Rejecting min > max is complicated (what happens to rejected requests
>>>> when max goes back up?) and I think it's better to just stick with a
>>>> "max overrides min" policy since it can already deal with conflicts.
>>>>
>>>> Do you have a usecase for rejecting out-of-range min_freq from
>>>> userspace? cpufreq also accepts arbitrary min/max values and handles them.
>>>
>>> I don't mean the rejecting when user enter the out-of-range frequency.
>>> As I commented, user can enter the value they want. But, we should
>>> check the range of input because devfreq have to show the correct supported
>>> frequency through sysfs.
>>
>> We can avoid showing an out-of-range value in min_freq by printing
>> min(min_freq, max_freq).
> 
> You can check the range of frequency in the get_min_freq()/get_max_freq().
> I want to return the meaningful frequency from get_min_freq()/get_max_freq().
> 
> Everyone expects get_min_freq()/get_max_freq() functions will retunrn
> the supported min/max frequency. If get_min_freq()/get_max_freq()
> return the our-of-range frequency, it is not nice and cause the confusion
> why these functions return the out-of-range frequency..
> 
> Also, if we don't check the return value of dev_pm_qos_read_value(),
> the out-of-range frequency of dev_pm_qos_read_value() would be used
> on multiple point of devfreq.c. I think that it is not good.

OK, I will try to partially move the min/max conflict logic from 
"update_devfreq" to a "get_freq_range" function and call that from 
show_min_freq/show_max_freq.

>> The max_freq value from pm_qos can still be between OPPs so maybe print
>> devfreq_recommended_opp(max_freq, DEVFREQ_FLAG_LEAST_UPPER_BOUND).
>>
>>>> In theory pm_qos could be extended to differentiate between "soft"
>>>> requests (current behavior) and "hard" requests which return errors if
>>>> they can't be guaranteed but this is far out of scope.
>>>
>>> I think that you agreed the limitation of dev_pm_qos when entering
>>> or requesting the unsupported frequency.
>>
>> Yes, "best effort qos" is acceptable for now.
> 
> And please don't remove the my previous comment.
> Just reply your opinion without any removal.

Sorry, when responding I usually trim sections which are very old or 
where there appears to be broad agreement.

--
Regards,
Leonard
diff mbox series

Patch

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 58deffa52a37..687deadd08ed 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -101,21 +101,21 @@  static unsigned long find_available_max_freq(struct devfreq *devfreq)
 
 static unsigned long get_effective_min_freq(struct devfreq *devfreq)
 {
 	lockdep_assert_held(&devfreq->lock);
 
-	return max3(devfreq->scaling_min_freq, devfreq->min_freq,
+	return max(devfreq->scaling_min_freq,
 		1000 * (unsigned long)dev_pm_qos_read_value(
 				devfreq->dev.parent,
 				DEV_PM_QOS_MIN_FREQUENCY));
 }
 
 static unsigned long get_effective_max_freq(struct devfreq *devfreq)
 {
 	lockdep_assert_held(&devfreq->lock);
 
-	return min3(devfreq->scaling_max_freq, devfreq->max_freq,
+	return min(devfreq->scaling_max_freq,
 		1000 * (unsigned long)dev_pm_qos_read_value(
 				devfreq->dev.parent,
 				DEV_PM_QOS_MAX_FREQUENCY));
 }
 
@@ -644,10 +644,12 @@  static void devfreq_dev_release(struct device *dev)
 
 	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max,
 			DEV_PM_QOS_MAX_FREQUENCY);
 	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min,
 			DEV_PM_QOS_MIN_FREQUENCY);
+	dev_pm_qos_remove_request(&devfreq->max_freq_req);
+	dev_pm_qos_remove_request(&devfreq->min_freq_req);
 	mutex_destroy(&devfreq->lock);
 	kfree(devfreq);
 }
 
 /**
@@ -698,10 +700,19 @@  struct devfreq *devfreq_add_device(struct device *dev,
 	devfreq->previous_freq = profile->initial_freq;
 	devfreq->last_status.current_frequency = profile->initial_freq;
 	devfreq->data = data;
 	devfreq->nb.notifier_call = devfreq_notifier_call;
 
+	err = dev_pm_qos_add_request(dev, &devfreq->min_freq_req,
+				     DEV_PM_QOS_MIN_FREQUENCY, 0);
+	if (err < 0)
+		goto err_dev;
+	err = dev_pm_qos_add_request(dev, &devfreq->max_freq_req,
+				     DEV_PM_QOS_MAX_FREQUENCY, S32_MAX);
+	if (err < 0)
+		goto err_dev;
+
 	/*
 	 * notifier from pm_qos
 	 *
 	 * initialized outside of devfreq->lock to avoid circular warning
 	 * between devfreq->lock and dev_pm_qos_mtx
@@ -732,19 +743,17 @@  struct devfreq *devfreq_add_device(struct device *dev,
 	if (!devfreq->scaling_min_freq) {
 		mutex_unlock(&devfreq->lock);
 		err = -EINVAL;
 		goto err_dev;
 	}
-	devfreq->min_freq = devfreq->scaling_min_freq;
 
 	devfreq->scaling_max_freq = find_available_max_freq(devfreq);
 	if (!devfreq->scaling_max_freq) {
 		mutex_unlock(&devfreq->lock);
 		err = -EINVAL;
 		goto err_dev;
 	}
-	devfreq->max_freq = devfreq->scaling_max_freq;
 
 	devfreq->suspend_freq = dev_pm_opp_get_suspend_opp_freq(dev);
 	atomic_set(&devfreq->suspend_count, 0);
 
 	dev_set_name(&devfreq->dev, "devfreq%d",
@@ -816,10 +825,14 @@  struct devfreq *devfreq_add_device(struct device *dev,
 err_dev:
 	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max,
 				   DEV_PM_QOS_MAX_FREQUENCY);
 	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min,
 				   DEV_PM_QOS_MIN_FREQUENCY);
+	if (dev_pm_qos_request_active(&devfreq->max_freq_req))
+		dev_pm_qos_remove_request(&devfreq->max_freq_req);
+	if (dev_pm_qos_request_active(&devfreq->min_freq_req))
+		dev_pm_qos_remove_request(&devfreq->min_freq_req);
 	kfree(devfreq);
 err_out:
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(devfreq_add_device);
@@ -1358,33 +1371,20 @@  static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
 
 	ret = sscanf(buf, "%lu", &value);
 	if (ret != 1)
 		return -EINVAL;
 
-	mutex_lock(&df->lock);
-
-	if (value) {
-		if (value > df->max_freq) {
-			ret = -EINVAL;
-			goto unlock;
-		}
-	} else {
-		unsigned long *freq_table = df->profile->freq_table;
+	if (value)
+		value = value / 1000;
+	else
+		value = 0;
 
-		/* Get minimum frequency according to sorting order */
-		if (freq_table[0] < freq_table[df->profile->max_state - 1])
-			value = freq_table[0];
-		else
-			value = freq_table[df->profile->max_state - 1];
-	}
+	ret = dev_pm_qos_update_request(&df->min_freq_req, value);
+	if (ret < 0)
+		return ret;
 
-	df->min_freq = value;
-	update_devfreq(df);
-	ret = count;
-unlock:
-	mutex_unlock(&df->lock);
-	return ret;
+	return count;
 }
 
 static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
@@ -1407,33 +1407,20 @@  static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
 
 	ret = sscanf(buf, "%lu", &value);
 	if (ret != 1)
 		return -EINVAL;
 
-	mutex_lock(&df->lock);
-
-	if (value) {
-		if (value < df->min_freq) {
-			ret = -EINVAL;
-			goto unlock;
-		}
-	} else {
-		unsigned long *freq_table = df->profile->freq_table;
+	if (value)
+		value = DIV_ROUND_UP(value, 1000);
+	else
+		value = S32_MAX;
 
-		/* Get maximum frequency according to sorting order */
-		if (freq_table[0] < freq_table[df->profile->max_state - 1])
-			value = freq_table[df->profile->max_state - 1];
-		else
-			value = freq_table[0];
-	}
+	ret = dev_pm_qos_update_request(&df->max_freq_req, value);
+	if (ret < 0)
+		return ret;
 
-	df->max_freq = value;
-	update_devfreq(df);
-	ret = count;
-unlock:
-	mutex_unlock(&df->lock);
-	return ret;
+	return count;
 }
 static DEVICE_ATTR_RW(min_freq);
 
 static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 8b92ccbd1962..d2c5bb7add0a 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -11,10 +11,11 @@ 
 #define __LINUX_DEVFREQ_H__
 
 #include <linux/device.h>
 #include <linux/notifier.h>
 #include <linux/pm_opp.h>
+#include <linux/pm_qos.h>
 
 #define DEVFREQ_NAME_LEN 16
 
 /* DEVFREQ governor name */
 #define DEVFREQ_GOV_SIMPLE_ONDEMAND	"simple_ondemand"
@@ -121,12 +122,12 @@  struct devfreq_dev_profile {
  *		devfreq.nb to the corresponding register notifier call chain.
  * @work:	delayed work for load monitoring.
  * @previous_freq:	previously configured frequency value.
  * @data:	Private data of the governor. The devfreq framework does not
  *		touch this.
- * @min_freq:	Limit minimum frequency requested by user (0: none)
- * @max_freq:	Limit maximum frequency requested by user (0: none)
+ * @min_freq_req:	Limit minimum frequency requested by user (0: none)
+ * @max_freq_req:	Limit maximum frequency requested by user (0: none)
  * @scaling_min_freq:	Limit minimum frequency requested by OPP interface
  * @scaling_max_freq:	Limit maximum frequency requested by OPP interface
  * @stop_polling:	 devfreq polling status of a device.
  * @suspend_freq:	 frequency of a device set during suspend phase.
  * @resume_freq:	 frequency of a device set in resume phase.
@@ -161,12 +162,12 @@  struct devfreq {
 	unsigned long previous_freq;
 	struct devfreq_dev_status last_status;
 
 	void *data; /* private data for governors */
 
-	unsigned long min_freq;
-	unsigned long max_freq;
+	struct dev_pm_qos_request min_freq_req;
+	struct dev_pm_qos_request max_freq_req;
 	unsigned long scaling_min_freq;
 	unsigned long scaling_max_freq;
 	bool stop_polling;
 
 	unsigned long suspend_freq;