diff mbox series

[PATCHv2] PM / devfreq: Add dev_pm_qos support

Message ID e45c28528ff941abb1f72fdb1eedf65fb3345c5a.1565274802.git.leonard.crestez@nxp.com (mailing list archive)
State Superseded
Headers show
Series [PATCHv2] PM / devfreq: Add dev_pm_qos support | expand

Commit Message

Leonard Crestez Aug. 8, 2019, 2:39 p.m. UTC
Add dev_pm_qos notifies to devfreq core in order to support frequency
limits via the dev_pm_qos_add_request.

Unlike the rest of devfreq the dev_pm_qos frequency is measured Khz:
this is consistent with current dev_pm_qos usage for cpufreq and allows
frequencies above 2Ghz (pm_qos expresses limits as s32).

Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>

---
Changes since v1:
 * Add doxygen comments for min_nb/max_nb
 * Remove notifiers on error/cleanup paths. Keep gotos simple by relying
on dev_pm_qos_remove_notifier ignoring notifiers which were not added.
Link to v1: https://patchwork.kernel.org/patch/11078475/

 drivers/devfreq/devfreq.c | 83 ++++++++++++++++++++++++++++++++++-----
 include/linux/devfreq.h   |  5 +++
 2 files changed, 79 insertions(+), 9 deletions(-)

Comments

Chanwoo Choi Aug. 13, 2019, 6:14 a.m. UTC | #1
Hi,

In case of cpufreq, cpufreq.c replace the body of store_min_freq()
and store_max_freq() by using struct dev_pm_qos_request instancce
with dev_pm_qos_update_request().

If you use the new way with dev_pm_qos_update_request() for
min_freq_store() and max_freq_store(), it doesn't need to
get the final frequency from three candidate frequencies.

In result, We only consider the following two candidate frequencies
as following:
1. "devfreq->scaling_min_freq" will contain the requirement
   from devfreq thermal throttling by OPP interface.
2. "devfreq->min_freq" will contain the requirements
   from both user input through sysfs and the dev_pm_qos_request.

On 19. 8. 8. 오후 11:39, Leonard Crestez wrote:
> Add dev_pm_qos notifies to devfreq core in order to support frequency
> limits via the dev_pm_qos_add_request.
>
> Unlike the rest of devfreq the dev_pm_qos frequency is measured Khz:
> this is consistent with current dev_pm_qos usage for cpufreq and allows
> frequencies above 2Ghz (pm_qos expresses limits as s32).
>
> Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
>
> ---
> Changes since v1:
>  * Add doxygen comments for min_nb/max_nb
>  * Remove notifiers on error/cleanup paths. Keep gotos simple by relying
> on dev_pm_qos_remove_notifier ignoring notifiers which were not added.
> Link to v1: https://patchwork.kernel.org/patch/11078475/
>
>  drivers/devfreq/devfreq.c | 83 ++++++++++++++++++++++++++++++++++-----
>  include/linux/devfreq.h   |  5 +++
>  2 files changed, 79 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
> index 784c08e4f931..7f1f273562f4 100644
> --- a/drivers/devfreq/devfreq.c
> +++ b/drivers/devfreq/devfreq.c
> @@ -22,10 +22,11 @@
>  #include <linux/platform_device.h>
>  #include <linux/list.h>
>  #include <linux/printk.h>
>  #include <linux/hrtimer.h>
>  #include <linux/of.h>
> +#include <linux/pm_qos.h>
>  #include "governor.h"
>  
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/devfreq.h>
>  
> @@ -96,10 +97,26 @@ static unsigned long find_available_max_freq(struct devfreq *devfreq)
>  		dev_pm_opp_put(opp);
>  
>  	return max_freq;
>  }
>  
> +static unsigned long get_effective_min_freq(struct devfreq *devfreq)
> +{
> +	return max3(devfreq->scaling_min_freq, devfreq->min_freq,
> +		1000 * (unsigned long)dev_pm_qos_read_value(
> +				devfreq->dev.parent,
> +				DEV_PM_QOS_MIN_FREQUENCY));

As I commented above, if we use the dev_pm_qos_request, it is not needed.

> +}
> +
> +static unsigned long get_effective_max_freq(struct devfreq *devfreq)
> +{
> +	return min3(devfreq->scaling_max_freq, devfreq->max_freq,
> +		1000 * (unsigned long)dev_pm_qos_read_value(
> +				devfreq->dev.parent,
> +				DEV_PM_QOS_MAX_FREQUENCY));
> +}

ditto.

> +
>  /**
>   * devfreq_get_freq_level() - Lookup freq_table for the frequency
>   * @devfreq:	the devfreq instance
>   * @freq:	the target frequency
>   */
> @@ -356,12 +373,12 @@ int update_devfreq(struct devfreq *devfreq)
>  	 *
>  	 * List from the highest priority
>  	 * max_freq
>  	 * min_freq
>  	 */
> -	max_freq = min(devfreq->scaling_max_freq, devfreq->max_freq);
> -	min_freq = max(devfreq->scaling_min_freq, devfreq->min_freq);
> +	max_freq = get_effective_max_freq(devfreq);
> +	min_freq = get_effective_min_freq(devfreq);
>  
>  	if (freq < min_freq) {
>  		freq = min_freq;
>  		flags &= ~DEVFREQ_FLAG_LEAST_UPPER_BOUND; /* Use GLB */
>  	}
> @@ -570,10 +587,31 @@ static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
>  	mutex_unlock(&devfreq->lock);
>  
>  	return ret;
>  }
>  
> +static int devfreq_qos_notifier_call(struct devfreq *devfreq)
> +{
> +	int ret;
> +
> +	mutex_lock(&devfreq->lock);
> +	ret = update_devfreq(devfreq);
> +	mutex_unlock(&devfreq->lock);
> +
> +	return ret;
> +}
> +
> +static int devfreq_qos_min_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr)

Please keep the under 80 line if there are no any special reason.

> +{
> +	return devfreq_qos_notifier_call(container_of(nb, struct devfreq, nb_min));

ditto.

> +}
> +
> +static int devfreq_qos_max_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr)

ditto.

> +{
> +	return devfreq_qos_notifier_call(container_of(nb, struct devfreq, nb_max));

ditto.

> +}
> +
>  /**
>   * devfreq_dev_release() - Callback for struct device to release the device.
>   * @dev:	the devfreq device
>   *
>   * Remove devfreq from the list and release its resources.
> @@ -592,10 +630,14 @@ static void devfreq_dev_release(struct device *dev)
>  	mutex_unlock(&devfreq_list_lock);
>  
>  	if (devfreq->profile->exit)
>  		devfreq->profile->exit(devfreq->dev.parent);
>  
> +	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max,
> +			DEV_PM_QOS_MAX_FREQUENCY);
> +	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min,
> +			DEV_PM_QOS_MIN_FREQUENCY);
>  	mutex_destroy(&devfreq->lock);
>  	kfree(devfreq);
>  }
>  
>  /**
> @@ -636,21 +678,44 @@ struct devfreq *devfreq_add_device(struct device *dev,
>  		err = -ENOMEM;
>  		goto err_out;
>  	}
>  
>  	mutex_init(&devfreq->lock);
> -	mutex_lock(&devfreq->lock);
>  	devfreq->dev.parent = dev;
>  	devfreq->dev.class = devfreq_class;
>  	devfreq->dev.release = devfreq_dev_release;
>  	devfreq->profile = profile;
>  	strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN);
>  	devfreq->previous_freq = profile->initial_freq;
>  	devfreq->last_status.current_frequency = profile->initial_freq;
>  	devfreq->data = data;
>  	devfreq->nb.notifier_call = devfreq_notifier_call;
>  
> +	/*
> +	 * notifier from pm_qos
> +	 *
> +	 * initialized outside of devfreq->lock to avoid circular warning
> +	 * between devfreq->lock and dev_pm_qos_mtx
> +	 */
> +	devfreq->nb_min.notifier_call = devfreq_qos_min_notifier_call;
> +	devfreq->nb_max.notifier_call = devfreq_qos_max_notifier_call;
> +
> +	err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_min,
> +				      DEV_PM_QOS_MIN_FREQUENCY);
> +	if (err) {
> +		dev_err(dev, "Failed to register MIN QoS notifier: %d\n", err);
> +		goto err_dev;
> +	}
> +
> +	err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_max,
> +				      DEV_PM_QOS_MAX_FREQUENCY);
> +	if (err) {
> +		dev_err(dev, "Failed to register MAX QoS notifier: %d\n", err);
> +		goto err_dev;
> +	}
> +
> +	mutex_lock(&devfreq->lock);
>  	if (!devfreq->profile->max_state && !devfreq->profile->freq_table) {
>  		mutex_unlock(&devfreq->lock);
>  		err = set_freq_table(devfreq);
>  		if (err < 0)
>  			goto err_dev;
> @@ -741,10 +806,14 @@ struct devfreq *devfreq_add_device(struct device *dev,
>  	mutex_unlock(&devfreq_list_lock);
>  err_devfreq:
>  	devfreq_remove_device(devfreq);
>  	devfreq = NULL;
>  err_dev:
> +	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max,
> +			DEV_PM_QOS_MAX_FREQUENCY);
> +	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min,
> +			DEV_PM_QOS_MIN_FREQUENCY);
>  	kfree(devfreq);
>  err_out:
>  	return ERR_PTR(err);
>  }
>  EXPORT_SYMBOL(devfreq_add_device);
> @@ -1311,13 +1380,11 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
>  }
>  
>  static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
>  			     char *buf)
>  {
> -	struct devfreq *df = to_devfreq(dev);
> -
> -	return sprintf(buf, "%lu\n", max(df->scaling_min_freq, df->min_freq));
> +	return sprintf(buf, "%lu\n", get_effective_min_freq(to_devfreq(dev)));
>  }
>  
>  static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
>  			      const char *buf, size_t count)
>  {
> @@ -1356,13 +1423,11 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
>  static DEVICE_ATTR_RW(min_freq);
>  
>  static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
>  			     char *buf)
>  {
> -	struct devfreq *df = to_devfreq(dev);
> -
> -	return sprintf(buf, "%lu\n", min(df->scaling_max_freq, df->max_freq));
> +	return sprintf(buf, "%lu\n", get_effective_max_freq(to_devfreq(dev)));
>  }
>  static DEVICE_ATTR_RW(max_freq);
>  
>  static ssize_t available_frequencies_show(struct device *d,
>  					  struct device_attribute *attr,
> diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
> index 2bae9ed3c783..8b92ccbd1962 100644
> --- a/include/linux/devfreq.h
> +++ b/include/linux/devfreq.h
> @@ -134,10 +134,12 @@ struct devfreq_dev_profile {
>   * @total_trans:	Number of devfreq transitions
>   * @trans_table:	Statistics of devfreq transitions
>   * @time_in_state:	Statistics of devfreq states
>   * @last_stat_updated:	The last time stat updated
>   * @transition_notifier_list: list head of DEVFREQ_TRANSITION_NOTIFIER notifier
> + * @nb_min:		Notifier block for DEV_PM_QOS_MIN_FREQUENCY
> + * @nb_max:		Notifier block for DEV_PM_QOS_MAX_FREQUENCY
>   *
>   * This structure stores the devfreq information for a give device.
>   *
>   * Note that when a governor accesses entries in struct devfreq in its
>   * functions except for the context of callbacks defined in struct
> @@ -176,10 +178,13 @@ struct devfreq {
>  	unsigned int *trans_table;
>  	unsigned long *time_in_state;
>  	unsigned long last_stat_updated;
>  
>  	struct srcu_notifier_head transition_notifier_list;
> +
> +	struct notifier_block nb_min;
> +	struct notifier_block nb_max;
>  };
>  
>  struct devfreq_freqs {
>  	unsigned long old;
>  	unsigned long new;
>
Leonard Crestez Aug. 13, 2019, 11:27 a.m. UTC | #2
On 13.08.2019 09:10, Chanwoo Choi wrote:
> In case of cpufreq, cpufreq.c replace the body of store_min_freq()
> and store_max_freq() by using struct dev_pm_qos_request instancce
> with dev_pm_qos_update_request().
> 
> If you use the new way with dev_pm_qos_update_request() for
> min_freq_store() and max_freq_store(), it doesn't need to
> get the final frequency from three candidate frequencies.

Yes, I saw that but didn't implement the equivalent for devfreq because 
it's not clear what there is to gain.

Since dev_pm_qos is measured in khz it means that min_freq/max_req on 
sysfq would lose 3 significant digits, however those digits are probably 
useless anyway.

> In result, We only consider the following two candidate frequencies
> as following:
> 1. "devfreq->scaling_min_freq" will contain the requirement
>     from devfreq thermal throttling by OPP interface.

It's a bit awkward that the OPPs enable/disable API is not obviously 
specific to "thermal".

> 2. "devfreq->min_freq" will contain the requirements
>     from both user input through sysfs and the dev_pm_qos_request.
According to a comment on a previous version it would be useful to have 
a separate files for "constraint min/max freq" and "user min/max freq":

     https://patchwork.kernel.org/patch/11078475/#22805379

Combining min/max requests from dev_pm_qos and sysfs would make this 
harder to implement. I guess user_min_freq could be implemented by 
reading back the dev_pm_qos request but there would be no way to 
implement a qos_min_freq entry.

>> +static int devfreq_qos_min_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr)
> 
> Please keep the under 80 line if there are no any special reason.

OK, will check.
Chanwoo Choi Aug. 14, 2019, 1:06 a.m. UTC | #3
Hi,

On 19. 8. 13. 오후 8:27, Leonard Crestez wrote:
> On 13.08.2019 09:10, Chanwoo Choi wrote:
>> In case of cpufreq, cpufreq.c replace the body of store_min_freq()
>> and store_max_freq() by using struct dev_pm_qos_request instancce
>> with dev_pm_qos_update_request().
>>
>> If you use the new way with dev_pm_qos_update_request() for
>> min_freq_store() and max_freq_store(), it doesn't need to
>> get the final frequency from three candidate frequencies.
> 
> Yes, I saw that but didn't implement the equivalent for devfreq because 
> it's not clear what there is to gain.

I think that it is clear. Just use the dev_pm_qos_request interface
for both user input through sysfs and device input with qos request.
Already PM_QOS has the feature to get the final freuency among
the multiple request. When use the dev_pm_qos request, the devfreq
doesn't need to compare between user input and device input with qos.
It make devfreq core more clear and simple

> 
> Since dev_pm_qos is measured in khz it means that min_freq/max_req on 
> sysfq would lose 3 significant digits, however those digits are probably 
> useless anyway.

I think that it doesn't matter. This patch already considers the this issue
by using '* 1000'. We can get either KHz or MHz with additional operation.
I think that it is not problem.

> 
>> In result, We only consider the following two candidate frequencies
>> as following:
>> 1. "devfreq->scaling_min_freq" will contain the requirement
>>     from devfreq thermal throttling by OPP interface.
> 
> It's a bit awkward that the OPPs enable/disable API is not obviously 
> specific to "thermal".

driver/thermal/devfreq_cooling.c uses the OPP interface to enable/disable
the OPP entries according to the thermal throttling policy. 

> 
>> 2. "devfreq->min_freq" will contain the requirements
>>     from both user input through sysfs and the dev_pm_qos_request.
> According to a comment on a previous version it would be useful to have 
> a separate files for "constraint min/max freq" and "user min/max freq":
> 
>      https://patchwork.kernel.org/patch/11078475/#22805379

Actually, I think that I want to use the only dev_pm_qos_request
for all external request like devfreq cooling of thermal,
user input through sysfs and device request with dev_pm_qos_request.

Already, dev_pm_qos_request is designed to consider the multiple requirements.
We don't need to use the various method (OPP interface, sysfs input, dev_pm_qos)
because make it more simple and easy.

I think that after finished the review of this patch, I will do refactor the devfreq_cooling.c
by using the dev_pm_qos_request. Or, if there are some volunteeer,

> 
> Combining min/max requests from dev_pm_qos and sysfs would make this 
> harder to implement. I guess user_min_freq could be implemented by 

I think that it is not difficult. Just make the different dev_pm_qos_request
instances. When qos has the multiple request from one more dev_pm_qos_request,
just get the value by using dev_pm_qos_read_value().
- a dev_pm_qos_request for user input
- b dev_pm_qos_request for device request on other device driver


> reading back the dev_pm_qos request but there would be no way to 
> implement a qos_min_freq entry.

I don't understand. Just devfreq show the min freq from dev_pm_qos_request
which contains the all requirement of minimum frequency.

If there are no any request to dev_pm_qos_request, we can just
get the minimum frequency from dev_pm_qos_request because 
the devfreq device would call the dev_pm_qos_update_request(..., min_freq)
on the probe time.

If I know the wrong information, please let me know.


> 
>>> +static int devfreq_qos_min_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr)
>>
>> Please keep the under 80 line if there are no any special reason.
> 
> OK, will check.
> 
>
Chanwoo Choi Aug. 14, 2019, 1:17 a.m. UTC | #4
On 19. 8. 14. 오전 10:06, Chanwoo Choi wrote:
> Hi,
> 
> On 19. 8. 13. 오후 8:27, Leonard Crestez wrote:
>> On 13.08.2019 09:10, Chanwoo Choi wrote:
>>> In case of cpufreq, cpufreq.c replace the body of store_min_freq()
>>> and store_max_freq() by using struct dev_pm_qos_request instancce
>>> with dev_pm_qos_update_request().
>>>
>>> If you use the new way with dev_pm_qos_update_request() for
>>> min_freq_store() and max_freq_store(), it doesn't need to
>>> get the final frequency from three candidate frequencies.
>>
>> Yes, I saw that but didn't implement the equivalent for devfreq because 
>> it's not clear what there is to gain.
> 
> I think that it is clear. Just use the dev_pm_qos_request interface
> for both user input through sysfs and device input with qos request.
> Already PM_QOS has the feature to get the final freuency among
> the multiple request. When use the dev_pm_qos request, the devfreq
> doesn't need to compare between user input and device input with qos.
> It make devfreq core more clear and simple
> 
>>
>> Since dev_pm_qos is measured in khz it means that min_freq/max_req on 
>> sysfq would lose 3 significant digits, however those digits are probably 
>> useless anyway.
> 
> I think that it doesn't matter. This patch already considers the this issue
> by using '* 1000'. We can get either KHz or MHz with additional operation.
> I think that it is not problem.
> 
>>
>>> In result, We only consider the following two candidate frequencies
>>> as following:
>>> 1. "devfreq->scaling_min_freq" will contain the requirement
>>>     from devfreq thermal throttling by OPP interface.
>>
>> It's a bit awkward that the OPPs enable/disable API is not obviously 
>> specific to "thermal".
> 
> driver/thermal/devfreq_cooling.c uses the OPP interface to enable/disable
> the OPP entries according to the thermal throttling policy. 
> 
>>
>>> 2. "devfreq->min_freq" will contain the requirements
>>>     from both user input through sysfs and the dev_pm_qos_request.
>> According to a comment on a previous version it would be useful to have 
>> a separate files for "constraint min/max freq" and "user min/max freq":
>>
>>      https://patchwork.kernel.org/patch/11078475/#22805379
> 
> Actually, I think that I want to use the only dev_pm_qos_request
> for all external request like devfreq cooling of thermal,
> user input through sysfs and device request with dev_pm_qos_request.
> 
> Already, dev_pm_qos_request is designed to consider the multiple requirements.
> We don't need to use the various method (OPP interface, sysfs input, dev_pm_qos)
> because make it more simple and easy.
> 
> I think that after finished the review of this patch, I will do refactor the devfreq_cooling.c
> by using the dev_pm_qos_request. Or, if there are some volunteeer,

Sorry, I would withdraw the this opinion about replacing
the OPP enable/disable interface with the dev_pm_qos_request
because even if devfreq-cooling.c needs the 'dev' instance
to use the dev_pm_qos_request method, it is not clear until now.
It needs how to get the device instance of devfreq on device-tree.

Keep discussing without it. 

> 
>>
>> Combining min/max requests from dev_pm_qos and sysfs would make this 
>> harder to implement. I guess user_min_freq could be implemented by 
> 
> I think that it is not difficult. Just make the different dev_pm_qos_request
> instances. When qos has the multiple request from one more dev_pm_qos_request,
> just get the value by using dev_pm_qos_read_value().
> - a dev_pm_qos_request for user input
> - b dev_pm_qos_request for device request on other device driver
> 
> 
>> reading back the dev_pm_qos request but there would be no way to 
>> implement a qos_min_freq entry.
> 
> I don't understand. Just devfreq show the min freq from dev_pm_qos_request
> which contains the all requirement of minimum frequency.
> 
> If there are no any request to dev_pm_qos_request, we can just
> get the minimum frequency from dev_pm_qos_request because 
> the devfreq device would call the dev_pm_qos_update_request(..., min_freq)
> on the probe time.
> 
> If I know the wrong information, please let me know.
> 
> 
>>
>>>> +static int devfreq_qos_min_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr)
>>>
>>> Please keep the under 80 line if there are no any special reason.
>>
>> OK, will check.
>>
>>
> 
>
Leonard Crestez Aug. 20, 2019, 3:26 p.m. UTC | #5
On 8/14/2019 4:14 AM, Chanwoo Choi wrote:
> On 19. 8. 14. 오전 10:06, Chanwoo Choi wrote:
>> On 19. 8. 13. 오후 8:27, Leonard Crestez wrote:
>>> On 13.08.2019 09:10, Chanwoo Choi wrote:
>>>> In case of cpufreq, cpufreq.c replace the body of store_min_freq()
>>>> and store_max_freq() by using struct dev_pm_qos_request instancce
>>>> with dev_pm_qos_update_request().
>>>>
>>>> If you use the new way with dev_pm_qos_update_request() for
>>>> min_freq_store() and max_freq_store(), it doesn't need to
>>>> get the final frequency from three candidate frequencies.
>>>
>>> Yes, I saw that but didn't implement the equivalent for devfreq because
>>> it's not clear what there is to gain.
>>
>> I think that it is clear. Just use the dev_pm_qos_request interface
>> for both user input through sysfs and device input with qos request.
>> Already PM_QOS has the feature to get the final freuency among
>> the multiple request. When use the dev_pm_qos request, the devfreq
>> doesn't need to compare between user input and device input with qos.
>> It make devfreq core more clear and simple

>>> Since dev_pm_qos is measured in khz it means that min_freq/max_req on
>>> sysfq would lose 3 significant digits, however those digits are probably
>>> useless anyway.
>>
>> I think that it doesn't matter. This patch already considers the this issue
>> by using '* 1000'. We can get either KHz or MHz with additional operation.
>> I think that it is not problem.

It introduces the following issue:

# echo 333333333 > /sys/class/devfreq/devfreq0/min_freq
# cat /sys/class/devfreq/devfreq0/min_freq
333333000

Changing rounding rules could confuse userspace tools. This is not 
entirely a new issue because freq values which are not an integer number 
of khz are likely not an integer number of hz either.

>> Actually, I think that I want to use the only dev_pm_qos_request
>> for all external request like devfreq cooling of thermal,
>> user input through sysfs and device request with dev_pm_qos_request.
>>
>> Already, dev_pm_qos_request is designed to consider the multiple requirements.
>> We don't need to use the various method (OPP interface, sysfs input, dev_pm_qos)
>> because make it more simple and easy.
>>
>> I think that after finished the review of this patch, I will do refactor the devfreq_cooling.c
>> by using the dev_pm_qos_request. Or, if there are some volunteeer,
> 
> Sorry, I would withdraw the this opinion about replacing
> the OPP enable/disable interface with the dev_pm_qos_request
> because even if devfreq-cooling.c needs the 'dev' instance
> to use the dev_pm_qos_request method, it is not clear until now.
> It needs how to get the device instance of devfreq on device-tree.

I looked a bit at the devfreq-cooling implementation and it seems like 
there aren't any users in upstream?

As far as I can tell a devfreq implementation needs to call 
of_devfreq_cooling_register and then the devfreq cooling code could 
register a dev_pm_qos request on devfreq->dev.parent. I'm not sure I 
understand what problem you see.

--
Regards,
Leonard
Chanwoo Choi Aug. 21, 2019, 1:20 a.m. UTC | #6
On 19. 8. 21. 오전 12:26, Leonard Crestez wrote:
> On 8/14/2019 4:14 AM, Chanwoo Choi wrote:
>> On 19. 8. 14. 오전 10:06, Chanwoo Choi wrote:
>>> On 19. 8. 13. 오후 8:27, Leonard Crestez wrote:
>>>> On 13.08.2019 09:10, Chanwoo Choi wrote:
>>>>> In case of cpufreq, cpufreq.c replace the body of store_min_freq()
>>>>> and store_max_freq() by using struct dev_pm_qos_request instancce
>>>>> with dev_pm_qos_update_request().
>>>>>
>>>>> If you use the new way with dev_pm_qos_update_request() for
>>>>> min_freq_store() and max_freq_store(), it doesn't need to
>>>>> get the final frequency from three candidate frequencies.
>>>>
>>>> Yes, I saw that but didn't implement the equivalent for devfreq because
>>>> it's not clear what there is to gain.
>>>
>>> I think that it is clear. Just use the dev_pm_qos_request interface
>>> for both user input through sysfs and device input with qos request.
>>> Already PM_QOS has the feature to get the final freuency among
>>> the multiple request. When use the dev_pm_qos request, the devfreq
>>> doesn't need to compare between user input and device input with qos.
>>> It make devfreq core more clear and simple
> 
>>>> Since dev_pm_qos is measured in khz it means that min_freq/max_req on
>>>> sysfq would lose 3 significant digits, however those digits are probably
>>>> useless anyway.
>>>
>>> I think that it doesn't matter. This patch already considers the this issue
>>> by using '* 1000'. We can get either KHz or MHz with additional operation.
>>> I think that it is not problem.
> 
> It introduces the following issue:
> 
> # echo 333333333 > /sys/class/devfreq/devfreq0/min_freq
> # cat /sys/class/devfreq/devfreq0/min_freq
> 333333000
> 
> Changing rounding rules could confuse userspace tools. This is not 
> entirely a new issue because freq values which are not an integer number 
> of khz are likely not an integer number of hz either.

As I knew that, user don't need to enter the perfect supported clock
of devfreq0 because the final frequency is decided by device driver
with some limitations like the range of h/w clock.

The user can input the wanted frequency like 333333333,
but, the device driver try to find the similar frequency with policy
and the can decide the final supported frequency because the h/w clock
for devfreq0 might not support the perfect same clock frequency of user input.

Also, if it is the problem to lose the 3 significant digits, 
it should be fixed on dev_pm_qos.

> 
>>> Actually, I think that I want to use the only dev_pm_qos_request
>>> for all external request like devfreq cooling of thermal,
>>> user input through sysfs and device request with dev_pm_qos_request.
>>>
>>> Already, dev_pm_qos_request is designed to consider the multiple requirements.
>>> We don't need to use the various method (OPP interface, sysfs input, dev_pm_qos)
>>> because make it more simple and easy.
>>>
>>> I think that after finished the review of this patch, I will do refactor the devfreq_cooling.c
>>> by using the dev_pm_qos_request. Or, if there are some volunteeer,
>>
>> Sorry, I would withdraw the this opinion about replacing
>> the OPP enable/disable interface with the dev_pm_qos_request
>> because even if devfreq-cooling.c needs the 'dev' instance
>> to use the dev_pm_qos_request method, it is not clear until now.
>> It needs how to get the device instance of devfreq on device-tree.
> 
> I looked a bit at the devfreq-cooling implementation and it seems like 
> there aren't any users in upstream?

Right. there are no upstream patch. But, ARM developer contributed
the devfreq-cooling.c in order to control ARM Mali frequency
according to temperature. If you want, you can check
the reference code from ARM Mali kernel driver.

> 
> As far as I can tell a devfreq implementation needs to call 
> of_devfreq_cooling_register and then the devfreq cooling code could 
> register a dev_pm_qos request on devfreq->dev.parent. I'm not sure I 
> understand what problem you see.

Ah. you're right. It is my misunderstand. I though that there are no
any way to get the 'devfreq' instance from device tree. But, I checked
the devfreq-cooling.c again, as you commented, the devfreq-cooling.c
provides the of_devfreq_cooling_reigster().
diff mbox series

Patch

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 784c08e4f931..7f1f273562f4 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -22,10 +22,11 @@ 
 #include <linux/platform_device.h>
 #include <linux/list.h>
 #include <linux/printk.h>
 #include <linux/hrtimer.h>
 #include <linux/of.h>
+#include <linux/pm_qos.h>
 #include "governor.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/devfreq.h>
 
@@ -96,10 +97,26 @@  static unsigned long find_available_max_freq(struct devfreq *devfreq)
 		dev_pm_opp_put(opp);
 
 	return max_freq;
 }
 
+static unsigned long get_effective_min_freq(struct devfreq *devfreq)
+{
+	return max3(devfreq->scaling_min_freq, devfreq->min_freq,
+		1000 * (unsigned long)dev_pm_qos_read_value(
+				devfreq->dev.parent,
+				DEV_PM_QOS_MIN_FREQUENCY));
+}
+
+static unsigned long get_effective_max_freq(struct devfreq *devfreq)
+{
+	return min3(devfreq->scaling_max_freq, devfreq->max_freq,
+		1000 * (unsigned long)dev_pm_qos_read_value(
+				devfreq->dev.parent,
+				DEV_PM_QOS_MAX_FREQUENCY));
+}
+
 /**
  * devfreq_get_freq_level() - Lookup freq_table for the frequency
  * @devfreq:	the devfreq instance
  * @freq:	the target frequency
  */
@@ -356,12 +373,12 @@  int update_devfreq(struct devfreq *devfreq)
 	 *
 	 * List from the highest priority
 	 * max_freq
 	 * min_freq
 	 */
-	max_freq = min(devfreq->scaling_max_freq, devfreq->max_freq);
-	min_freq = max(devfreq->scaling_min_freq, devfreq->min_freq);
+	max_freq = get_effective_max_freq(devfreq);
+	min_freq = get_effective_min_freq(devfreq);
 
 	if (freq < min_freq) {
 		freq = min_freq;
 		flags &= ~DEVFREQ_FLAG_LEAST_UPPER_BOUND; /* Use GLB */
 	}
@@ -570,10 +587,31 @@  static int devfreq_notifier_call(struct notifier_block *nb, unsigned long type,
 	mutex_unlock(&devfreq->lock);
 
 	return ret;
 }
 
+static int devfreq_qos_notifier_call(struct devfreq *devfreq)
+{
+	int ret;
+
+	mutex_lock(&devfreq->lock);
+	ret = update_devfreq(devfreq);
+	mutex_unlock(&devfreq->lock);
+
+	return ret;
+}
+
+static int devfreq_qos_min_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr)
+{
+	return devfreq_qos_notifier_call(container_of(nb, struct devfreq, nb_min));
+}
+
+static int devfreq_qos_max_notifier_call(struct notifier_block *nb, unsigned long val, void *ptr)
+{
+	return devfreq_qos_notifier_call(container_of(nb, struct devfreq, nb_max));
+}
+
 /**
  * devfreq_dev_release() - Callback for struct device to release the device.
  * @dev:	the devfreq device
  *
  * Remove devfreq from the list and release its resources.
@@ -592,10 +630,14 @@  static void devfreq_dev_release(struct device *dev)
 	mutex_unlock(&devfreq_list_lock);
 
 	if (devfreq->profile->exit)
 		devfreq->profile->exit(devfreq->dev.parent);
 
+	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max,
+			DEV_PM_QOS_MAX_FREQUENCY);
+	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min,
+			DEV_PM_QOS_MIN_FREQUENCY);
 	mutex_destroy(&devfreq->lock);
 	kfree(devfreq);
 }
 
 /**
@@ -636,21 +678,44 @@  struct devfreq *devfreq_add_device(struct device *dev,
 		err = -ENOMEM;
 		goto err_out;
 	}
 
 	mutex_init(&devfreq->lock);
-	mutex_lock(&devfreq->lock);
 	devfreq->dev.parent = dev;
 	devfreq->dev.class = devfreq_class;
 	devfreq->dev.release = devfreq_dev_release;
 	devfreq->profile = profile;
 	strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN);
 	devfreq->previous_freq = profile->initial_freq;
 	devfreq->last_status.current_frequency = profile->initial_freq;
 	devfreq->data = data;
 	devfreq->nb.notifier_call = devfreq_notifier_call;
 
+	/*
+	 * notifier from pm_qos
+	 *
+	 * initialized outside of devfreq->lock to avoid circular warning
+	 * between devfreq->lock and dev_pm_qos_mtx
+	 */
+	devfreq->nb_min.notifier_call = devfreq_qos_min_notifier_call;
+	devfreq->nb_max.notifier_call = devfreq_qos_max_notifier_call;
+
+	err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_min,
+				      DEV_PM_QOS_MIN_FREQUENCY);
+	if (err) {
+		dev_err(dev, "Failed to register MIN QoS notifier: %d\n", err);
+		goto err_dev;
+	}
+
+	err = dev_pm_qos_add_notifier(devfreq->dev.parent, &devfreq->nb_max,
+				      DEV_PM_QOS_MAX_FREQUENCY);
+	if (err) {
+		dev_err(dev, "Failed to register MAX QoS notifier: %d\n", err);
+		goto err_dev;
+	}
+
+	mutex_lock(&devfreq->lock);
 	if (!devfreq->profile->max_state && !devfreq->profile->freq_table) {
 		mutex_unlock(&devfreq->lock);
 		err = set_freq_table(devfreq);
 		if (err < 0)
 			goto err_dev;
@@ -741,10 +806,14 @@  struct devfreq *devfreq_add_device(struct device *dev,
 	mutex_unlock(&devfreq_list_lock);
 err_devfreq:
 	devfreq_remove_device(devfreq);
 	devfreq = NULL;
 err_dev:
+	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_max,
+			DEV_PM_QOS_MAX_FREQUENCY);
+	dev_pm_qos_remove_notifier(devfreq->dev.parent, &devfreq->nb_min,
+			DEV_PM_QOS_MIN_FREQUENCY);
 	kfree(devfreq);
 err_out:
 	return ERR_PTR(err);
 }
 EXPORT_SYMBOL(devfreq_add_device);
@@ -1311,13 +1380,11 @@  static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
 }
 
 static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
-	struct devfreq *df = to_devfreq(dev);
-
-	return sprintf(buf, "%lu\n", max(df->scaling_min_freq, df->min_freq));
+	return sprintf(buf, "%lu\n", get_effective_min_freq(to_devfreq(dev)));
 }
 
 static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
 			      const char *buf, size_t count)
 {
@@ -1356,13 +1423,11 @@  static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
 static DEVICE_ATTR_RW(min_freq);
 
 static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
-	struct devfreq *df = to_devfreq(dev);
-
-	return sprintf(buf, "%lu\n", min(df->scaling_max_freq, df->max_freq));
+	return sprintf(buf, "%lu\n", get_effective_max_freq(to_devfreq(dev)));
 }
 static DEVICE_ATTR_RW(max_freq);
 
 static ssize_t available_frequencies_show(struct device *d,
 					  struct device_attribute *attr,
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 2bae9ed3c783..8b92ccbd1962 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -134,10 +134,12 @@  struct devfreq_dev_profile {
  * @total_trans:	Number of devfreq transitions
  * @trans_table:	Statistics of devfreq transitions
  * @time_in_state:	Statistics of devfreq states
  * @last_stat_updated:	The last time stat updated
  * @transition_notifier_list: list head of DEVFREQ_TRANSITION_NOTIFIER notifier
+ * @nb_min:		Notifier block for DEV_PM_QOS_MIN_FREQUENCY
+ * @nb_max:		Notifier block for DEV_PM_QOS_MAX_FREQUENCY
  *
  * This structure stores the devfreq information for a give device.
  *
  * Note that when a governor accesses entries in struct devfreq in its
  * functions except for the context of callbacks defined in struct
@@ -176,10 +178,13 @@  struct devfreq {
 	unsigned int *trans_table;
 	unsigned long *time_in_state;
 	unsigned long last_stat_updated;
 
 	struct srcu_notifier_head transition_notifier_list;
+
+	struct notifier_block nb_min;
+	struct notifier_block nb_max;
 };
 
 struct devfreq_freqs {
 	unsigned long old;
 	unsigned long new;