diff mbox series

[v2,3/5] thermal: devfreq_cooling: add new registration functions with Energy Model

Message ID 20201118120358.17150-4-lukasz.luba@arm.com (mailing list archive)
State New, archived
Delegated to: Daniel Lezcano
Headers show
Series Thermal devfreq cooling improvements with Energy Model | expand

Commit Message

Lukasz Luba Nov. 18, 2020, 12:03 p.m. UTC
The Energy Model (EM) framework supports devices such as Devfreq. Create
new registration functions which automatically register EM for the thermal
devfreq_cooling devices. This patch prepares the code for coming changes
which are going to replace old power model with the new EM.

Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
---
 drivers/thermal/devfreq_cooling.c | 99 ++++++++++++++++++++++++++++++-
 include/linux/devfreq_cooling.h   | 22 +++++++
 2 files changed, 120 insertions(+), 1 deletion(-)

Comments

Ionela Voinescu Dec. 2, 2020, 10:24 a.m. UTC | #1
Hi Lukasz,

On Wednesday 18 Nov 2020 at 12:03:56 (+0000), Lukasz Luba wrote:
> + * Register a devfreq cooling device and attempt to register Energy Model. The
> + * available OPPs must be registered for the device.
> + *
> + * If @dfc_power is provided, the cooling device is registered with the
> + * power extensions. If @em_cb is provided it will be called for each OPP to
> + * calculate power value and cost. If @em_cb is not provided then simple Energy
> + * Model is going to be used, which requires "dynamic-power-coefficient" a
> + * devicetree property.
> + */
> +struct thermal_cooling_device *
> +devfreq_cooling_em_register_power(struct devfreq *df,
> +				  struct devfreq_cooling_power *dfc_power,
> +				  struct em_data_callback *em_cb)
> +{
> +	struct thermal_cooling_device *cdev;
> +	struct devfreq_cooling_device *dfc;
> +	struct device_node *np = NULL;
> +	struct device *dev;
> +	int nr_opp, ret;
> +
> +	if (IS_ERR_OR_NULL(df))
> +		return ERR_PTR(-EINVAL);
> +
> +	dev = df->dev.parent;
> +
> +	if (em_cb) {
> +		nr_opp = dev_pm_opp_get_opp_count(dev);
> +		if (nr_opp <= 0) {
> +			dev_err(dev, "No valid OPPs found\n");
> +			return ERR_PTR(-EINVAL);
> +		}
> +
> +		ret = em_dev_register_perf_domain(dev, nr_opp, em_cb, NULL, false);
> +	} else {
> +		ret = dev_pm_opp_of_register_em(dev, NULL);
> +	}
> +
> +	if (ret)
> +		dev_warn(dev, "Unable to register EM for devfreq cooling device (%d)\n",
> +			 ret);
> +
> +	if (dev->of_node)
> +		np = of_node_get(dev->of_node);
> +

Should np be checked before use? I'm not sure if it's better to do the
assign first and then the check on np before use. It depends on the
consequences of passing a NULL node pointer later on.

> +	cdev = of_devfreq_cooling_register_power(np, df, dfc_power);
> +
> +	if (np)
> +		of_node_put(np);
> +
> +	if (IS_ERR_OR_NULL(cdev)) {
> +		if (!ret)
> +			em_dev_unregister_perf_domain(dev);
> +	} else {
> +		dfc = cdev->devdata;
> +		dfc->em_registered = !ret;
> +	}
> +
> +	return cdev;
> +}
> +EXPORT_SYMBOL_GPL(devfreq_cooling_em_register_power);
> +
> +/**
> + * devfreq_cooling_em_register() - Register devfreq cooling device together
> + *				with Energy Model.
> + * @df:		Pointer to devfreq device.
> + * @em_cb:	Callback functions providing the data of the Energy Model
> + *
> + * This function attempts to register Energy Model for devfreq device and then
> + * register the devfreq cooling device.
> + */
> +struct thermal_cooling_device *
> +devfreq_cooling_em_register(struct devfreq *df, struct em_data_callback *em_cb)
> +{
> +	return devfreq_cooling_em_register_power(df, NULL, em_cb);
> +}
> +EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
> +
>  /**
>   * devfreq_cooling_unregister() - Unregister devfreq cooling device.
>   * @cdev: Pointer to devfreq cooling device to unregister.
> + *
> + * Unregisters devfreq cooling device and related Energy Model if it was
> + * present.
>   */
>  void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
>  {
>  	struct devfreq_cooling_device *dfc;
> +	struct device *dev;
>  
> -	if (!cdev)
> +	if (IS_ERR_OR_NULL(cdev))
>  		return;
>  
>  	dfc = cdev->devdata;
> +	dev = dfc->devfreq->dev.parent;
>  
>  	thermal_cooling_device_unregister(dfc->cdev);
>  	ida_simple_remove(&devfreq_ida, dfc->id);
>  	dev_pm_qos_remove_request(&dfc->req_max_freq);
> +
> +	if (dfc->em_registered)
> +		em_dev_unregister_perf_domain(dev);
> +
>  	kfree(dfc->power_table);
>  	kfree(dfc->freq_table);
>  
> diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h
> index 9df2dfca68dd..19868fb922f1 100644
> --- a/include/linux/devfreq_cooling.h
> +++ b/include/linux/devfreq_cooling.h
> @@ -11,6 +11,7 @@
>  #define __DEVFREQ_COOLING_H__
>  
>  #include <linux/devfreq.h>
> +#include <linux/energy_model.h>
>  #include <linux/thermal.h>
>  
>  
> @@ -65,6 +66,13 @@ struct thermal_cooling_device *
>  of_devfreq_cooling_register(struct device_node *np, struct devfreq *df);
>  struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df);
>  void devfreq_cooling_unregister(struct thermal_cooling_device *dfc);
> +struct thermal_cooling_device *
> +devfreq_cooling_em_register_power(struct devfreq *df,
> +				  struct devfreq_cooling_power *dfc_power,
> +				  struct em_data_callback *em_cb);
> +struct thermal_cooling_device *
> +devfreq_cooling_em_register(struct devfreq *df,
> +			    struct em_data_callback *em_cb);
>  
>  #else /* !CONFIG_DEVFREQ_THERMAL */
>  
> @@ -87,6 +95,20 @@ devfreq_cooling_register(struct devfreq *df)
>  	return ERR_PTR(-EINVAL);
>  }
>  
> +static inline struct thermal_cooling_device *
> +devfreq_cooling_em_register_power(struct devfreq *df,
> +				  struct devfreq_cooling_power *dfc_power,
> +				  struct em_data_callback *em_cb)
> +{
> +	return ERR_PTR(-EINVAL);
> +}
> +
> +static inline struct thermal_cooling_device *
> +devfreq_cooling_em_register(struct devfreq *df,	struct em_data_callback *em_cb)
> +{
> +	return ERR_PTR(-EINVAL);
> +}
> +
>  static inline void
>  devfreq_cooling_unregister(struct thermal_cooling_device *dfc)
>  {
> -- 
> 2.17.1
> 

Otherwise it looks good to me:

Reviewed-by: Ionela Voinescu <ionela.voinescu@arm.com>

Ionela.
Lukasz Luba Dec. 2, 2020, 11:14 a.m. UTC | #2
Hi Ionela,

On 12/2/20 10:24 AM, Ionela Voinescu wrote:
> Hi Lukasz,
> 
> On Wednesday 18 Nov 2020 at 12:03:56 (+0000), Lukasz Luba wrote:

[snip]

>> +	struct device_node *np = NULL;

[snip]

>> +
>> +	if (dev->of_node)
>> +		np = of_node_get(dev->of_node);
>> +
> 
> Should np be checked before use? I'm not sure if it's better to do the
> assign first and then the check on np before use. It depends on the
> consequences of passing a NULL node pointer later on.

The np is actually dev->of_node (or left NULL, as set at the begging).
The only meaning of the line above is to increment the counter and then
decrement if CONFIG_OF_DYNAMIC was used.
The devfreq_cooling_register() has np = NULL and the registration can
handle it, so we should be OK here as well.

> 
>> +	cdev = of_devfreq_cooling_register_power(np, df, dfc_power);
>> +
>> +	if (np)
>> +		of_node_put(np);
>> +

[snip]

>>
> 
> Otherwise it looks good to me:
> 
> Reviewed-by: Ionela Voinescu <ionela.voinescu@arm.com>

Thank you for the review.

Regards,
Lukasz

> 
> Ionela.
>
Ionela Voinescu Dec. 2, 2020, 11:49 a.m. UTC | #3
On Wednesday 02 Dec 2020 at 11:14:02 (+0000), Lukasz Luba wrote:
> Hi Ionela,
> 
> On 12/2/20 10:24 AM, Ionela Voinescu wrote:
> > Hi Lukasz,
> > 
> > On Wednesday 18 Nov 2020 at 12:03:56 (+0000), Lukasz Luba wrote:
> 
> [snip]
> 
> > > +	struct device_node *np = NULL;
> 
> [snip]
> 
> > > +
> > > +	if (dev->of_node)
> > > +		np = of_node_get(dev->of_node);
> > > +
> > 
> > Should np be checked before use? I'm not sure if it's better to do the
> > assign first and then the check on np before use. It depends on the
> > consequences of passing a NULL node pointer later on.
> 
> The np is actually dev->of_node (or left NULL, as set at the begging).
> The only meaning of the line above is to increment the counter and then
> decrement if CONFIG_OF_DYNAMIC was used.
> The devfreq_cooling_register() has np = NULL and the registration can
> handle it, so we should be OK here as well.
> 

Yes, I just wanted to make sure later registration can handle np = NULL,
or whether we need to bail out.

In this case, you can drop both ifs - for (dev->of_node) before get and
for np before put below, as of_node_get/of_node_put can handle NULL
pointers themselves.

Thanks,
Ionela.

> > 
> > > +	cdev = of_devfreq_cooling_register_power(np, df, dfc_power);
> > > +
> > > +	if (np)
> > > +		of_node_put(np);
> > > +
> 
> [snip]
> 
> > > 
> > 
> > Otherwise it looks good to me:
> > 
> > Reviewed-by: Ionela Voinescu <ionela.voinescu@arm.com>
> 
> Thank you for the review.
> 
> Regards,
> Lukasz
> 
> > 
> > Ionela.
> >
Lukasz Luba Dec. 2, 2020, 11:54 a.m. UTC | #4
On 12/2/20 11:49 AM, Ionela Voinescu wrote:
> On Wednesday 02 Dec 2020 at 11:14:02 (+0000), Lukasz Luba wrote:
>> Hi Ionela,
>>
>> On 12/2/20 10:24 AM, Ionela Voinescu wrote:
>>> Hi Lukasz,
>>>
>>> On Wednesday 18 Nov 2020 at 12:03:56 (+0000), Lukasz Luba wrote:
>>
>> [snip]
>>
>>>> +	struct device_node *np = NULL;
>>
>> [snip]
>>
>>>> +
>>>> +	if (dev->of_node)
>>>> +		np = of_node_get(dev->of_node);
>>>> +
>>>
>>> Should np be checked before use? I'm not sure if it's better to do the
>>> assign first and then the check on np before use. It depends on the
>>> consequences of passing a NULL node pointer later on.
>>
>> The np is actually dev->of_node (or left NULL, as set at the begging).
>> The only meaning of the line above is to increment the counter and then
>> decrement if CONFIG_OF_DYNAMIC was used.
>> The devfreq_cooling_register() has np = NULL and the registration can
>> handle it, so we should be OK here as well.
>>
> 
> Yes, I just wanted to make sure later registration can handle np = NULL,
> or whether we need to bail out.
> 
> In this case, you can drop both ifs - for (dev->of_node) before get and
> for np before put below, as of_node_get/of_node_put can handle NULL
> pointers themselves.

Right. I agree, I will resend this patch with that small change.
Thank you for having a look at it.

Lukasz

> 
> Thanks,
> Ionela.
>
Daniel Lezcano Dec. 3, 2020, 3:40 p.m. UTC | #5
On 18/11/2020 13:03, Lukasz Luba wrote:
> The Energy Model (EM) framework supports devices such as Devfreq. Create
> new registration functions which automatically register EM for the thermal
> devfreq_cooling devices. This patch prepares the code for coming changes
> which are going to replace old power model with the new EM.
> 
> Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
> ---
>  drivers/thermal/devfreq_cooling.c | 99 ++++++++++++++++++++++++++++++-
>  include/linux/devfreq_cooling.h   | 22 +++++++
>  2 files changed, 120 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
> index 925523694462..b354271742c5 100644
> --- a/drivers/thermal/devfreq_cooling.c
> +++ b/drivers/thermal/devfreq_cooling.c
> @@ -50,6 +50,8 @@ static DEFINE_IDA(devfreq_ida);
>   * @capped_state:	index to cooling state with in dynamic power budget
>   * @req_max_freq:	PM QoS request for limiting the maximum frequency
>   *			of the devfreq device.
> + * @em:		Energy Model for the associated Devfreq device
> + * @em_registered:	Devfreq cooling registered the EM and should free it.
>   */
>  struct devfreq_cooling_device {
>  	int id;
> @@ -63,6 +65,8 @@ struct devfreq_cooling_device {
>  	u32 res_util;
>  	int capped_state;
>  	struct dev_pm_qos_request req_max_freq;
> +	struct em_perf_domain *em;

This pointer is not needed, it is in the struct device.

> +	bool em_registered;

The boolean em_registered is not needed because of the test in the
function em_dev_unregister_perf_domain():

if (IS_ERR_OR_NULL(dev) || !dev->em_pd)
                return;

Logically if the 'em' was not initialized, it must be NULL, the
corresponding struct device was zero-allocated.


>  };
>  
>  static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
> @@ -583,22 +587,115 @@ struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df)
>  }
>  EXPORT_SYMBOL_GPL(devfreq_cooling_register);
>  
> +/**
> + * devfreq_cooling_em_register_power() - Register devfreq cooling device with
> + *		power information and attempt to register Energy Model (EM)
> + * @df:		Pointer to devfreq device.
> + * @dfc_power:	Pointer to devfreq_cooling_power.
> + * @em_cb:	Callback functions providing the data of the EM
> + *
> + * Register a devfreq cooling device and attempt to register Energy Model. The
> + * available OPPs must be registered for the device.
> + *
> + * If @dfc_power is provided, the cooling device is registered with the
> + * power extensions. If @em_cb is provided it will be called for each OPP to
> + * calculate power value and cost. If @em_cb is not provided then simple Energy
> + * Model is going to be used, which requires "dynamic-power-coefficient" a
> + * devicetree property.
> + */
> +struct thermal_cooling_device *
> +devfreq_cooling_em_register_power(struct devfreq *df,
> +				  struct devfreq_cooling_power *dfc_power,
> +				  struct em_data_callback *em_cb)
> +{
> +	struct thermal_cooling_device *cdev;
> +	struct devfreq_cooling_device *dfc;
> +	struct device_node *np = NULL;
> +	struct device *dev;
> +	int nr_opp, ret;
> +
> +	if (IS_ERR_OR_NULL(df))
> +		return ERR_PTR(-EINVAL);
> +
> +	dev = df->dev.parent;

Why the parent ?

> +
> +	if (em_cb) {
> +		nr_opp = dev_pm_opp_get_opp_count(dev);
> +		if (nr_opp <= 0) {
> +			dev_err(dev, "No valid OPPs found\n");
> +			return ERR_PTR(-EINVAL);
> +		}
> +
> +		ret = em_dev_register_perf_domain(dev, nr_opp, em_cb, NULL, false);
> +	} else {
> +		ret = dev_pm_opp_of_register_em(dev, NULL);
> +	}
> +
> +	if (ret)
> +		dev_warn(dev, "Unable to register EM for devfreq cooling device (%d)\n",
> +			 ret);
> +
> +	if (dev->of_node)
> +		np = of_node_get(dev->of_node);
> +
> +	cdev = of_devfreq_cooling_register_power(np, df, dfc_power);
> +
> +	if (np)
> +		of_node_put(np);> +
> +	if (IS_ERR_OR_NULL(cdev)) {
> +		if (!ret)
> +			em_dev_unregister_perf_domain(dev);
> +	} else {
> +		dfc = cdev->devdata;
> +		dfc->em_registered = !ret;
> +	}
> +
> +	return cdev;
> +}
> +EXPORT_SYMBOL_GPL(devfreq_cooling_em_register_power);
> +
> +/**
> + * devfreq_cooling_em_register() - Register devfreq cooling device together
> + *				with Energy Model.
> + * @df:		Pointer to devfreq device.
> + * @em_cb:	Callback functions providing the data of the Energy Model
> + *
> + * This function attempts to register Energy Model for devfreq device and then
> + * register the devfreq cooling device.
> + */
> +struct thermal_cooling_device *
> +devfreq_cooling_em_register(struct devfreq *df, struct em_data_callback *em_cb)
> +{
> +	return devfreq_cooling_em_register_power(df, NULL, em_cb);
> +}
> +EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
> +
>  /**
>   * devfreq_cooling_unregister() - Unregister devfreq cooling device.
>   * @cdev: Pointer to devfreq cooling device to unregister.
> + *
> + * Unregisters devfreq cooling device and related Energy Model if it was
> + * present.
>   */
>  void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
>  {
>  	struct devfreq_cooling_device *dfc;
> +	struct device *dev;
>  
> -	if (!cdev)
> +	if (IS_ERR_OR_NULL(cdev))

Why this additional IS_ERR check ?

>  		return;
>  
>  	dfc = cdev->devdata;
> +	dev = dfc->devfreq->dev.parent;
>  
>  	thermal_cooling_device_unregister(dfc->cdev);
>  	ida_simple_remove(&devfreq_ida, dfc->id);
>  	dev_pm_qos_remove_request(&dfc->req_max_freq);
> +
> +	if (dfc->em_registered)
> +		em_dev_unregister_perf_domain(dev);
> +

As stated before it can be called unconditionally

>  	kfree(dfc->power_table);
>  	kfree(dfc->freq_table);
>  
> diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h
> index 9df2dfca68dd..19868fb922f1 100644
> --- a/include/linux/devfreq_cooling.h
> +++ b/include/linux/devfreq_cooling.h
> @@ -11,6 +11,7 @@
>  #define __DEVFREQ_COOLING_H__
>  
>  #include <linux/devfreq.h>
> +#include <linux/energy_model.h>
>  #include <linux/thermal.h>
>  
>  
> @@ -65,6 +66,13 @@ struct thermal_cooling_device *
>  of_devfreq_cooling_register(struct device_node *np, struct devfreq *df);
>  struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df);
>  void devfreq_cooling_unregister(struct thermal_cooling_device *dfc);
> +struct thermal_cooling_device *
> +devfreq_cooling_em_register_power(struct devfreq *df,
> +				  struct devfreq_cooling_power *dfc_power,
> +				  struct em_data_callback *em_cb);
> +struct thermal_cooling_device *
> +devfreq_cooling_em_register(struct devfreq *df,
> +			    struct em_data_callback *em_cb);
>  
>  #else /* !CONFIG_DEVFREQ_THERMAL */
>  
> @@ -87,6 +95,20 @@ devfreq_cooling_register(struct devfreq *df)
>  	return ERR_PTR(-EINVAL);
>  }
>  
> +static inline struct thermal_cooling_device *
> +devfreq_cooling_em_register_power(struct devfreq *df,
> +				  struct devfreq_cooling_power *dfc_power,
> +				  struct em_data_callback *em_cb)
> +{
> +	return ERR_PTR(-EINVAL);
> +}
> +
> +static inline struct thermal_cooling_device *
> +devfreq_cooling_em_register(struct devfreq *df,	struct em_data_callback *em_cb)
> +{
> +	return ERR_PTR(-EINVAL);
> +}
> +
>  static inline void
>  devfreq_cooling_unregister(struct thermal_cooling_device *dfc)
>  {
>
Lukasz Luba Dec. 7, 2020, 9:46 a.m. UTC | #6
On 12/3/20 3:40 PM, Daniel Lezcano wrote:
> On 18/11/2020 13:03, Lukasz Luba wrote:
>> The Energy Model (EM) framework supports devices such as Devfreq. Create
>> new registration functions which automatically register EM for the thermal
>> devfreq_cooling devices. This patch prepares the code for coming changes
>> which are going to replace old power model with the new EM.
>>
>> Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
>> ---
>>   drivers/thermal/devfreq_cooling.c | 99 ++++++++++++++++++++++++++++++-
>>   include/linux/devfreq_cooling.h   | 22 +++++++
>>   2 files changed, 120 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
>> index 925523694462..b354271742c5 100644
>> --- a/drivers/thermal/devfreq_cooling.c
>> +++ b/drivers/thermal/devfreq_cooling.c
>> @@ -50,6 +50,8 @@ static DEFINE_IDA(devfreq_ida);
>>    * @capped_state:	index to cooling state with in dynamic power budget
>>    * @req_max_freq:	PM QoS request for limiting the maximum frequency
>>    *			of the devfreq device.
>> + * @em:		Energy Model for the associated Devfreq device
>> + * @em_registered:	Devfreq cooling registered the EM and should free it.
>>    */
>>   struct devfreq_cooling_device {
>>   	int id;
>> @@ -63,6 +65,8 @@ struct devfreq_cooling_device {
>>   	u32 res_util;
>>   	int capped_state;
>>   	struct dev_pm_qos_request req_max_freq;
>> +	struct em_perf_domain *em;
> 
> This pointer is not needed, it is in the struct device.

It is just a helper pointer, to make the code simpler and avoid nested
pointers:

struct device *dev = dfc->devfreq->dev.parent
and then using dev->em

The code is cleaner with dfc->em, but let me have a look if I can
remove it and still have a clean code.

> 
>> +	bool em_registered;
> 
> The boolean em_registered is not needed because of the test in the
> function em_dev_unregister_perf_domain():
> 
> if (IS_ERR_OR_NULL(dev) || !dev->em_pd)
>                  return;
> 
> Logically if the 'em' was not initialized, it must be NULL, the
> corresponding struct device was zero-allocated.

It was needed for devfreq cooling to know who registered the EM.
If there is 2 frameworks and driver and all could register EM,
this code cannot blindly unregister EM in it's code. The EM might
be used still by PowerCap DTM, so the unregister might be called
explicitly by the driver.

But I will rewrite the register function and make it way simpler,
just registration of EM (stopping when it failed) and then cooling
device. Also unregister will be simpler.

Driver will have to keep the order of unregister functions for two
frameworks and call unregister devfreq cooling device as last one,
because it will remove the EM.

> 
> 
>>   };
>>   
>>   static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
>> @@ -583,22 +587,115 @@ struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df)
>>   }
>>   EXPORT_SYMBOL_GPL(devfreq_cooling_register);
>>   
>> +/**
>> + * devfreq_cooling_em_register_power() - Register devfreq cooling device with
>> + *		power information and attempt to register Energy Model (EM)
>> + * @df:		Pointer to devfreq device.
>> + * @dfc_power:	Pointer to devfreq_cooling_power.
>> + * @em_cb:	Callback functions providing the data of the EM
>> + *
>> + * Register a devfreq cooling device and attempt to register Energy Model. The
>> + * available OPPs must be registered for the device.
>> + *
>> + * If @dfc_power is provided, the cooling device is registered with the
>> + * power extensions. If @em_cb is provided it will be called for each OPP to
>> + * calculate power value and cost. If @em_cb is not provided then simple Energy
>> + * Model is going to be used, which requires "dynamic-power-coefficient" a
>> + * devicetree property.
>> + */
>> +struct thermal_cooling_device *
>> +devfreq_cooling_em_register_power(struct devfreq *df,
>> +				  struct devfreq_cooling_power *dfc_power,
>> +				  struct em_data_callback *em_cb)
>> +{
>> +	struct thermal_cooling_device *cdev;
>> +	struct devfreq_cooling_device *dfc;
>> +	struct device_node *np = NULL;
>> +	struct device *dev;
>> +	int nr_opp, ret;
>> +
>> +	if (IS_ERR_OR_NULL(df))
>> +		return ERR_PTR(-EINVAL);
>> +
>> +	dev = df->dev.parent;
> 
> Why the parent ?

The parent has OPPs and we are calling em_perf_domain_register() or
dev_pm_opp_of_register_em() (which in addition needs DT node) for that
device.

The old devfreq cooling code also had dev parent, to enable/disenable
OPPs.

> 
>> +
>> +	if (em_cb) {
>> +		nr_opp = dev_pm_opp_get_opp_count(dev);
>> +		if (nr_opp <= 0) {
>> +			dev_err(dev, "No valid OPPs found\n");
>> +			return ERR_PTR(-EINVAL);
>> +		}
>> +
>> +		ret = em_dev_register_perf_domain(dev, nr_opp, em_cb, NULL, false);
>> +	} else {
>> +		ret = dev_pm_opp_of_register_em(dev, NULL);
>> +	}
>> +
>> +	if (ret)
>> +		dev_warn(dev, "Unable to register EM for devfreq cooling device (%d)\n",
>> +			 ret);
>> +
>> +	if (dev->of_node)
>> +		np = of_node_get(dev->of_node);
>> +
>> +	cdev = of_devfreq_cooling_register_power(np, df, dfc_power);
>> +
>> +	if (np)
>> +		of_node_put(np);> +
>> +	if (IS_ERR_OR_NULL(cdev)) {
>> +		if (!ret)
>> +			em_dev_unregister_perf_domain(dev);
>> +	} else {
>> +		dfc = cdev->devdata;
>> +		dfc->em_registered = !ret;
>> +	}
>> +
>> +	return cdev;
>> +}
>> +EXPORT_SYMBOL_GPL(devfreq_cooling_em_register_power);
>> +
>> +/**
>> + * devfreq_cooling_em_register() - Register devfreq cooling device together
>> + *				with Energy Model.
>> + * @df:		Pointer to devfreq device.
>> + * @em_cb:	Callback functions providing the data of the Energy Model
>> + *
>> + * This function attempts to register Energy Model for devfreq device and then
>> + * register the devfreq cooling device.
>> + */
>> +struct thermal_cooling_device *
>> +devfreq_cooling_em_register(struct devfreq *df, struct em_data_callback *em_cb)
>> +{
>> +	return devfreq_cooling_em_register_power(df, NULL, em_cb);
>> +}
>> +EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
>> +
>>   /**
>>    * devfreq_cooling_unregister() - Unregister devfreq cooling device.
>>    * @cdev: Pointer to devfreq cooling device to unregister.
>> + *
>> + * Unregisters devfreq cooling device and related Energy Model if it was
>> + * present.
>>    */
>>   void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
>>   {
>>   	struct devfreq_cooling_device *dfc;
>> +	struct device *dev;
>>   
>> -	if (!cdev)
>> +	if (IS_ERR_OR_NULL(cdev))
> 
> Why this additional IS_ERR check ?

Not needed too much, but helps if driver doesn't check the
result of registration function and then just calls unregister
function, i.e.

	if (pfdev->devfreq.cooling)
		devfreq_cooling_unregister(pfdev->devfreq.cooling);

> 
>>   		return;
>>   
>>   	dfc = cdev->devdata;
>> +	dev = dfc->devfreq->dev.parent;
>>   
>>   	thermal_cooling_device_unregister(dfc->cdev);
>>   	ida_simple_remove(&devfreq_ida, dfc->id);
>>   	dev_pm_qos_remove_request(&dfc->req_max_freq);
>> +
>> +	if (dfc->em_registered)
>> +		em_dev_unregister_perf_domain(dev);
>> +
> 
> As stated before it can be called unconditionally

OK, I will rewrite it. The goal was to be able handle many situations
in register/unregister function, but I will make them simpler.

Thank you Daniel for review comments. I will address them in next
version.

Regards,
Lukasz
diff mbox series

Patch

diff --git a/drivers/thermal/devfreq_cooling.c b/drivers/thermal/devfreq_cooling.c
index 925523694462..b354271742c5 100644
--- a/drivers/thermal/devfreq_cooling.c
+++ b/drivers/thermal/devfreq_cooling.c
@@ -50,6 +50,8 @@  static DEFINE_IDA(devfreq_ida);
  * @capped_state:	index to cooling state with in dynamic power budget
  * @req_max_freq:	PM QoS request for limiting the maximum frequency
  *			of the devfreq device.
+ * @em:		Energy Model for the associated Devfreq device
+ * @em_registered:	Devfreq cooling registered the EM and should free it.
  */
 struct devfreq_cooling_device {
 	int id;
@@ -63,6 +65,8 @@  struct devfreq_cooling_device {
 	u32 res_util;
 	int capped_state;
 	struct dev_pm_qos_request req_max_freq;
+	struct em_perf_domain *em;
+	bool em_registered;
 };
 
 static int devfreq_cooling_get_max_state(struct thermal_cooling_device *cdev,
@@ -583,22 +587,115 @@  struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df)
 }
 EXPORT_SYMBOL_GPL(devfreq_cooling_register);
 
+/**
+ * devfreq_cooling_em_register_power() - Register devfreq cooling device with
+ *		power information and attempt to register Energy Model (EM)
+ * @df:		Pointer to devfreq device.
+ * @dfc_power:	Pointer to devfreq_cooling_power.
+ * @em_cb:	Callback functions providing the data of the EM
+ *
+ * Register a devfreq cooling device and attempt to register Energy Model. The
+ * available OPPs must be registered for the device.
+ *
+ * If @dfc_power is provided, the cooling device is registered with the
+ * power extensions. If @em_cb is provided it will be called for each OPP to
+ * calculate power value and cost. If @em_cb is not provided then simple Energy
+ * Model is going to be used, which requires "dynamic-power-coefficient" a
+ * devicetree property.
+ */
+struct thermal_cooling_device *
+devfreq_cooling_em_register_power(struct devfreq *df,
+				  struct devfreq_cooling_power *dfc_power,
+				  struct em_data_callback *em_cb)
+{
+	struct thermal_cooling_device *cdev;
+	struct devfreq_cooling_device *dfc;
+	struct device_node *np = NULL;
+	struct device *dev;
+	int nr_opp, ret;
+
+	if (IS_ERR_OR_NULL(df))
+		return ERR_PTR(-EINVAL);
+
+	dev = df->dev.parent;
+
+	if (em_cb) {
+		nr_opp = dev_pm_opp_get_opp_count(dev);
+		if (nr_opp <= 0) {
+			dev_err(dev, "No valid OPPs found\n");
+			return ERR_PTR(-EINVAL);
+		}
+
+		ret = em_dev_register_perf_domain(dev, nr_opp, em_cb, NULL, false);
+	} else {
+		ret = dev_pm_opp_of_register_em(dev, NULL);
+	}
+
+	if (ret)
+		dev_warn(dev, "Unable to register EM for devfreq cooling device (%d)\n",
+			 ret);
+
+	if (dev->of_node)
+		np = of_node_get(dev->of_node);
+
+	cdev = of_devfreq_cooling_register_power(np, df, dfc_power);
+
+	if (np)
+		of_node_put(np);
+
+	if (IS_ERR_OR_NULL(cdev)) {
+		if (!ret)
+			em_dev_unregister_perf_domain(dev);
+	} else {
+		dfc = cdev->devdata;
+		dfc->em_registered = !ret;
+	}
+
+	return cdev;
+}
+EXPORT_SYMBOL_GPL(devfreq_cooling_em_register_power);
+
+/**
+ * devfreq_cooling_em_register() - Register devfreq cooling device together
+ *				with Energy Model.
+ * @df:		Pointer to devfreq device.
+ * @em_cb:	Callback functions providing the data of the Energy Model
+ *
+ * This function attempts to register Energy Model for devfreq device and then
+ * register the devfreq cooling device.
+ */
+struct thermal_cooling_device *
+devfreq_cooling_em_register(struct devfreq *df, struct em_data_callback *em_cb)
+{
+	return devfreq_cooling_em_register_power(df, NULL, em_cb);
+}
+EXPORT_SYMBOL_GPL(devfreq_cooling_em_register);
+
 /**
  * devfreq_cooling_unregister() - Unregister devfreq cooling device.
  * @cdev: Pointer to devfreq cooling device to unregister.
+ *
+ * Unregisters devfreq cooling device and related Energy Model if it was
+ * present.
  */
 void devfreq_cooling_unregister(struct thermal_cooling_device *cdev)
 {
 	struct devfreq_cooling_device *dfc;
+	struct device *dev;
 
-	if (!cdev)
+	if (IS_ERR_OR_NULL(cdev))
 		return;
 
 	dfc = cdev->devdata;
+	dev = dfc->devfreq->dev.parent;
 
 	thermal_cooling_device_unregister(dfc->cdev);
 	ida_simple_remove(&devfreq_ida, dfc->id);
 	dev_pm_qos_remove_request(&dfc->req_max_freq);
+
+	if (dfc->em_registered)
+		em_dev_unregister_perf_domain(dev);
+
 	kfree(dfc->power_table);
 	kfree(dfc->freq_table);
 
diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h
index 9df2dfca68dd..19868fb922f1 100644
--- a/include/linux/devfreq_cooling.h
+++ b/include/linux/devfreq_cooling.h
@@ -11,6 +11,7 @@ 
 #define __DEVFREQ_COOLING_H__
 
 #include <linux/devfreq.h>
+#include <linux/energy_model.h>
 #include <linux/thermal.h>
 
 
@@ -65,6 +66,13 @@  struct thermal_cooling_device *
 of_devfreq_cooling_register(struct device_node *np, struct devfreq *df);
 struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df);
 void devfreq_cooling_unregister(struct thermal_cooling_device *dfc);
+struct thermal_cooling_device *
+devfreq_cooling_em_register_power(struct devfreq *df,
+				  struct devfreq_cooling_power *dfc_power,
+				  struct em_data_callback *em_cb);
+struct thermal_cooling_device *
+devfreq_cooling_em_register(struct devfreq *df,
+			    struct em_data_callback *em_cb);
 
 #else /* !CONFIG_DEVFREQ_THERMAL */
 
@@ -87,6 +95,20 @@  devfreq_cooling_register(struct devfreq *df)
 	return ERR_PTR(-EINVAL);
 }
 
+static inline struct thermal_cooling_device *
+devfreq_cooling_em_register_power(struct devfreq *df,
+				  struct devfreq_cooling_power *dfc_power,
+				  struct em_data_callback *em_cb)
+{
+	return ERR_PTR(-EINVAL);
+}
+
+static inline struct thermal_cooling_device *
+devfreq_cooling_em_register(struct devfreq *df,	struct em_data_callback *em_cb)
+{
+	return ERR_PTR(-EINVAL);
+}
+
 static inline void
 devfreq_cooling_unregister(struct thermal_cooling_device *dfc)
 {