diff mbox series

[v5,4/5] cpufreq: mediatek: add opp notification for SVS support

Message ID 1574769046-28449-5-git-send-email-andrew-sh.cheng@mediatek.com (mailing list archive)
State New, archived
Headers show
Series Add cpufreq and cci devfreq for mt8183, and SVS support | expand

Commit Message

andrew-sh.cheng Nov. 26, 2019, 11:50 a.m. UTC
From: "Andrew-sh.Cheng" <andrew-sh.cheng@mediatek.com>

cpufreq should listen opp notification and do proper actions
when receiving disable and voltage adjustment events,
which are triggered when SVS is enabled.

Signed-off-by: Andrew-sh.Cheng <andrew-sh.cheng@mediatek.com>
---
 drivers/cpufreq/mediatek-cpufreq.c | 79 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

Comments

Viresh Kumar Nov. 27, 2019, 8:36 a.m. UTC | #1
On 26-11-19, 19:50, Andrew-sh.Cheng wrote:
> diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c
> index 4b0cc50dd93b..7c37ab31230a 100644
> --- a/drivers/cpufreq/mediatek-cpufreq.c
> +++ b/drivers/cpufreq/mediatek-cpufreq.c
> @@ -42,6 +42,10 @@ struct mtk_cpu_dvfs_info {
>  	struct list_head list_head;
>  	int intermediate_voltage;
>  	bool need_voltage_tracking;
> +	struct mutex lock; /* avoid notify and policy race condition */

Will a read-write lock be better suited here for performance reasons ?

> +	struct notifier_block opp_nb;
> +	int opp_cpu;
> +	unsigned long opp_freq;
>  };
>  
>  static LIST_HEAD(dvfs_info_list);
> @@ -231,6 +235,7 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
>  	vproc = dev_pm_opp_get_voltage(opp);
>  	dev_pm_opp_put(opp);
>  
> +	mutex_lock(&info->lock);
>  	/*
>  	 * If the new voltage or the intermediate voltage is higher than the
>  	 * current voltage, scale up voltage first.
> @@ -242,6 +247,7 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
>  			pr_err("cpu%d: failed to scale up voltage!\n",
>  			       policy->cpu);
>  			mtk_cpufreq_set_voltage(info, old_vproc);
> +			mutex_unlock(&info->lock);
>  			return ret;
>  		}
>  	}
> @@ -253,6 +259,7 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
>  		       policy->cpu);
>  		mtk_cpufreq_set_voltage(info, old_vproc);
>  		WARN_ON(1);
> +		mutex_unlock(&info->lock);
>  		return ret;
>  	}
>  
> @@ -263,6 +270,7 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
>  		       policy->cpu);
>  		clk_set_parent(cpu_clk, armpll);
>  		mtk_cpufreq_set_voltage(info, old_vproc);
> +		mutex_unlock(&info->lock);
>  		return ret;
>  	}
>  
> @@ -273,6 +281,7 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
>  		       policy->cpu);
>  		mtk_cpufreq_set_voltage(info, inter_vproc);
>  		WARN_ON(1);
> +		mutex_unlock(&info->lock);
>  		return ret;
>  	}
>  
> @@ -288,15 +297,75 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
>  			clk_set_parent(cpu_clk, info->inter_clk);
>  			clk_set_rate(armpll, old_freq_hz);
>  			clk_set_parent(cpu_clk, armpll);
> +			mutex_unlock(&info->lock);
>  			return ret;
>  		}
>  	}
>  
> +	info->opp_freq = freq_hz;
> +	mutex_unlock(&info->lock);
> +
>  	return 0;
>  }
>  
>  #define DYNAMIC_POWER "dynamic-power-coefficient"
>  
> +static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
> +				    unsigned long event, void *data)
> +{
> +	struct dev_pm_opp *opp = data;
> +	struct dev_pm_opp *opp_item;
> +	struct mtk_cpu_dvfs_info *info =
> +		container_of(nb, struct mtk_cpu_dvfs_info, opp_nb);

Do the assignment after all definitions, instead of awkwardly breaking
the line here.

> +	unsigned long freq, volt;
> +	struct cpufreq_policy *policy;
> +	int ret = 0;
> +
> +	if (event == OPP_EVENT_ADJUST_VOLTAGE) {
> +		freq = dev_pm_opp_get_freq(opp);
> +
> +		mutex_lock(&info->lock);
> +		if (info->opp_freq == freq) {
> +			volt = dev_pm_opp_get_voltage(opp);
> +			ret = mtk_cpufreq_set_voltage(info, volt);
> +			if (ret)
> +				dev_err(info->cpu_dev, "failed to scale voltage: %d\n",
> +					ret);
> +		}
> +		mutex_unlock(&info->lock);
> +	} else if (event == OPP_EVENT_DISABLE) {
> +		freq = info->opp_freq;
> +		opp_item = dev_pm_opp_find_freq_ceil(info->cpu_dev, &freq);

name it new_opp instead of opp_item.

> +		if (!IS_ERR(opp_item))
> +			dev_pm_opp_put(opp_item);
> +		else
> +			freq = 0;
> +

What is the purpose of the above code ?

> +		/* case of current opp is disabled */
> +		if (freq == 0 || freq != info->opp_freq) {
> +			// find an enable opp item

Use proper commenting style please.

> +			freq = 1;
> +			opp_item = dev_pm_opp_find_freq_ceil(info->cpu_dev,
> +							     &freq);
> +			if (!IS_ERR(opp_item)) {
> +				dev_pm_opp_put(opp_item);
> +				policy = cpufreq_cpu_get(info->opp_cpu);
> +				if (policy) {
> +					cpufreq_driver_target(policy,
> +						freq / 1000,
> +						CPUFREQ_RELATION_L);

Why don't you simply call this instead of all the code in the else
block ?

> +					cpufreq_cpu_put(policy);
> +				}
> +			} else {
> +				pr_err("%s: all opp items are disabled\n",
> +				       __func__);
> +			}
> +		}
> +	}
> +
> +	return notifier_from_errno(ret);
> +}
> +
>  static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
>  {
>  	struct device *cpu_dev;
> @@ -383,11 +452,21 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
>  	info->intermediate_voltage = dev_pm_opp_get_voltage(opp);
>  	dev_pm_opp_put(opp);
>  
> +	info->opp_cpu = cpu;
> +	info->opp_nb.notifier_call = mtk_cpufreq_opp_notifier;
> +	ret = dev_pm_opp_register_notifier(cpu_dev, &info->opp_nb);
> +	if (ret) {
> +		pr_warn("cannot register opp notification\n");
> +		goto out_free_opp_table;
> +	}
> +
> +	mutex_init(&info->lock);
>  	info->cpu_dev = cpu_dev;
>  	info->proc_reg = proc_reg;
>  	info->sram_reg = IS_ERR(sram_reg) ? NULL : sram_reg;
>  	info->cpu_clk = cpu_clk;
>  	info->inter_clk = inter_clk;
> +	info->opp_freq = clk_get_rate(cpu_clk);
>  
>  	/*
>  	 * If SRAM regulator is present, software "voltage tracking" is needed
> -- 
> 2.12.5
andrew-sh.cheng Dec. 9, 2019, 6:56 a.m. UTC | #2
On Wed, 2019-11-27 at 14:06 +0530, Viresh Kumar wrote:
> On 26-11-19, 19:50, Andrew-sh.Cheng wrote:
> > diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c
> > index 4b0cc50dd93b..7c37ab31230a 100644
> > --- a/drivers/cpufreq/mediatek-cpufreq.c
> > +++ b/drivers/cpufreq/mediatek-cpufreq.c
> > @@ -42,6 +42,10 @@ struct mtk_cpu_dvfs_info {
> >  	struct list_head list_head;
> >  	int intermediate_voltage;
> >  	bool need_voltage_tracking;
> > +	struct mutex lock; /* avoid notify and policy race condition */
> 
> Will a read-write lock be better suited here for performance reasons ?
Thank you for advice, I will check it.
> 
> > +	struct notifier_block opp_nb;
> > +	int opp_cpu;
> > +	unsigned long opp_freq;
> >  };
> >  
> >  static LIST_HEAD(dvfs_info_list);
> > @@ -231,6 +235,7 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
> >  	vproc = dev_pm_opp_get_voltage(opp);
> >  	dev_pm_opp_put(opp);
> >  
> > +	mutex_lock(&info->lock);
> >  	/*
> >  	 * If the new voltage or the intermediate voltage is higher than the
> >  	 * current voltage, scale up voltage first.
> > @@ -242,6 +247,7 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
> >  			pr_err("cpu%d: failed to scale up voltage!\n",
> >  			       policy->cpu);
> >  			mtk_cpufreq_set_voltage(info, old_vproc);
> > +			mutex_unlock(&info->lock);
> >  			return ret;
> >  		}
> >  	}
> > @@ -253,6 +259,7 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
> >  		       policy->cpu);
> >  		mtk_cpufreq_set_voltage(info, old_vproc);
> >  		WARN_ON(1);
> > +		mutex_unlock(&info->lock);
> >  		return ret;
> >  	}
> >  
> > @@ -263,6 +270,7 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
> >  		       policy->cpu);
> >  		clk_set_parent(cpu_clk, armpll);
> >  		mtk_cpufreq_set_voltage(info, old_vproc);
> > +		mutex_unlock(&info->lock);
> >  		return ret;
> >  	}
> >  
> > @@ -273,6 +281,7 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
> >  		       policy->cpu);
> >  		mtk_cpufreq_set_voltage(info, inter_vproc);
> >  		WARN_ON(1);
> > +		mutex_unlock(&info->lock);
> >  		return ret;
> >  	}
> >  
> > @@ -288,15 +297,75 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
> >  			clk_set_parent(cpu_clk, info->inter_clk);
> >  			clk_set_rate(armpll, old_freq_hz);
> >  			clk_set_parent(cpu_clk, armpll);
> > +			mutex_unlock(&info->lock);
> >  			return ret;
> >  		}
> >  	}
> >  
> > +	info->opp_freq = freq_hz;
> > +	mutex_unlock(&info->lock);
> > +
> >  	return 0;
> >  }
> >  
> >  #define DYNAMIC_POWER "dynamic-power-coefficient"
> >  
> > +static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
> > +				    unsigned long event, void *data)
> > +{
> > +	struct dev_pm_opp *opp = data;
> > +	struct dev_pm_opp *opp_item;
> > +	struct mtk_cpu_dvfs_info *info =
> > +		container_of(nb, struct mtk_cpu_dvfs_info, opp_nb);
> 
> Do the assignment after all definitions, instead of awkwardly breaking
> the line here.
Got it, will change it.
> 
> > +	unsigned long freq, volt;
> > +	struct cpufreq_policy *policy;
> > +	int ret = 0;
> > +
> > +	if (event == OPP_EVENT_ADJUST_VOLTAGE) {
> > +		freq = dev_pm_opp_get_freq(opp);
> > +
> > +		mutex_lock(&info->lock);
> > +		if (info->opp_freq == freq) {
> > +			volt = dev_pm_opp_get_voltage(opp);
> > +			ret = mtk_cpufreq_set_voltage(info, volt);
> > +			if (ret)
> > +				dev_err(info->cpu_dev, "failed to scale voltage: %d\n",
> > +					ret);
> > +		}
> > +		mutex_unlock(&info->lock);
> > +	} else if (event == OPP_EVENT_DISABLE) {
> > +		freq = info->opp_freq;
> > +		opp_item = dev_pm_opp_find_freq_ceil(info->cpu_dev, &freq);
> 
> name it new_opp instead of opp_item.
Got it, will change it.
> 
> > +		if (!IS_ERR(opp_item))
> > +			dev_pm_opp_put(opp_item);
> > +		else
> > +			freq = 0;
> > +
> 
> What is the purpose of the above code ?
When dev_pm_opp_find_freq_ceil() doesn't find matching opp item, freq
value won't be set.
Set it as 0 for below checking
> 
> > +		/* case of current opp is disabled */
> > +		if (freq == 0 || freq != info->opp_freq) {
> > +			// find an enable opp item
> 
> Use proper commenting style please.
Got it, will change it.
> 
> > +			freq = 1;
> > +			opp_item = dev_pm_opp_find_freq_ceil(info->cpu_dev,
> > +							     &freq);
> > +			if (!IS_ERR(opp_item)) {
> > +				dev_pm_opp_put(opp_item);
> > +				policy = cpufreq_cpu_get(info->opp_cpu);
> > +				if (policy) {
> > +					cpufreq_driver_target(policy,
> > +						freq / 1000,
> > +						CPUFREQ_RELATION_L);
> 
> Why don't you simply call this instead of all the code in the else
> block ?
These else code is used to check "current opp item is disabled or not".
If not, do nothing.
If current opp item is disabled, need to find an not-disabled opp item,
and set frequency to it.
> 
> > +					cpufreq_cpu_put(policy);
> > +				}
> > +			} else {
> > +				pr_err("%s: all opp items are disabled\n",
> > +				       __func__);
> > +			}
> > +		}
> > +	}
> > +
> > +	return notifier_from_errno(ret);
> > +}
> > +
> >  static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
> >  {
> >  	struct device *cpu_dev;
> > @@ -383,11 +452,21 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
> >  	info->intermediate_voltage = dev_pm_opp_get_voltage(opp);
> >  	dev_pm_opp_put(opp);
> >  
> > +	info->opp_cpu = cpu;
> > +	info->opp_nb.notifier_call = mtk_cpufreq_opp_notifier;
> > +	ret = dev_pm_opp_register_notifier(cpu_dev, &info->opp_nb);
> > +	if (ret) {
> > +		pr_warn("cannot register opp notification\n");
> > +		goto out_free_opp_table;
> > +	}
> > +
> > +	mutex_init(&info->lock);
> >  	info->cpu_dev = cpu_dev;
> >  	info->proc_reg = proc_reg;
> >  	info->sram_reg = IS_ERR(sram_reg) ? NULL : sram_reg;
> >  	info->cpu_clk = cpu_clk;
> >  	info->inter_clk = inter_clk;
> > +	info->opp_freq = clk_get_rate(cpu_clk);
> >  
> >  	/*
> >  	 * If SRAM regulator is present, software "voltage tracking" is needed
> > -- 
> > 2.12.5
>
Viresh Kumar Dec. 10, 2019, 6:43 a.m. UTC | #3
On 09-12-19, 14:56, andrew-sh.cheng wrote:
> On Wed, 2019-11-27 at 14:06 +0530, Viresh Kumar wrote:
> > On 26-11-19, 19:50, Andrew-sh.Cheng wrote:
> > > +		if (!IS_ERR(opp_item))
> > > +			dev_pm_opp_put(opp_item);
> > > +		else
> > > +			freq = 0;
> > > +
> > 
> > What is the purpose of the above code ?
> When dev_pm_opp_find_freq_ceil() doesn't find matching opp item, freq
> value won't be set.
> Set it as 0 for below checking
> > 
> > > +		/* case of current opp is disabled */
> > > +		if (freq == 0 || freq != info->opp_freq) {
> > > +			// find an enable opp item
> > > +			freq = 1;
> > > +			opp_item = dev_pm_opp_find_freq_ceil(info->cpu_dev,
> > > +							     &freq);
> > > +			if (!IS_ERR(opp_item)) {
> > > +				dev_pm_opp_put(opp_item);
> > > +				policy = cpufreq_cpu_get(info->opp_cpu);
> > > +				if (policy) {
> > > +					cpufreq_driver_target(policy,
> > > +						freq / 1000,
> > > +						CPUFREQ_RELATION_L);
> > 
> > Why don't you simply call this instead of all the code in the else
> > block ?
> These else code is used to check "current opp item is disabled or not".
> If not, do nothing.
> If current opp item is disabled, need to find an not-disabled opp item,
> and set frequency to it.

Right. So this notifier helper of yours receive the opp which is getting
disabled, why don't you compare its frequency directly to see if the current OPP
is getting disabled ?
andrew-sh.cheng March 10, 2020, 8:11 a.m. UTC | #4
On Tue, 2019-12-10 at 14:43 +0800, Viresh Kumar wrote:
> On 09-12-19, 14:56, andrew-sh.cheng wrote:
> > On Wed, 2019-11-27 at 14:06 +0530, Viresh Kumar wrote:
> > > On 26-11-19, 19:50, Andrew-sh.Cheng wrote:
> > > > +		if (!IS_ERR(opp_item))
> > > > +			dev_pm_opp_put(opp_item);
> > > > +		else
> > > > +			freq = 0;
> > > > +
> > > 
> > > What is the purpose of the above code ?
> > When dev_pm_opp_find_freq_ceil() doesn't find matching opp item, freq
> > value won't be set.
> > Set it as 0 for below checking
> > > 
> > > > +		/* case of current opp is disabled */
> > > > +		if (freq == 0 || freq != info->opp_freq) {
> > > > +			// find an enable opp item
> > > > +			freq = 1;
> > > > +			opp_item = dev_pm_opp_find_freq_ceil(info->cpu_dev,
> > > > +							     &freq);
> > > > +			if (!IS_ERR(opp_item)) {
> > > > +				dev_pm_opp_put(opp_item);
> > > > +				policy = cpufreq_cpu_get(info->opp_cpu);
> > > > +				if (policy) {
> > > > +					cpufreq_driver_target(policy,
> > > > +						freq / 1000,
> > > > +						CPUFREQ_RELATION_L);
> > > 
> > > Why don't you simply call this instead of all the code in the else
> > > block ?
> > These else code is used to check "current opp item is disabled or not".
> > If not, do nothing.
> > If current opp item is disabled, need to find an not-disabled opp item,
> > and set frequency to it.
> 
> Right. So this notifier helper of yours receive the opp which is getting
> disabled, why don't you compare its frequency directly to see if the current OPP
> is getting disabled ?
Sorry to overlook your question.
This is because when the opp is disabled,
we cannot use dev_pm_opp_get_freq() to get frequency of that opp.
There is a check:
	if (IS_ERR_OR_NULL(opp) || !opp->available) {
		pr_err("%s: Invalid parameters\n", __func__);
		return 0;

>
Viresh Kumar March 11, 2020, 6:06 a.m. UTC | #5
On 10-03-20, 16:11, andrew-sh.cheng wrote:
> On Tue, 2019-12-10 at 14:43 +0800, Viresh Kumar wrote:
> > On 09-12-19, 14:56, andrew-sh.cheng wrote:
> > > On Wed, 2019-11-27 at 14:06 +0530, Viresh Kumar wrote:
> > > > On 26-11-19, 19:50, Andrew-sh.Cheng wrote:
> > > > > +		if (!IS_ERR(opp_item))
> > > > > +			dev_pm_opp_put(opp_item);
> > > > > +		else
> > > > > +			freq = 0;
> > > > > +
> > > > 
> > > > What is the purpose of the above code ?
> > > When dev_pm_opp_find_freq_ceil() doesn't find matching opp item, freq
> > > value won't be set.
> > > Set it as 0 for below checking
> > > > 
> > > > > +		/* case of current opp is disabled */
> > > > > +		if (freq == 0 || freq != info->opp_freq) {
> > > > > +			// find an enable opp item
> > > > > +			freq = 1;
> > > > > +			opp_item = dev_pm_opp_find_freq_ceil(info->cpu_dev,
> > > > > +							     &freq);
> > > > > +			if (!IS_ERR(opp_item)) {
> > > > > +				dev_pm_opp_put(opp_item);
> > > > > +				policy = cpufreq_cpu_get(info->opp_cpu);
> > > > > +				if (policy) {
> > > > > +					cpufreq_driver_target(policy,
> > > > > +						freq / 1000,
> > > > > +						CPUFREQ_RELATION_L);
> > > > 
> > > > Why don't you simply call this instead of all the code in the else
> > > > block ?
> > > These else code is used to check "current opp item is disabled or not".
> > > If not, do nothing.
> > > If current opp item is disabled, need to find an not-disabled opp item,
> > > and set frequency to it.
> > 
> > Right. So this notifier helper of yours receive the opp which is getting
> > disabled, why don't you compare its frequency directly to see if the current OPP
> > is getting disabled ?
> Sorry to overlook your question.
> This is because when the opp is disabled,
> we cannot use dev_pm_opp_get_freq() to get frequency of that opp.
> There is a check:
> 	if (IS_ERR_OR_NULL(opp) || !opp->available) {

I think we can remove the available check here, as we are jut trying
to find frequency of an OPP we already have. Send a patch for that
please.

> 		pr_err("%s: Invalid parameters\n", __func__);
> 		return 0;
> 
> > 
>
andrew-sh.cheng March 12, 2020, 9:12 a.m. UTC | #6
On Wed, 2020-03-11 at 11:36 +0530, Viresh Kumar wrote:
> On 10-03-20, 16:11, andrew-sh.cheng wrote:
> > On Tue, 2019-12-10 at 14:43 +0800, Viresh Kumar wrote:
> > > On 09-12-19, 14:56, andrew-sh.cheng wrote:
> > > > On Wed, 2019-11-27 at 14:06 +0530, Viresh Kumar wrote:
> > > > > On 26-11-19, 19:50, Andrew-sh.Cheng wrote:
> > > > > > +		if (!IS_ERR(opp_item))
> > > > > > +			dev_pm_opp_put(opp_item);
> > > > > > +		else
> > > > > > +			freq = 0;
> > > > > > +
> > > > > 
> > > > > What is the purpose of the above code ?
> > > > When dev_pm_opp_find_freq_ceil() doesn't find matching opp item, freq
> > > > value won't be set.
> > > > Set it as 0 for below checking
> > > > > 
> > > > > > +		/* case of current opp is disabled */
> > > > > > +		if (freq == 0 || freq != info->opp_freq) {
> > > > > > +			// find an enable opp item
> > > > > > +			freq = 1;
> > > > > > +			opp_item = dev_pm_opp_find_freq_ceil(info->cpu_dev,
> > > > > > +							     &freq);
> > > > > > +			if (!IS_ERR(opp_item)) {
> > > > > > +				dev_pm_opp_put(opp_item);
> > > > > > +				policy = cpufreq_cpu_get(info->opp_cpu);
> > > > > > +				if (policy) {
> > > > > > +					cpufreq_driver_target(policy,
> > > > > > +						freq / 1000,
> > > > > > +						CPUFREQ_RELATION_L);
> > > > > 
> > > > > Why don't you simply call this instead of all the code in the else
> > > > > block ?
> > > > These else code is used to check "current opp item is disabled or not".
> > > > If not, do nothing.
> > > > If current opp item is disabled, need to find an not-disabled opp item,
> > > > and set frequency to it.
> > > 
> > > Right. So this notifier helper of yours receive the opp which is getting
> > > disabled, why don't you compare its frequency directly to see if the current OPP
> > > is getting disabled ?
> > Sorry to overlook your question.
> > This is because when the opp is disabled,
> > we cannot use dev_pm_opp_get_freq() to get frequency of that opp.
> > There is a check:
> > 	if (IS_ERR_OR_NULL(opp) || !opp->available) {
> 
> I think we can remove the available check here, as we are jut trying
> to find frequency of an OPP we already have. Send a patch for that
> please.
Got it.
I will do it at next patch set.
> 
> > 		pr_err("%s: Invalid parameters\n", __func__);
> > 		return 0;
> > 
> > > 
> > 
>
andrew-sh.cheng March 13, 2020, 7:22 a.m. UTC | #7
On Wed, 2020-03-11 at 11:36 +0530, Viresh Kumar wrote:
> On 10-03-20, 16:11, andrew-sh.cheng wrote:
> > On Tue, 2019-12-10 at 14:43 +0800, Viresh Kumar wrote:
> > > On 09-12-19, 14:56, andrew-sh.cheng wrote:
> > > > On Wed, 2019-11-27 at 14:06 +0530, Viresh Kumar wrote:
> > > > > On 26-11-19, 19:50, Andrew-sh.Cheng wrote:
> > > > > > +		if (!IS_ERR(opp_item))
> > > > > > +			dev_pm_opp_put(opp_item);
> > > > > > +		else
> > > > > > +			freq = 0;
> > > > > > +
> > > > > 
> > > > > What is the purpose of the above code ?
> > > > When dev_pm_opp_find_freq_ceil() doesn't find matching opp item, freq
> > > > value won't be set.
> > > > Set it as 0 for below checking
> > > > > 
> > > > > > +		/* case of current opp is disabled */
> > > > > > +		if (freq == 0 || freq != info->opp_freq) {
> > > > > > +			// find an enable opp item
> > > > > > +			freq = 1;
> > > > > > +			opp_item = dev_pm_opp_find_freq_ceil(info->cpu_dev,
> > > > > > +							     &freq);
> > > > > > +			if (!IS_ERR(opp_item)) {
> > > > > > +				dev_pm_opp_put(opp_item);
> > > > > > +				policy = cpufreq_cpu_get(info->opp_cpu);
> > > > > > +				if (policy) {
> > > > > > +					cpufreq_driver_target(policy,
> > > > > > +						freq / 1000,
> > > > > > +						CPUFREQ_RELATION_L);
> > > > > 
> > > > > Why don't you simply call this instead of all the code in the else
> > > > > block ?
> > > > These else code is used to check "current opp item is disabled or not".
> > > > If not, do nothing.
> > > > If current opp item is disabled, need to find an not-disabled opp item,
> > > > and set frequency to it.
> > > 
> > > Right. So this notifier helper of yours receive the opp which is getting
> > > disabled, why don't you compare its frequency directly to see if the current OPP
> > > is getting disabled ?
> > Sorry to overlook your question.
> > This is because when the opp is disabled,
> > we cannot use dev_pm_opp_get_freq() to get frequency of that opp.
> > There is a check:
> > 	if (IS_ERR_OR_NULL(opp) || !opp->available) {
> 
> I think we can remove the available check here, as we are jut trying
> to find frequency of an OPP we already have. Send a patch for that
> please.
> 

Hi Viresh,
I have something want to consult you.
For your previous comment, you suggest use read-write lock to replace
mutex lock.
Will it be more efficiently even when all are write lock?
(all lock region are "setting VProc voltage")

> > 		pr_err("%s: Invalid parameters\n", __func__);
> > 		return 0;
> > 
> > > 
> > 
>
Viresh Kumar March 13, 2020, 9:10 a.m. UTC | #8
On 13-03-20, 15:22, andrew-sh.cheng wrote:
> I have something want to consult you.
> For your previous comment, you suggest use read-write lock to replace
> mutex lock.
> Will it be more efficiently even when all are write lock?
> (all lock region are "setting VProc voltage")

The data to be protected here isn't the VProc voltage but the list of
valid OPPs. My idea was if we can make the target() routine run a bit
faster as it really matters as it is called from scheduler hot path.

It won't be wrong to use the mutex the way you have used it right now,
but I think the read lock is much faster, though the read/write lock
is more beneficial in case where there are multiple readers and fewer
writers. The target() routine gets called multiple times here, not
in parallel, and the OPP change notifier won't be called so often.
andrew-sh.cheng April 6, 2020, 9:12 a.m. UTC | #9
On Fri, 2020-03-13 at 14:40 +0530, Viresh Kumar wrote:
> On 13-03-20, 15:22, andrew-sh.cheng wrote:
> > I have something want to consult you.
> > For your previous comment, you suggest use read-write lock to replace
> > mutex lock.
> > Will it be more efficiently even when all are write lock?
> > (all lock region are "setting VProc voltage")
> 
> The data to be protected here isn't the VProc voltage but the list of
> valid OPPs. My idea was if we can make the target() routine run a bit
> faster as it really matters as it is called from scheduler hot path.
> 
> It won't be wrong to use the mutex the way you have used it right now,
> but I think the read lock is much faster, though the read/write lock
> is more beneficial in case where there are multiple readers and fewer
> writers. The target() routine gets called multiple times here, not
> in parallel, and the OPP change notifier won't be called so often.
> 
Hi Viresh,

I will use regulator in the locked region.
And regulator will use mutex_lock.

I use read_lock/write_lock, and there will be below run time error,
Is it due to read_lock/write_lock using spin lock?
write_lock() => _raw_write_lock() @ spinlock.c
Please give me some advices.
Thank you.


[   28.109082] BUG: sleeping function called from invalid context at
kernel/locking/mutex.c:254
[   28.117710] in_atomic(): 1, irqs_disabled(): 0, pid: 1855, name:
sugov:0
[   28.124788] CPU: 0 PID: 1855 Comm: sugov:0 Tainted: G        W
4.19.107 #51
[   28.132440] Hardware name: MediaTek krane sku176 board (DT)
[   28.138006] Call trace:
[   28.140461]  dump_backtrace+0x0/0x17c
[   28.144121]  show_stack+0x20/0x2c
[   28.147432]  dump_stack+0xd4/0x10c
[   28.150831]  ___might_sleep+0x108/0x118
[   28.154659]  __might_sleep+0x50/0x84
[   28.158230]  mutex_lock+0x28/0x60
[   28.161541]  regulator_lock_dependent+0x3c/0x10c
[   28.166152]  regulator_set_voltage+0x48/0xa0
[   28.170417]  mtk_cpufreq_set_voltage+0x16c/0x324
[   28.175046]  mtk_cpufreq_set_target+0x13c/0x2c8
[   28.179574]  __cpufreq_driver_target+0x424/0x4c4
Viresh Kumar April 6, 2020, 9:29 a.m. UTC | #10
On 06-04-20, 17:12, andrew-sh.cheng wrote:
> I will use regulator in the locked region.
> And regulator will use mutex_lock.

Yeah, you can't use spinlock here, use a mutex.
andrew-sh.cheng April 7, 2020, 6:54 a.m. UTC | #11
On Mon, 2020-04-06 at 14:59 +0530, Viresh Kumar wrote:
> On 06-04-20, 17:12, andrew-sh.cheng wrote:
> > I will use regulator in the locked region.
> > And regulator will use mutex_lock.
> 
> Yeah, you can't use spinlock here, use a mutex.
> 
Hi Viresh,

I am not familiar with read/write lock.
Do you mean there is another read/write function, which is not
read_lock()/write_lock(), using mutex but not spinlock?
Could you kindly tell me the functions.
Thank you.
Viresh Kumar April 7, 2020, 8:29 a.m. UTC | #12
On 07-04-20, 14:54, andrew-sh.cheng wrote:
> On Mon, 2020-04-06 at 14:59 +0530, Viresh Kumar wrote:
> > On 06-04-20, 17:12, andrew-sh.cheng wrote:
> > > I will use regulator in the locked region.
> > > And regulator will use mutex_lock.
> > 
> > Yeah, you can't use spinlock here, use a mutex.
> > 
> Hi Viresh,
> 
> I am not familiar with read/write lock.
> Do you mean there is another read/write function, which is not
> read_lock()/write_lock(), using mutex but not spinlock?

Heh, I am asking you to use simple mutex here, leave the read/write
lock thing completely as it won't work here.
andrew-sh.cheng April 7, 2020, 9:09 a.m. UTC | #13
On Tue, 2020-04-07 at 13:59 +0530, Viresh Kumar wrote:
> On 07-04-20, 14:54, andrew-sh.cheng wrote:
> > On Mon, 2020-04-06 at 14:59 +0530, Viresh Kumar wrote:
> > > On 06-04-20, 17:12, andrew-sh.cheng wrote:
> > > > I will use regulator in the locked region.
> > > > And regulator will use mutex_lock.
> > > 
> > > Yeah, you can't use spinlock here, use a mutex.
> > > 
> > Hi Viresh,
> > 
> > I am not familiar with read/write lock.
> > Do you mean there is another read/write function, which is not
> > read_lock()/write_lock(), using mutex but not spinlock?
> 
> Heh, I am asking you to use simple mutex here, leave the read/write
> lock thing completely as it won't work here.
> 
Got it.
Thank you for your patient~
diff mbox series

Patch

diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c
index 4b0cc50dd93b..7c37ab31230a 100644
--- a/drivers/cpufreq/mediatek-cpufreq.c
+++ b/drivers/cpufreq/mediatek-cpufreq.c
@@ -42,6 +42,10 @@  struct mtk_cpu_dvfs_info {
 	struct list_head list_head;
 	int intermediate_voltage;
 	bool need_voltage_tracking;
+	struct mutex lock; /* avoid notify and policy race condition */
+	struct notifier_block opp_nb;
+	int opp_cpu;
+	unsigned long opp_freq;
 };
 
 static LIST_HEAD(dvfs_info_list);
@@ -231,6 +235,7 @@  static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
 	vproc = dev_pm_opp_get_voltage(opp);
 	dev_pm_opp_put(opp);
 
+	mutex_lock(&info->lock);
 	/*
 	 * If the new voltage or the intermediate voltage is higher than the
 	 * current voltage, scale up voltage first.
@@ -242,6 +247,7 @@  static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
 			pr_err("cpu%d: failed to scale up voltage!\n",
 			       policy->cpu);
 			mtk_cpufreq_set_voltage(info, old_vproc);
+			mutex_unlock(&info->lock);
 			return ret;
 		}
 	}
@@ -253,6 +259,7 @@  static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
 		       policy->cpu);
 		mtk_cpufreq_set_voltage(info, old_vproc);
 		WARN_ON(1);
+		mutex_unlock(&info->lock);
 		return ret;
 	}
 
@@ -263,6 +270,7 @@  static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
 		       policy->cpu);
 		clk_set_parent(cpu_clk, armpll);
 		mtk_cpufreq_set_voltage(info, old_vproc);
+		mutex_unlock(&info->lock);
 		return ret;
 	}
 
@@ -273,6 +281,7 @@  static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
 		       policy->cpu);
 		mtk_cpufreq_set_voltage(info, inter_vproc);
 		WARN_ON(1);
+		mutex_unlock(&info->lock);
 		return ret;
 	}
 
@@ -288,15 +297,75 @@  static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
 			clk_set_parent(cpu_clk, info->inter_clk);
 			clk_set_rate(armpll, old_freq_hz);
 			clk_set_parent(cpu_clk, armpll);
+			mutex_unlock(&info->lock);
 			return ret;
 		}
 	}
 
+	info->opp_freq = freq_hz;
+	mutex_unlock(&info->lock);
+
 	return 0;
 }
 
 #define DYNAMIC_POWER "dynamic-power-coefficient"
 
+static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
+				    unsigned long event, void *data)
+{
+	struct dev_pm_opp *opp = data;
+	struct dev_pm_opp *opp_item;
+	struct mtk_cpu_dvfs_info *info =
+		container_of(nb, struct mtk_cpu_dvfs_info, opp_nb);
+	unsigned long freq, volt;
+	struct cpufreq_policy *policy;
+	int ret = 0;
+
+	if (event == OPP_EVENT_ADJUST_VOLTAGE) {
+		freq = dev_pm_opp_get_freq(opp);
+
+		mutex_lock(&info->lock);
+		if (info->opp_freq == freq) {
+			volt = dev_pm_opp_get_voltage(opp);
+			ret = mtk_cpufreq_set_voltage(info, volt);
+			if (ret)
+				dev_err(info->cpu_dev, "failed to scale voltage: %d\n",
+					ret);
+		}
+		mutex_unlock(&info->lock);
+	} else if (event == OPP_EVENT_DISABLE) {
+		freq = info->opp_freq;
+		opp_item = dev_pm_opp_find_freq_ceil(info->cpu_dev, &freq);
+		if (!IS_ERR(opp_item))
+			dev_pm_opp_put(opp_item);
+		else
+			freq = 0;
+
+		/* case of current opp is disabled */
+		if (freq == 0 || freq != info->opp_freq) {
+			// find an enable opp item
+			freq = 1;
+			opp_item = dev_pm_opp_find_freq_ceil(info->cpu_dev,
+							     &freq);
+			if (!IS_ERR(opp_item)) {
+				dev_pm_opp_put(opp_item);
+				policy = cpufreq_cpu_get(info->opp_cpu);
+				if (policy) {
+					cpufreq_driver_target(policy,
+						freq / 1000,
+						CPUFREQ_RELATION_L);
+					cpufreq_cpu_put(policy);
+				}
+			} else {
+				pr_err("%s: all opp items are disabled\n",
+				       __func__);
+			}
+		}
+	}
+
+	return notifier_from_errno(ret);
+}
+
 static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
 {
 	struct device *cpu_dev;
@@ -383,11 +452,21 @@  static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
 	info->intermediate_voltage = dev_pm_opp_get_voltage(opp);
 	dev_pm_opp_put(opp);
 
+	info->opp_cpu = cpu;
+	info->opp_nb.notifier_call = mtk_cpufreq_opp_notifier;
+	ret = dev_pm_opp_register_notifier(cpu_dev, &info->opp_nb);
+	if (ret) {
+		pr_warn("cannot register opp notification\n");
+		goto out_free_opp_table;
+	}
+
+	mutex_init(&info->lock);
 	info->cpu_dev = cpu_dev;
 	info->proc_reg = proc_reg;
 	info->sram_reg = IS_ERR(sram_reg) ? NULL : sram_reg;
 	info->cpu_clk = cpu_clk;
 	info->inter_clk = inter_clk;
+	info->opp_freq = clk_get_rate(cpu_clk);
 
 	/*
 	 * If SRAM regulator is present, software "voltage tracking" is needed