[v6,4/5] cpufreq: qcom: Update the bandwidth levels on frequency change
diff mbox series

Message ID 20200605213332.609-5-sibis@codeaurora.org
State New
Delegated to: viresh kumar
Headers show
Series
  • DDR/L3 Scaling support on SDM845 and SC7180 SoCs
Related show

Commit Message

Sibi Sankar June 5, 2020, 9:33 p.m. UTC
Add support to parse optional OPP table attached to the cpu node when
the OPP bandwidth values are populated. This allows for scaling of
DDR/L3 bandwidth levels with frequency change.

Signed-off-by: Sibi Sankar <sibis@codeaurora.org>
---

v6:
 * Add global flag to distinguish between voltage update and opp add.
   Use the same flag before trying to scale ddr/l3 bw [Viresh]
 * Use dev_pm_opp_find_freq_ceil to grab all opps [Viresh] 
 * Move dev_pm_opp_of_find_icc_paths into probe [Viresh]

v5:
 * Use dev_pm_opp_adjust_voltage instead [Viresh]
 * Misc cleanup

v4:
 * Split fast switch disable into another patch [Lukasz]

 drivers/cpufreq/qcom-cpufreq-hw.c | 82 ++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 2 deletions(-)

Comments

Matthias Kaehlcke June 15, 2020, 5:25 p.m. UTC | #1
Hi Sibi,

On Sat, Jun 06, 2020 at 03:03:31AM +0530, Sibi Sankar wrote:
> Add support to parse optional OPP table attached to the cpu node when
> the OPP bandwidth values are populated. This allows for scaling of
> DDR/L3 bandwidth levels with frequency change.
> 
> Signed-off-by: Sibi Sankar <sibis@codeaurora.org>
> ---
> 
> v6:
>  * Add global flag to distinguish between voltage update and opp add.
>    Use the same flag before trying to scale ddr/l3 bw [Viresh]
>  * Use dev_pm_opp_find_freq_ceil to grab all opps [Viresh] 
>  * Move dev_pm_opp_of_find_icc_paths into probe [Viresh]
> 
> v5:
>  * Use dev_pm_opp_adjust_voltage instead [Viresh]
>  * Misc cleanup
> 
> v4:
>  * Split fast switch disable into another patch [Lukasz]
> 
>  drivers/cpufreq/qcom-cpufreq-hw.c | 82 ++++++++++++++++++++++++++++++-
>  1 file changed, 80 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
> index fc92a8842e252..8fa6ab6e0e4b6 100644
> --- a/drivers/cpufreq/qcom-cpufreq-hw.c
> +++ b/drivers/cpufreq/qcom-cpufreq-hw.c
> @@ -6,6 +6,7 @@
>  #include <linux/bitfield.h>
>  #include <linux/cpufreq.h>
>  #include <linux/init.h>
> +#include <linux/interconnect.h>
>  #include <linux/kernel.h>
>  #include <linux/module.h>
>  #include <linux/of_address.h>
> @@ -30,6 +31,48 @@
>  
>  static unsigned long cpu_hw_rate, xo_rate;
>  static struct platform_device *global_pdev;
> +static bool icc_scaling_enabled;

It seem you rely on 'icc_scaling_enabled' to be initialized to 'false'.
This works during the first initialization, but not if the 'device' is
unbound/rebound. In theory things shouldn't be different in a succesive
initialization, however for robustness the variable should be explicitly
set to 'false' somewhere in the code path (_probe(), _read_lut(), ...).

> +static int qcom_cpufreq_set_bw(struct cpufreq_policy *policy,
> +			       unsigned long freq_khz)
> +{
> +	unsigned long freq_hz = freq_khz * 1000;
> +	struct dev_pm_opp *opp;
> +	struct device *dev;
> +	int ret;
> +
> +	dev = get_cpu_device(policy->cpu);
> +	if (!dev)
> +		return -ENODEV;
> +
> +	opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true);
> +	if (IS_ERR(opp))
> +		return PTR_ERR(opp);
> +
> +	ret = dev_pm_opp_set_bw(dev, opp);
> +	dev_pm_opp_put(opp);
> +	return ret;
> +}
> +
> +static int qcom_cpufreq_update_opp(struct device *cpu_dev,
> +				   unsigned long freq_khz,
> +				   unsigned long volt)
> +{
> +	unsigned long freq_hz = freq_khz * 1000;
> +	int ret;
> +
> +	/* Skip voltage update if the opp table is not available */
> +	if (!icc_scaling_enabled)
> +		return dev_pm_opp_add(cpu_dev, freq_hz, volt);
> +
> +	ret = dev_pm_opp_adjust_voltage(cpu_dev, freq_hz, volt, volt, volt);
> +	if (ret) {
> +		dev_err(cpu_dev, "Voltage update failed freq=%ld\n", freq_khz);
> +		return ret;
> +	}
> +
> +	return dev_pm_opp_enable(cpu_dev, freq_hz);
> +}
>  
>  static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy,
>  					unsigned int index)
> @@ -39,6 +82,9 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy,
>  
>  	writel_relaxed(index, perf_state_reg);
>  
> +	if (icc_scaling_enabled)
> +		qcom_cpufreq_set_bw(policy, freq);
> +
>  	arch_set_freq_scale(policy->related_cpus, freq,
>  			    policy->cpuinfo.max_freq);
>  	return 0;
> @@ -89,11 +135,31 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
>  	u32 data, src, lval, i, core_count, prev_freq = 0, freq;
>  	u32 volt;
>  	struct cpufreq_frequency_table	*table;
> +	struct dev_pm_opp *opp;
> +	unsigned long rate;
> +	int ret;
>  
>  	table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL);
>  	if (!table)
>  		return -ENOMEM;
>  
> +	ret = dev_pm_opp_of_add_table(cpu_dev);
> +	if (!ret) {
> +		/* Disable all opps and cross-validate against LUT */

nit: IIUC the cross-validation doesn't happen in this branch, so the
comment is a bit misleading. Maybe change it to "Disable all opps to
cross-validate against the LUT {below,later}".

> +		icc_scaling_enabled = true;
> +		for (rate = 0; ; rate++) {
> +			opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate);
> +			if (IS_ERR(opp))
> +				break;
> +
> +			dev_pm_opp_put(opp);
> +			dev_pm_opp_disable(cpu_dev, rate);
> +		}
> +	} else if (ret != -ENODEV) {
> +		dev_err(cpu_dev, "Invalid opp table in device tree\n");
> +		return ret;
> +	}
> +
>  	for (i = 0; i < LUT_MAX_ENTRIES; i++) {
>  		data = readl_relaxed(base + REG_FREQ_LUT +
>  				      i * LUT_ROW_SIZE);
> @@ -112,7 +178,7 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
>  
>  		if (freq != prev_freq && core_count != LUT_TURBO_IND) {
>  			table[i].frequency = freq;
> -			dev_pm_opp_add(cpu_dev, freq * 1000, volt);
> +			qcom_cpufreq_update_opp(cpu_dev, freq, volt);

This is the cross-validation mentioned above, right? Shouldn't it include
a check of the return value?

>  			dev_dbg(cpu_dev, "index=%d freq=%d, core_count %d\n", i,
>  				freq, core_count);
>  		} else if (core_count == LUT_TURBO_IND) {
> @@ -133,7 +199,8 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
>  			if (prev->frequency == CPUFREQ_ENTRY_INVALID) {
>  				prev->frequency = prev_freq;
>  				prev->flags = CPUFREQ_BOOST_FREQ;
> -				dev_pm_opp_add(cpu_dev,	prev_freq * 1000, volt);
> +				qcom_cpufreq_update_opp(cpu_dev, prev_freq,
> +							volt);

ditto

nit: with the updated max line length it isn't necessary anymore to break
this into multiple lines (https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/checkpatch.pl?h=v5.8-rc1#n54),
though the coding style still has the old limit.
Sibi Sankar June 16, 2020, 9:05 p.m. UTC | #2
Hey Matthias,
Thanks for taking time to review
the series.

On 2020-06-15 22:55, Matthias Kaehlcke wrote:
> Hi Sibi,
> 
> On Sat, Jun 06, 2020 at 03:03:31AM +0530, Sibi Sankar wrote:
>> Add support to parse optional OPP table attached to the cpu node when
>> the OPP bandwidth values are populated. This allows for scaling of
>> DDR/L3 bandwidth levels with frequency change.
>> 
>> Signed-off-by: Sibi Sankar <sibis@codeaurora.org>
>> ---
>> 
>> v6:
>>  * Add global flag to distinguish between voltage update and opp add.
>>    Use the same flag before trying to scale ddr/l3 bw [Viresh]
>>  * Use dev_pm_opp_find_freq_ceil to grab all opps [Viresh]
>>  * Move dev_pm_opp_of_find_icc_paths into probe [Viresh]
>> 
>> v5:
>>  * Use dev_pm_opp_adjust_voltage instead [Viresh]
>>  * Misc cleanup
>> 
>> v4:
>>  * Split fast switch disable into another patch [Lukasz]
>> 
>>  drivers/cpufreq/qcom-cpufreq-hw.c | 82 
>> ++++++++++++++++++++++++++++++-
>>  1 file changed, 80 insertions(+), 2 deletions(-)
>> 
>> diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c 
>> b/drivers/cpufreq/qcom-cpufreq-hw.c
>> index fc92a8842e252..8fa6ab6e0e4b6 100644
>> --- a/drivers/cpufreq/qcom-cpufreq-hw.c
>> +++ b/drivers/cpufreq/qcom-cpufreq-hw.c
>> @@ -6,6 +6,7 @@
>>  #include <linux/bitfield.h>
>>  #include <linux/cpufreq.h>
>>  #include <linux/init.h>
>> +#include <linux/interconnect.h>
>>  #include <linux/kernel.h>
>>  #include <linux/module.h>
>>  #include <linux/of_address.h>
>> @@ -30,6 +31,48 @@
>> 
>>  static unsigned long cpu_hw_rate, xo_rate;
>>  static struct platform_device *global_pdev;
>> +static bool icc_scaling_enabled;
> 
> It seem you rely on 'icc_scaling_enabled' to be initialized to 'false'.
> This works during the first initialization, but not if the 'device' is
> unbound/rebound. In theory things shouldn't be different in a succesive

yes it shouldn't but sure I'll set
it to false along the way.

> initialization, however for robustness the variable should be 
> explicitly
> set to 'false' somewhere in the code path (_probe(), _read_lut(), ...).

> 
>> +static int qcom_cpufreq_set_bw(struct cpufreq_policy *policy,
>> +			       unsigned long freq_khz)
>> +{
>> +	unsigned long freq_hz = freq_khz * 1000;
>> +	struct dev_pm_opp *opp;
>> +	struct device *dev;
>> +	int ret;
>> +
>> +	dev = get_cpu_device(policy->cpu);
>> +	if (!dev)
>> +		return -ENODEV;
>> +
>> +	opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true);
>> +	if (IS_ERR(opp))
>> +		return PTR_ERR(opp);
>> +
>> +	ret = dev_pm_opp_set_bw(dev, opp);
>> +	dev_pm_opp_put(opp);
>> +	return ret;
>> +}
>> +
>> +static int qcom_cpufreq_update_opp(struct device *cpu_dev,
>> +				   unsigned long freq_khz,
>> +				   unsigned long volt)
>> +{
>> +	unsigned long freq_hz = freq_khz * 1000;
>> +	int ret;
>> +
>> +	/* Skip voltage update if the opp table is not available */
>> +	if (!icc_scaling_enabled)
>> +		return dev_pm_opp_add(cpu_dev, freq_hz, volt);
>> +
>> +	ret = dev_pm_opp_adjust_voltage(cpu_dev, freq_hz, volt, volt, volt);
>> +	if (ret) {
>> +		dev_err(cpu_dev, "Voltage update failed freq=%ld\n", freq_khz);
>> +		return ret;
>> +	}
>> +
>> +	return dev_pm_opp_enable(cpu_dev, freq_hz);
>> +}
>> 
>>  static int qcom_cpufreq_hw_target_index(struct cpufreq_policy 
>> *policy,
>>  					unsigned int index)
>> @@ -39,6 +82,9 @@ static int qcom_cpufreq_hw_target_index(struct 
>> cpufreq_policy *policy,
>> 
>>  	writel_relaxed(index, perf_state_reg);
>> 
>> +	if (icc_scaling_enabled)
>> +		qcom_cpufreq_set_bw(policy, freq);
>> +
>>  	arch_set_freq_scale(policy->related_cpus, freq,
>>  			    policy->cpuinfo.max_freq);
>>  	return 0;
>> @@ -89,11 +135,31 @@ static int qcom_cpufreq_hw_read_lut(struct device 
>> *cpu_dev,
>>  	u32 data, src, lval, i, core_count, prev_freq = 0, freq;
>>  	u32 volt;
>>  	struct cpufreq_frequency_table	*table;
>> +	struct dev_pm_opp *opp;
>> +	unsigned long rate;
>> +	int ret;
>> 
>>  	table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL);
>>  	if (!table)
>>  		return -ENOMEM;
>> 
>> +	ret = dev_pm_opp_of_add_table(cpu_dev);
>> +	if (!ret) {
>> +		/* Disable all opps and cross-validate against LUT */
> 
> nit: IIUC the cross-validation doesn't happen in this branch, so the
> comment is a bit misleading. Maybe change it to "Disable all opps to
> cross-validate against the LUT {below,later}".

sure will re-word it.

> 
>> +		icc_scaling_enabled = true;
>> +		for (rate = 0; ; rate++) {
>> +			opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate);
>> +			if (IS_ERR(opp))
>> +				break;
>> +
>> +			dev_pm_opp_put(opp);
>> +			dev_pm_opp_disable(cpu_dev, rate);
>> +		}
>> +	} else if (ret != -ENODEV) {
>> +		dev_err(cpu_dev, "Invalid opp table in device tree\n");
>> +		return ret;
>> +	}
>> +
>>  	for (i = 0; i < LUT_MAX_ENTRIES; i++) {
>>  		data = readl_relaxed(base + REG_FREQ_LUT +
>>  				      i * LUT_ROW_SIZE);
>> @@ -112,7 +178,7 @@ static int qcom_cpufreq_hw_read_lut(struct device 
>> *cpu_dev,
>> 
>>  		if (freq != prev_freq && core_count != LUT_TURBO_IND) {
>>  			table[i].frequency = freq;
>> -			dev_pm_opp_add(cpu_dev, freq * 1000, volt);
>> +			qcom_cpufreq_update_opp(cpu_dev, freq, volt);
> 
> This is the cross-validation mentioned above, right? Shouldn't it 
> include
> a check of the return value?

Yes, this is the cross-validation step,
we adjust the voltage if opp-tables are
present/added successfully and enable
them, else we would just do a add opp.
We don't want to exit early on a single
opp failure. We will error out a bit
later if the opp-count ends up to be
zero.

> 
>>  			dev_dbg(cpu_dev, "index=%d freq=%d, core_count %d\n", i,
>>  				freq, core_count);
>>  		} else if (core_count == LUT_TURBO_IND) {
>> @@ -133,7 +199,8 @@ static int qcom_cpufreq_hw_read_lut(struct device 
>> *cpu_dev,
>>  			if (prev->frequency == CPUFREQ_ENTRY_INVALID) {
>>  				prev->frequency = prev_freq;
>>  				prev->flags = CPUFREQ_BOOST_FREQ;
>> -				dev_pm_opp_add(cpu_dev,	prev_freq * 1000, volt);
>> +				qcom_cpufreq_update_opp(cpu_dev, prev_freq,
>> +							volt);
> 
> ditto
> 
> nit: with the updated max line length it isn't necessary anymore to 
> break
> this into multiple lines
> (https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/checkpatch.pl?h=v5.8-rc1#n54),
> though the coding style still has the old limit.

yeah I'll expand it.
Matthias Kaehlcke June 16, 2020, 10:11 p.m. UTC | #3
Hi Sibi,

after doing the review I noticed that Viresh replied on the cover letter
that he picked the series up for v5.9, so I'm not sure if it makes sense
to send a v7.

On Wed, Jun 17, 2020 at 02:35:00AM +0530, Sibi Sankar wrote:

> > > @@ -112,7 +178,7 @@ static int qcom_cpufreq_hw_read_lut(struct
> > > device *cpu_dev,
> > > 
> > >  		if (freq != prev_freq && core_count != LUT_TURBO_IND) {
> > >  			table[i].frequency = freq;
> > > -			dev_pm_opp_add(cpu_dev, freq * 1000, volt);
> > > +			qcom_cpufreq_update_opp(cpu_dev, freq, volt);
> > 
> > This is the cross-validation mentioned above, right? Shouldn't it
> > include
> > a check of the return value?
> 
> Yes, this is the cross-validation step,
> we adjust the voltage if opp-tables are
> present/added successfully and enable
> them, else we would just do a add opp.
> We don't want to exit early on a single
> opp failure. We will error out a bit
> later if the opp-count ends up to be
> zero.

At least an error/warning message would seem convenient when adjusting/adding
an OPP fails, otherwise you would only notice by looking at the sysfs
attributes (if you'd even spot a single/few OPPs to be missing).
Viresh Kumar June 17, 2020, 4:52 a.m. UTC | #4
On 16-06-20, 15:11, Matthias Kaehlcke wrote:
> Hi Sibi,
> 
> after doing the review I noticed that Viresh replied on the cover letter
> that he picked the series up for v5.9, so I'm not sure if it makes sense
> to send a v7.

Its okay, you can send a new version and I will apply that instead.
Sibi Sankar June 17, 2020, 4:43 p.m. UTC | #5
On 2020-06-17 03:41, Matthias Kaehlcke wrote:
> Hi Sibi,
> 
> after doing the review I noticed that Viresh replied on the cover 
> letter
> that he picked the series up for v5.9, so I'm not sure if it makes 
> sense
> to send a v7.
> 
> On Wed, Jun 17, 2020 at 02:35:00AM +0530, Sibi Sankar wrote:
> 
>> > > @@ -112,7 +178,7 @@ static int qcom_cpufreq_hw_read_lut(struct
>> > > device *cpu_dev,
>> > >
>> > >  		if (freq != prev_freq && core_count != LUT_TURBO_IND) {
>> > >  			table[i].frequency = freq;
>> > > -			dev_pm_opp_add(cpu_dev, freq * 1000, volt);
>> > > +			qcom_cpufreq_update_opp(cpu_dev, freq, volt);
>> >
>> > This is the cross-validation mentioned above, right? Shouldn't it
>> > include
>> > a check of the return value?
>> 
>> Yes, this is the cross-validation step,
>> we adjust the voltage if opp-tables are
>> present/added successfully and enable
>> them, else we would just do a add opp.
>> We don't want to exit early on a single
>> opp failure. We will error out a bit
>> later if the opp-count ends up to be
>> zero.
> 
> At least an error/warning message would seem convenient when 
> adjusting/adding
> an OPP fails, otherwise you would only notice by looking at the sysfs
> attributes (if you'd even spot a single/few OPPs to be missing).

I did consider the case where adjust
voltage fails and we do report the
freq for which it fails for as well.
If adding a OPP fails we will still
it being listed in the sysfs cpufreq
scaling_available_frequencies since
it lists the freq_table in khz there
instead.
Matthias Kaehlcke June 18, 2020, 5:05 p.m. UTC | #6
On Wed, Jun 17, 2020 at 10:13:21PM +0530, Sibi Sankar wrote:
> On 2020-06-17 03:41, Matthias Kaehlcke wrote:
> > Hi Sibi,
> > 
> > after doing the review I noticed that Viresh replied on the cover letter
> > that he picked the series up for v5.9, so I'm not sure if it makes sense
> > to send a v7.
> > 
> > On Wed, Jun 17, 2020 at 02:35:00AM +0530, Sibi Sankar wrote:
> > 
> > > > > @@ -112,7 +178,7 @@ static int qcom_cpufreq_hw_read_lut(struct
> > > > > device *cpu_dev,
> > > > >
> > > > >  		if (freq != prev_freq && core_count != LUT_TURBO_IND) {
> > > > >  			table[i].frequency = freq;
> > > > > -			dev_pm_opp_add(cpu_dev, freq * 1000, volt);
> > > > > +			qcom_cpufreq_update_opp(cpu_dev, freq, volt);
> > > >
> > > > This is the cross-validation mentioned above, right? Shouldn't it
> > > > include
> > > > a check of the return value?
> > > 
> > > Yes, this is the cross-validation step,
> > > we adjust the voltage if opp-tables are
> > > present/added successfully and enable
> > > them, else we would just do a add opp.
> > > We don't want to exit early on a single
> > > opp failure. We will error out a bit
> > > later if the opp-count ends up to be
> > > zero.
> > 
> > At least an error/warning message would seem convenient when
> > adjusting/adding
> > an OPP fails, otherwise you would only notice by looking at the sysfs
> > attributes (if you'd even spot a single/few OPPs to be missing).
> 
> I did consider the case where adjust
> voltage fails and we do report the
> freq for which it fails for as well.
> If adding a OPP fails we will still
> it being listed in the sysfs cpufreq
> scaling_available_frequencies since
> it lists the freq_table in khz there
> instead.

Ah, right, I missed that v6 added the error log to
qcom_cpufreq_update_opp(), please ignore my comment :)

Patch
diff mbox series

diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index fc92a8842e252..8fa6ab6e0e4b6 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -6,6 +6,7 @@ 
 #include <linux/bitfield.h>
 #include <linux/cpufreq.h>
 #include <linux/init.h>
+#include <linux/interconnect.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
@@ -30,6 +31,48 @@ 
 
 static unsigned long cpu_hw_rate, xo_rate;
 static struct platform_device *global_pdev;
+static bool icc_scaling_enabled;
+
+static int qcom_cpufreq_set_bw(struct cpufreq_policy *policy,
+			       unsigned long freq_khz)
+{
+	unsigned long freq_hz = freq_khz * 1000;
+	struct dev_pm_opp *opp;
+	struct device *dev;
+	int ret;
+
+	dev = get_cpu_device(policy->cpu);
+	if (!dev)
+		return -ENODEV;
+
+	opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true);
+	if (IS_ERR(opp))
+		return PTR_ERR(opp);
+
+	ret = dev_pm_opp_set_bw(dev, opp);
+	dev_pm_opp_put(opp);
+	return ret;
+}
+
+static int qcom_cpufreq_update_opp(struct device *cpu_dev,
+				   unsigned long freq_khz,
+				   unsigned long volt)
+{
+	unsigned long freq_hz = freq_khz * 1000;
+	int ret;
+
+	/* Skip voltage update if the opp table is not available */
+	if (!icc_scaling_enabled)
+		return dev_pm_opp_add(cpu_dev, freq_hz, volt);
+
+	ret = dev_pm_opp_adjust_voltage(cpu_dev, freq_hz, volt, volt, volt);
+	if (ret) {
+		dev_err(cpu_dev, "Voltage update failed freq=%ld\n", freq_khz);
+		return ret;
+	}
+
+	return dev_pm_opp_enable(cpu_dev, freq_hz);
+}
 
 static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy,
 					unsigned int index)
@@ -39,6 +82,9 @@  static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy,
 
 	writel_relaxed(index, perf_state_reg);
 
+	if (icc_scaling_enabled)
+		qcom_cpufreq_set_bw(policy, freq);
+
 	arch_set_freq_scale(policy->related_cpus, freq,
 			    policy->cpuinfo.max_freq);
 	return 0;
@@ -89,11 +135,31 @@  static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
 	u32 data, src, lval, i, core_count, prev_freq = 0, freq;
 	u32 volt;
 	struct cpufreq_frequency_table	*table;
+	struct dev_pm_opp *opp;
+	unsigned long rate;
+	int ret;
 
 	table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL);
 	if (!table)
 		return -ENOMEM;
 
+	ret = dev_pm_opp_of_add_table(cpu_dev);
+	if (!ret) {
+		/* Disable all opps and cross-validate against LUT */
+		icc_scaling_enabled = true;
+		for (rate = 0; ; rate++) {
+			opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate);
+			if (IS_ERR(opp))
+				break;
+
+			dev_pm_opp_put(opp);
+			dev_pm_opp_disable(cpu_dev, rate);
+		}
+	} else if (ret != -ENODEV) {
+		dev_err(cpu_dev, "Invalid opp table in device tree\n");
+		return ret;
+	}
+
 	for (i = 0; i < LUT_MAX_ENTRIES; i++) {
 		data = readl_relaxed(base + REG_FREQ_LUT +
 				      i * LUT_ROW_SIZE);
@@ -112,7 +178,7 @@  static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
 
 		if (freq != prev_freq && core_count != LUT_TURBO_IND) {
 			table[i].frequency = freq;
-			dev_pm_opp_add(cpu_dev, freq * 1000, volt);
+			qcom_cpufreq_update_opp(cpu_dev, freq, volt);
 			dev_dbg(cpu_dev, "index=%d freq=%d, core_count %d\n", i,
 				freq, core_count);
 		} else if (core_count == LUT_TURBO_IND) {
@@ -133,7 +199,8 @@  static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
 			if (prev->frequency == CPUFREQ_ENTRY_INVALID) {
 				prev->frequency = prev_freq;
 				prev->flags = CPUFREQ_BOOST_FREQ;
-				dev_pm_opp_add(cpu_dev,	prev_freq * 1000, volt);
+				qcom_cpufreq_update_opp(cpu_dev, prev_freq,
+							volt);
 			}
 
 			break;
@@ -254,6 +321,7 @@  static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
 	void __iomem *base = policy->driver_data - REG_PERF_STATE;
 
 	dev_pm_opp_remove_all_dynamic(cpu_dev);
+	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
 	kfree(policy->freq_table);
 	devm_iounmap(&global_pdev->dev, base);
 
@@ -282,6 +350,7 @@  static struct cpufreq_driver cpufreq_qcom_hw_driver = {
 
 static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev)
 {
+	struct device *cpu_dev;
 	struct clk *clk;
 	int ret;
 
@@ -301,6 +370,15 @@  static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev)
 
 	global_pdev = pdev;
 
+	/* Check for optional interconnect paths on CPU0 */
+	cpu_dev = get_cpu_device(0);
+	if (!cpu_dev)
+		return -EPROBE_DEFER;
+
+	ret = dev_pm_opp_of_find_icc_paths(cpu_dev, NULL);
+	if (ret)
+		return ret;
+
 	ret = cpufreq_register_driver(&cpufreq_qcom_hw_driver);
 	if (ret)
 		dev_err(&pdev->dev, "CPUFreq HW driver failed to register\n");