diff mbox series

[v5,4/5] cpufreq: qcom: Update the bandwidth levels on frequency change

Message ID 20200527202153.11659-5-sibis@codeaurora.org (mailing list archive)
State Superseded
Headers show
Series DDR/L3 Scaling support on SDM845 and SC7180 SoCs | expand

Commit Message

Sibi Sankar May 27, 2020, 8:21 p.m. UTC
Add support to parse optional OPP table attached to the cpu node when
the OPP bandwidth values are populated. This allows for scaling of
DDR/L3 bandwidth levels with frequency change.

Signed-off-by: Sibi Sankar <sibis@codeaurora.org>
---

V5:
 * Use dev_pm_opp_adjust_voltage instead [Viresh]
 * Misc cleanup

v4:
 * Split fast switch disable into another patch [Lukasz]

 drivers/cpufreq/qcom-cpufreq-hw.c | 77 ++++++++++++++++++++++++++++++-
 1 file changed, 75 insertions(+), 2 deletions(-)

Comments

Viresh Kumar May 29, 2020, 10 a.m. UTC | #1
On 28-05-20, 01:51, Sibi Sankar wrote:
> Add support to parse optional OPP table attached to the cpu node when
> the OPP bandwidth values are populated. This allows for scaling of
> DDR/L3 bandwidth levels with frequency change.
> 
> Signed-off-by: Sibi Sankar <sibis@codeaurora.org>
> ---
> 
> V5:
>  * Use dev_pm_opp_adjust_voltage instead [Viresh]
>  * Misc cleanup
> 
> v4:
>  * Split fast switch disable into another patch [Lukasz]
> 
>  drivers/cpufreq/qcom-cpufreq-hw.c | 77 ++++++++++++++++++++++++++++++-
>  1 file changed, 75 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
> index fc92a8842e252..fbd73d106a3ae 100644
> --- a/drivers/cpufreq/qcom-cpufreq-hw.c
> +++ b/drivers/cpufreq/qcom-cpufreq-hw.c
> @@ -6,6 +6,7 @@
>  #include <linux/bitfield.h>
>  #include <linux/cpufreq.h>
>  #include <linux/init.h>
> +#include <linux/interconnect.h>
>  #include <linux/kernel.h>
>  #include <linux/module.h>
>  #include <linux/of_address.h>
> @@ -31,6 +32,52 @@
>  static unsigned long cpu_hw_rate, xo_rate;
>  static struct platform_device *global_pdev;
>  
> +static int qcom_cpufreq_set_bw(struct cpufreq_policy *policy,
> +			       unsigned long freq_khz)
> +{
> +	unsigned long freq_hz = freq_khz * 1000;
> +	struct dev_pm_opp *opp;
> +	struct device *dev;
> +	int ret;
> +
> +	dev = get_cpu_device(policy->cpu);
> +	if (!dev)
> +		return -ENODEV;
> +
> +	opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true);
> +	if (IS_ERR(opp))
> +		return PTR_ERR(opp);
> +
> +	ret = dev_pm_opp_set_bw(dev, opp);
> +	dev_pm_opp_put(opp);
> +	return ret;
> +}
> +
> +static int qcom_cpufreq_update_opp(struct device *cpu_dev,
> +				   unsigned long freq_khz,
> +				   unsigned long volt)
> +{
> +	unsigned long freq_hz = freq_khz * 1000;
> +
> +	if (dev_pm_opp_adjust_voltage(cpu_dev, freq_hz, volt, volt, volt))
> +		return dev_pm_opp_add(cpu_dev, freq_hz, volt);

What's going on here ? Why add OPP here ?

> +
> +	/* Enable the opp after voltage update */
> +	return dev_pm_opp_enable(cpu_dev, freq_hz);
> +}
> +
> +/* Check for optional interconnect paths on CPU0 */
> +static int qcom_cpufreq_find_icc_paths(struct device *dev)
> +{
> +	struct device *cpu_dev;
> +
> +	cpu_dev = get_cpu_device(0);
> +	if (!cpu_dev)
> +		return -EPROBE_DEFER;
> +
> +	return dev_pm_opp_of_find_icc_paths(cpu_dev, NULL);
> +}
> +

open code this into the probe routine.

>  static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy,
>  					unsigned int index)
>  {
> @@ -39,6 +86,8 @@ static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy,
>  
>  	writel_relaxed(index, perf_state_reg);
>  
> +	qcom_cpufreq_set_bw(policy, freq);
> +
>  	arch_set_freq_scale(policy->related_cpus, freq,
>  			    policy->cpuinfo.max_freq);
>  	return 0;
> @@ -88,12 +137,30 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
>  {
>  	u32 data, src, lval, i, core_count, prev_freq = 0, freq;
>  	u32 volt;
> +	u64 rate;
>  	struct cpufreq_frequency_table	*table;
> +	struct device_node *opp_table_np, *np;
> +	int ret;
>  
>  	table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL);
>  	if (!table)
>  		return -ENOMEM;
>  
> +	ret = dev_pm_opp_of_add_table(cpu_dev);
> +	if (!ret) {
> +		/* Disable all opps and cross-validate against LUT */
> +		opp_table_np = dev_pm_opp_of_get_opp_desc_node(cpu_dev);
> +		for_each_available_child_of_node(opp_table_np, np) {
> +			ret = of_property_read_u64(np, "opp-hz", &rate);

No way, please use dev_pm_opp_find_freq_*() here instead to grab OPPs
one by one.

> +			if (!ret)
> +				dev_pm_opp_disable(cpu_dev, rate);
> +		}
> +		of_node_put(opp_table_np);
> +	} else if (ret != -ENODEV) {
> +		dev_err(cpu_dev, "Invalid OPP table in Device tree\n");
> +		return ret;
> +	}

Rather put this in the if (ret) block, so that the else part doesn't
need extra indentation.

> +
>  	for (i = 0; i < LUT_MAX_ENTRIES; i++) {
>  		data = readl_relaxed(base + REG_FREQ_LUT +
>  				      i * LUT_ROW_SIZE);
> @@ -112,7 +179,7 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
>  
>  		if (freq != prev_freq && core_count != LUT_TURBO_IND) {
>  			table[i].frequency = freq;
> -			dev_pm_opp_add(cpu_dev, freq * 1000, volt);
> +			qcom_cpufreq_update_opp(cpu_dev, freq, volt);
>  			dev_dbg(cpu_dev, "index=%d freq=%d, core_count %d\n", i,
>  				freq, core_count);
>  		} else if (core_count == LUT_TURBO_IND) {
> @@ -133,7 +200,8 @@ static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
>  			if (prev->frequency == CPUFREQ_ENTRY_INVALID) {
>  				prev->frequency = prev_freq;
>  				prev->flags = CPUFREQ_BOOST_FREQ;
> -				dev_pm_opp_add(cpu_dev,	prev_freq * 1000, volt);
> +				qcom_cpufreq_update_opp(cpu_dev, prev_freq,
> +							volt);
>  			}
>  
>  			break;
Sibi Sankar May 29, 2020, 11:30 a.m. UTC | #2
Hey Viresh,
Thanks for taking time to review the
series :)

On 2020-05-29 15:30, Viresh Kumar wrote:
> On 28-05-20, 01:51, Sibi Sankar wrote:
>> Add support to parse optional OPP table attached to the cpu node when
>> the OPP bandwidth values are populated. This allows for scaling of
>> DDR/L3 bandwidth levels with frequency change.
>> 
>> Signed-off-by: Sibi Sankar <sibis@codeaurora.org>
>> ---
>> 
>> V5:
>>  * Use dev_pm_opp_adjust_voltage instead [Viresh]
>>  * Misc cleanup
>> 
>> v4:
>>  * Split fast switch disable into another patch [Lukasz]
>> 
>>  drivers/cpufreq/qcom-cpufreq-hw.c | 77 
>> ++++++++++++++++++++++++++++++-
>>  1 file changed, 75 insertions(+), 2 deletions(-)
>> 
>> diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c 
>> b/drivers/cpufreq/qcom-cpufreq-hw.c
>> index fc92a8842e252..fbd73d106a3ae 100644
>> --- a/drivers/cpufreq/qcom-cpufreq-hw.c
>> +++ b/drivers/cpufreq/qcom-cpufreq-hw.c
>> @@ -6,6 +6,7 @@
>>  #include <linux/bitfield.h>
>>  #include <linux/cpufreq.h>
>>  #include <linux/init.h>
>> +#include <linux/interconnect.h>
>>  #include <linux/kernel.h>
>>  #include <linux/module.h>
>>  #include <linux/of_address.h>
>> @@ -31,6 +32,52 @@
>>  static unsigned long cpu_hw_rate, xo_rate;
>>  static struct platform_device *global_pdev;
>> 
>> +static int qcom_cpufreq_set_bw(struct cpufreq_policy *policy,
>> +			       unsigned long freq_khz)
>> +{
>> +	unsigned long freq_hz = freq_khz * 1000;
>> +	struct dev_pm_opp *opp;
>> +	struct device *dev;
>> +	int ret;
>> +
>> +	dev = get_cpu_device(policy->cpu);
>> +	if (!dev)
>> +		return -ENODEV;
>> +
>> +	opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true);
>> +	if (IS_ERR(opp))
>> +		return PTR_ERR(opp);
>> +
>> +	ret = dev_pm_opp_set_bw(dev, opp);
>> +	dev_pm_opp_put(opp);
>> +	return ret;
>> +}
>> +
>> +static int qcom_cpufreq_update_opp(struct device *cpu_dev,
>> +				   unsigned long freq_khz,
>> +				   unsigned long volt)
>> +{
>> +	unsigned long freq_hz = freq_khz * 1000;
>> +
>> +	if (dev_pm_opp_adjust_voltage(cpu_dev, freq_hz, volt, volt, volt))
>> +		return dev_pm_opp_add(cpu_dev, freq_hz, volt);
> 
> What's going on here ? Why add OPP here ?

We update the voltage if the OPPs were
initially added as part of
dev_pm_opp_of_add_table. However,
if the cpu node does not have an
opp table associated with it, we
do an opp_add_v1 instead.

> 
>> +
>> +	/* Enable the opp after voltage update */
>> +	return dev_pm_opp_enable(cpu_dev, freq_hz);
>> +}
>> +
>> +/* Check for optional interconnect paths on CPU0 */
>> +static int qcom_cpufreq_find_icc_paths(struct device *dev)
>> +{
>> +	struct device *cpu_dev;
>> +
>> +	cpu_dev = get_cpu_device(0);
>> +	if (!cpu_dev)
>> +		return -EPROBE_DEFER;
>> +
>> +	return dev_pm_opp_of_find_icc_paths(cpu_dev, NULL);
>> +}
>> +
> 
> open code this into the probe routine.

sure

> 
>>  static int qcom_cpufreq_hw_target_index(struct cpufreq_policy 
>> *policy,
>>  					unsigned int index)
>>  {
>> @@ -39,6 +86,8 @@ static int qcom_cpufreq_hw_target_index(struct 
>> cpufreq_policy *policy,
>> 
>>  	writel_relaxed(index, perf_state_reg);
>> 
>> +	qcom_cpufreq_set_bw(policy, freq);
>> +
>>  	arch_set_freq_scale(policy->related_cpus, freq,
>>  			    policy->cpuinfo.max_freq);
>>  	return 0;
>> @@ -88,12 +137,30 @@ static int qcom_cpufreq_hw_read_lut(struct device 
>> *cpu_dev,
>>  {
>>  	u32 data, src, lval, i, core_count, prev_freq = 0, freq;
>>  	u32 volt;
>> +	u64 rate;
>>  	struct cpufreq_frequency_table	*table;
>> +	struct device_node *opp_table_np, *np;
>> +	int ret;
>> 
>>  	table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL);
>>  	if (!table)
>>  		return -ENOMEM;
>> 
>> +	ret = dev_pm_opp_of_add_table(cpu_dev);
>> +	if (!ret) {
>> +		/* Disable all opps and cross-validate against LUT */
>> +		opp_table_np = dev_pm_opp_of_get_opp_desc_node(cpu_dev);
>> +		for_each_available_child_of_node(opp_table_np, np) {
>> +			ret = of_property_read_u64(np, "opp-hz", &rate);
> 
> No way, please use dev_pm_opp_find_freq_*() here instead to grab OPPs
> one by one.

sure I'll use a dev_pm_opp_find_freq_ceil
loop to do the same :P

> 
>> +			if (!ret)
>> +				dev_pm_opp_disable(cpu_dev, rate);
>> +		}
>> +		of_node_put(opp_table_np);
>> +	} else if (ret != -ENODEV) {
>> +		dev_err(cpu_dev, "Invalid OPP table in Device tree\n");
>> +		return ret;
>> +	}
> 
> Rather put this in the if (ret) block and so the else part doesn't
> need extra indentation.

https://patchwork.kernel.org/patch/11573905/

I'll need to enable fast_switch
when the device does not have an
opp-table associated with it or
throw an error when an improper
table is specified. If a table
with bw values is specified, we
disable fast switch and enable
scaling.

> 
>> +
>>  	for (i = 0; i < LUT_MAX_ENTRIES; i++) {
>>  		data = readl_relaxed(base + REG_FREQ_LUT +
>>  				      i * LUT_ROW_SIZE);
>> @@ -112,7 +179,7 @@ static int qcom_cpufreq_hw_read_lut(struct device 
>> *cpu_dev,
>> 
>>  		if (freq != prev_freq && core_count != LUT_TURBO_IND) {
>>  			table[i].frequency = freq;
>> -			dev_pm_opp_add(cpu_dev, freq * 1000, volt);
>> +			qcom_cpufreq_update_opp(cpu_dev, freq, volt);
>>  			dev_dbg(cpu_dev, "index=%d freq=%d, core_count %d\n", i,
>>  				freq, core_count);
>>  		} else if (core_count == LUT_TURBO_IND) {
>> @@ -133,7 +200,8 @@ static int qcom_cpufreq_hw_read_lut(struct device 
>> *cpu_dev,
>>  			if (prev->frequency == CPUFREQ_ENTRY_INVALID) {
>>  				prev->frequency = prev_freq;
>>  				prev->flags = CPUFREQ_BOOST_FREQ;
>> -				dev_pm_opp_add(cpu_dev,	prev_freq * 1000, volt);
>> +				qcom_cpufreq_update_opp(cpu_dev, prev_freq,
>> +							volt);
>>  			}
>> 
>>  			break;
Viresh Kumar June 1, 2020, 11:01 a.m. UTC | #3
On 29-05-20, 17:00, Sibi Sankar wrote:
> > > +static int qcom_cpufreq_update_opp(struct device *cpu_dev,
> > > +				   unsigned long freq_khz,
> > > +				   unsigned long volt)
> > > +{
> > > +	unsigned long freq_hz = freq_khz * 1000;
> > > +
> > > +	if (dev_pm_opp_adjust_voltage(cpu_dev, freq_hz, volt, volt, volt))
> > > +		return dev_pm_opp_add(cpu_dev, freq_hz, volt);
> > 
> > What's going on here ? Why add OPP here ?
> 
> We update the voltage if opp were
> initially added as part of
> dev_pm_opp_of_add_table. However
> if the cpu node does not have an
> opp table associated with it, we
> do a opp_add_v1 instead.

Instead of depending on the failure of dev_pm_opp_adjust_voltage(),
pass a flag to qcom_cpufreq_update_opp() which will decide if we want
to adjust voltage or add an opp.
Sibi Sankar June 2, 2020, 6:57 a.m. UTC | #4
On 2020-06-01 16:31, Viresh Kumar wrote:
> On 29-05-20, 17:00, Sibi Sankar wrote:
>> > > +static int qcom_cpufreq_update_opp(struct device *cpu_dev,
>> > > +				   unsigned long freq_khz,
>> > > +				   unsigned long volt)
>> > > +{
>> > > +	unsigned long freq_hz = freq_khz * 1000;
>> > > +
>> > > +	if (dev_pm_opp_adjust_voltage(cpu_dev, freq_hz, volt, volt, volt))
>> > > +		return dev_pm_opp_add(cpu_dev, freq_hz, volt);
>> >
>> > What's going on here ? Why add OPP here ?
>> 
>> We update the voltage if opp were
>> initially added as part of
>> dev_pm_opp_of_add_table. However
>> if the cpu node does not have an
>> opp table associated with it, we
>> do a opp_add_v1 instead.
> 
> Instead of depending on the failure of dev_pm_opp_adjust_voltage(),
> pass a flag to qcom_cpufreq_update_opp() which will decide if we want
> to adjust voltage or add an opp.

Sure will add it in the next
re-spin.
diff mbox series

Patch

diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c
index fc92a8842e252..fbd73d106a3ae 100644
--- a/drivers/cpufreq/qcom-cpufreq-hw.c
+++ b/drivers/cpufreq/qcom-cpufreq-hw.c
@@ -6,6 +6,7 @@ 
 #include <linux/bitfield.h>
 #include <linux/cpufreq.h>
 #include <linux/init.h>
+#include <linux/interconnect.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_address.h>
@@ -31,6 +32,52 @@ 
 static unsigned long cpu_hw_rate, xo_rate;
 static struct platform_device *global_pdev;
 
+static int qcom_cpufreq_set_bw(struct cpufreq_policy *policy,
+			       unsigned long freq_khz)
+{
+	unsigned long freq_hz = freq_khz * 1000;
+	struct dev_pm_opp *opp;
+	struct device *dev;
+	int ret;
+
+	dev = get_cpu_device(policy->cpu);
+	if (!dev)
+		return -ENODEV;
+
+	opp = dev_pm_opp_find_freq_exact(dev, freq_hz, true);
+	if (IS_ERR(opp))
+		return PTR_ERR(opp);
+
+	ret = dev_pm_opp_set_bw(dev, opp);
+	dev_pm_opp_put(opp);
+	return ret;
+}
+
+static int qcom_cpufreq_update_opp(struct device *cpu_dev,
+				   unsigned long freq_khz,
+				   unsigned long volt)
+{
+	unsigned long freq_hz = freq_khz * 1000;
+
+	if (dev_pm_opp_adjust_voltage(cpu_dev, freq_hz, volt, volt, volt))
+		return dev_pm_opp_add(cpu_dev, freq_hz, volt);
+
+	/* Enable the opp after voltage update */
+	return dev_pm_opp_enable(cpu_dev, freq_hz);
+}
+
+/* Check for optional interconnect paths on CPU0 */
+static int qcom_cpufreq_find_icc_paths(struct device *dev)
+{
+	struct device *cpu_dev;
+
+	cpu_dev = get_cpu_device(0);
+	if (!cpu_dev)
+		return -EPROBE_DEFER;
+
+	return dev_pm_opp_of_find_icc_paths(cpu_dev, NULL);
+}
+
 static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy,
 					unsigned int index)
 {
@@ -39,6 +86,8 @@  static int qcom_cpufreq_hw_target_index(struct cpufreq_policy *policy,
 
 	writel_relaxed(index, perf_state_reg);
 
+	qcom_cpufreq_set_bw(policy, freq);
+
 	arch_set_freq_scale(policy->related_cpus, freq,
 			    policy->cpuinfo.max_freq);
 	return 0;
@@ -88,12 +137,30 @@  static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
 {
 	u32 data, src, lval, i, core_count, prev_freq = 0, freq;
 	u32 volt;
+	u64 rate;
 	struct cpufreq_frequency_table	*table;
+	struct device_node *opp_table_np, *np;
+	int ret;
 
 	table = kcalloc(LUT_MAX_ENTRIES + 1, sizeof(*table), GFP_KERNEL);
 	if (!table)
 		return -ENOMEM;
 
+	ret = dev_pm_opp_of_add_table(cpu_dev);
+	if (!ret) {
+		/* Disable all opps and cross-validate against LUT */
+		opp_table_np = dev_pm_opp_of_get_opp_desc_node(cpu_dev);
+		for_each_available_child_of_node(opp_table_np, np) {
+			ret = of_property_read_u64(np, "opp-hz", &rate);
+			if (!ret)
+				dev_pm_opp_disable(cpu_dev, rate);
+		}
+		of_node_put(opp_table_np);
+	} else if (ret != -ENODEV) {
+		dev_err(cpu_dev, "Invalid OPP table in Device tree\n");
+		return ret;
+	}
+
 	for (i = 0; i < LUT_MAX_ENTRIES; i++) {
 		data = readl_relaxed(base + REG_FREQ_LUT +
 				      i * LUT_ROW_SIZE);
@@ -112,7 +179,7 @@  static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
 
 		if (freq != prev_freq && core_count != LUT_TURBO_IND) {
 			table[i].frequency = freq;
-			dev_pm_opp_add(cpu_dev, freq * 1000, volt);
+			qcom_cpufreq_update_opp(cpu_dev, freq, volt);
 			dev_dbg(cpu_dev, "index=%d freq=%d, core_count %d\n", i,
 				freq, core_count);
 		} else if (core_count == LUT_TURBO_IND) {
@@ -133,7 +200,8 @@  static int qcom_cpufreq_hw_read_lut(struct device *cpu_dev,
 			if (prev->frequency == CPUFREQ_ENTRY_INVALID) {
 				prev->frequency = prev_freq;
 				prev->flags = CPUFREQ_BOOST_FREQ;
-				dev_pm_opp_add(cpu_dev,	prev_freq * 1000, volt);
+				qcom_cpufreq_update_opp(cpu_dev, prev_freq,
+							volt);
 			}
 
 			break;
@@ -254,6 +322,7 @@  static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
 	void __iomem *base = policy->driver_data - REG_PERF_STATE;
 
 	dev_pm_opp_remove_all_dynamic(cpu_dev);
+	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
 	kfree(policy->freq_table);
 	devm_iounmap(&global_pdev->dev, base);
 
@@ -301,6 +370,10 @@  static int qcom_cpufreq_hw_driver_probe(struct platform_device *pdev)
 
 	global_pdev = pdev;
 
+	ret = qcom_cpufreq_find_icc_paths(&pdev->dev);
+	if (ret)
+		return ret;
+
 	ret = cpufreq_register_driver(&cpufreq_qcom_hw_driver);
 	if (ret)
 		dev_err(&pdev->dev, "CPUFreq HW driver failed to register\n");