diff mbox series

[v2,7/9] cpufreq: tegra194: add OPP support and set bandwidth

Message ID 20230220140559.28289-8-sumitg@nvidia.com (mailing list archive)
State Handled Elsewhere, archived
Headers show
Series Tegra234 Memory interconnect support | expand

Commit Message

Sumit Gupta Feb. 20, 2023, 2:05 p.m. UTC
Add support to use OPP table from DT in Tegra194 cpufreq driver.
Tegra SoCs receive the frequency lookup table (LUT) from BPMP-FW.
Cross-check the OPPs present in DT against the LUT from BPMP-FW
and enable only those DT OPPs which are also present in the LUT.

The OPP table in DT has CPU Frequency to bandwidth mapping where
the bandwidth value is per MC channel. DRAM bandwidth depends on the
number of MC channels which can vary as per the boot configuration.
This per channel bandwidth from OPP table will be later converted by
MC driver to final bandwidth value by multiplying with number of
channels before sending the request to BPMP-FW.

If the OPP table is not present in DT, then use the LUT from BPMP-FW
directly as the CPU frequency table and do not perform DRAM frequency
scaling, which is the same as the current behavior.

Now that the CPU frequency table is controlled through the OPP
table in DT, keeping fewer entries in the table creates fewer
frequency steps and can help to scale quickly to high frequencies
when required.

Signed-off-by: Sumit Gupta <sumitg@nvidia.com>
---
 drivers/cpufreq/tegra194-cpufreq.c | 152 ++++++++++++++++++++++++++---
 1 file changed, 139 insertions(+), 13 deletions(-)

Comments

Viresh Kumar Feb. 22, 2023, 4:03 a.m. UTC | #1
On 20-02-23, 19:35, Sumit Gupta wrote:
> +static int tegra_cpufreq_set_bw(struct cpufreq_policy *policy, unsigned long freq_khz)
> +{
> +	struct dev_pm_opp *opp;
> +	struct device *dev;
> +	int ret;
> +
> +	dev = get_cpu_device(policy->cpu);
> +	if (!dev)
> +		return -ENODEV;
> +
> +	opp = dev_pm_opp_find_freq_exact(dev, freq_khz * KHZ, true);
> +	if (IS_ERR(opp))
> +		return PTR_ERR(opp);
> +
> +	ret = dev_pm_opp_set_opp(dev, opp);
> +	dev_pm_opp_put(opp);

What about dev_pm_opp_set_rate() instead ?

> +	return ret;
> +}
Sumit Gupta Feb. 23, 2023, 9:36 a.m. UTC | #2
On 22/02/23 09:33, Viresh Kumar wrote:
> External email: Use caution opening links or attachments
> 
> 
> On 20-02-23, 19:35, Sumit Gupta wrote:
>> +static int tegra_cpufreq_set_bw(struct cpufreq_policy *policy, unsigned long freq_khz)
>> +{
>> +     struct dev_pm_opp *opp;
>> +     struct device *dev;
>> +     int ret;
>> +
>> +     dev = get_cpu_device(policy->cpu);
>> +     if (!dev)
>> +             return -ENODEV;
>> +
>> +     opp = dev_pm_opp_find_freq_exact(dev, freq_khz * KHZ, true);
>> +     if (IS_ERR(opp))
>> +             return PTR_ERR(opp);
>> +
>> +     ret = dev_pm_opp_set_opp(dev, opp);
>> +     dev_pm_opp_put(opp);
> 
> What about dev_pm_opp_set_rate() instead ?
> 
>> +     return ret;
>> +}
> 
> --
> viresh

Tried using it and got below crash. It seems to be coming because we 
don't have clocks property within CPU node for SoC's having BPMP-FW.

  Unable to handle kernel NULL pointer dereference at virtual address 
000000000000002e
  ....
  Call trace:
   clk_round_rate+0x38/0xd8
   dev_pm_opp_set_rate+0xe4/0x1a8
   tegra194_cpufreq_set_target+0x74/0x88
   __cpufreq_driver_target+0x154/0x250
   cpufreq_online+0x7b4/0x9ac

Thanks,
Sumit
Thierry Reding Feb. 27, 2023, 12:44 p.m. UTC | #3
On Thu, Feb 23, 2023 at 03:06:26PM +0530, Sumit Gupta wrote:
> 
> 
> On 22/02/23 09:33, Viresh Kumar wrote:
> > External email: Use caution opening links or attachments
> > 
> > 
> > On 20-02-23, 19:35, Sumit Gupta wrote:
> > > +static int tegra_cpufreq_set_bw(struct cpufreq_policy *policy, unsigned long freq_khz)
> > > +{
> > > +     struct dev_pm_opp *opp;
> > > +     struct device *dev;
> > > +     int ret;
> > > +
> > > +     dev = get_cpu_device(policy->cpu);
> > > +     if (!dev)
> > > +             return -ENODEV;
> > > +
> > > +     opp = dev_pm_opp_find_freq_exact(dev, freq_khz * KHZ, true);
> > > +     if (IS_ERR(opp))
> > > +             return PTR_ERR(opp);
> > > +
> > > +     ret = dev_pm_opp_set_opp(dev, opp);
> > > +     dev_pm_opp_put(opp);
> > 
> > What about dev_pm_opp_set_rate() instead ?
> > 
> > > +     return ret;
> > > +}
> > 
> > --
> > viresh
> 
> Tried using it and got below crash. It seems to be coming because we don't
> have clocks property within CPU node for SoC's having BPMP-FW.
> 
>  Unable to handle kernel NULL pointer dereference at virtual address
> 000000000000002e
>  ....
>  Call trace:
>   clk_round_rate+0x38/0xd8
>   dev_pm_opp_set_rate+0xe4/0x1a8
>   tegra194_cpufreq_set_target+0x74/0x88
>   __cpufreq_driver_target+0x154/0x250
>   cpufreq_online+0x7b4/0x9ac

Can you try to find out what exactly is happening here? The clock
framework should be able to deal with NULL clock pointers just fine.
Although, looking at the OPP table code, it seems like we don't use
clk_get_optional(), so opp_table->clk may end up being a pointer-
encoded error. Perhaps we need something like this:

--- >8 ---
diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index e87567dbe99f..d7baeb6ac697 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -1397,6 +1397,7 @@ static struct opp_table *_update_opp_table_clk(struct device *dev,
 		 * frequency in opp->rates and also parse the entries in DT.
 		 */
 		opp_table->clk_count = 1;
+		opp_table->clk = NULL;
 
 		dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, ret);
 		return opp_table;
--- >8 ---

Thierry
Viresh Kumar Feb. 28, 2023, 1:18 a.m. UTC | #4
On 27-02-23, 13:44, Thierry Reding wrote:
> On Thu, Feb 23, 2023 at 03:06:26PM +0530, Sumit Gupta wrote:
> > On 22/02/23 09:33, Viresh Kumar wrote:
> > Tried using it and got below crash. It seems to be coming because we don't
> > have clocks property within CPU node for SoC's having BPMP-FW.
> > 
> >  Unable to handle kernel NULL pointer dereference at virtual address
> > 000000000000002e
> >  ....
> >  Call trace:
> >   clk_round_rate+0x38/0xd8
> >   dev_pm_opp_set_rate+0xe4/0x1a8
> >   tegra194_cpufreq_set_target+0x74/0x88
> >   __cpufreq_driver_target+0x154/0x250
> >   cpufreq_online+0x7b4/0x9ac
> 
> Can you try to find out what exactly is happening here? The clock
> framework should be able to deal with NULL clock pointers just fine.
> Although, looking at the OPP table code, it seems like we don't use
> clk_get_optional(), so opp_table->clk may end up being a pointer-
> encoded error. Perhaps we need something like this:
> 
> --- >8 ---
> diff --git a/drivers/opp/core.c b/drivers/opp/core.c
> index e87567dbe99f..d7baeb6ac697 100644
> --- a/drivers/opp/core.c
> +++ b/drivers/opp/core.c
> @@ -1397,6 +1397,7 @@ static struct opp_table *_update_opp_table_clk(struct device *dev,
>  		 * frequency in opp->rates and also parse the entries in DT.
>  		 */
>  		opp_table->clk_count = 1;
> +		opp_table->clk = NULL;
>  
>  		dev_dbg(dev, "%s: Couldn't find clock: %d\n", __func__, ret);
>  		return opp_table;

I didn't reply earlier as I had nothing more to say and Sumit's
initial approach was correct. Maybe I should have clarified
this then.

The OPP core supports dev_pm_opp_set_rate() only for devices that can
set the rate, for everything else dev_pm_opp_set_opp() is the right
choice. I suggested dev_pm_opp_set_rate() earlier as I thought rate is
supported here.
diff mbox series

Patch

diff --git a/drivers/cpufreq/tegra194-cpufreq.c b/drivers/cpufreq/tegra194-cpufreq.c
index 5890e25d7f77..cda1ab12cdde 100644
--- a/drivers/cpufreq/tegra194-cpufreq.c
+++ b/drivers/cpufreq/tegra194-cpufreq.c
@@ -12,6 +12,7 @@ 
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
+#include <linux/units.h>
 
 #include <asm/smp_plat.h>
 
@@ -65,12 +66,32 @@  struct tegra_cpufreq_soc {
 
 struct tegra194_cpufreq_data {
 	void __iomem *regs;
-	struct cpufreq_frequency_table **tables;
+	struct cpufreq_frequency_table **bpmp_luts;
 	const struct tegra_cpufreq_soc *soc;
+	bool icc_dram_bw_scaling;
 };
 
 static struct workqueue_struct *read_counters_wq;
 
+/* Apply the available OPP matching @freq_khz (in kHz) to the policy's CPU device. */
+static int tegra_cpufreq_set_bw(struct cpufreq_policy *policy, unsigned long freq_khz)
+{
+	struct device *cpu_dev = get_cpu_device(policy->cpu);
+	struct dev_pm_opp *match;
+	int err;
+
+	if (!cpu_dev)
+		return -ENODEV;
+
+	match = dev_pm_opp_find_freq_exact(cpu_dev, freq_khz * KHZ, true);
+	if (IS_ERR(match))
+		return PTR_ERR(match);
+	/* Release the looked-up OPP once it has been handed to the OPP core */
+	err = dev_pm_opp_set_opp(cpu_dev, match);
+	dev_pm_opp_put(match);
+	return err;
+}
+
 static void tegra_get_cpu_mpidr(void *mpidr)
 {
 	*((u64 *)mpidr) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
@@ -354,7 +375,7 @@  static unsigned int tegra194_get_speed(u32 cpu)
 	 * to the last written ndiv value from freq_table. This is
 	 * done to return consistent value.
 	 */
-	cpufreq_for_each_valid_entry(pos, data->tables[clusterid]) {
+	cpufreq_for_each_valid_entry(pos, data->bpmp_luts[clusterid]) {
 		if (pos->driver_data != ndiv)
 			continue;
 
@@ -369,16 +390,93 @@  static unsigned int tegra194_get_speed(u32 cpu)
 	return rate;
 }
 
+/*
+ * Build a cpufreq table from the DT OPPs that also appear in the BPMP-FW LUT,
+ * leaving every other DT OPP disabled.
+ *
+ * @policy:    policy whose CPU device carries the OPP table
+ * @bpmp_lut:  frequency table read from BPMP-FW for this cluster
+ * @opp_table: on success, set to a kcalloc()'d table terminated by
+ *             CPUFREQ_TABLE_END; left untouched on failure
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int tegra_cpufreq_init_cpufreq_table(struct cpufreq_policy *policy,
+					    struct cpufreq_frequency_table *bpmp_lut,
+					    struct cpufreq_frequency_table **opp_table)
+{
+	struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
+	struct cpufreq_frequency_table *freq_table;
+	struct cpufreq_frequency_table *pos;
+	struct device *cpu_dev;
+	struct dev_pm_opp *opp;
+	unsigned long rate;
+	int ret, max_opps;
+	int j = 0;
+
+	cpu_dev = get_cpu_device(policy->cpu);
+	if (!cpu_dev) {
+		pr_err("%s: failed to get cpu%d device\n", __func__, policy->cpu);
+		return -ENODEV;
+	}
+
+	/* Initialize OPP table mentioned in operating-points-v2 property in DT */
+	ret = dev_pm_opp_of_add_table_indexed(cpu_dev, 0);
+	if (ret) {
+		dev_err(cpu_dev, "Invalid or empty opp table in device tree\n");
+		data->icc_dram_bw_scaling = false;
+		return ret;
+	}
+
+	max_opps = dev_pm_opp_get_opp_count(cpu_dev);
+	if (max_opps <= 0) {
+		dev_err(cpu_dev, "Failed to add OPPs\n");
+		/*
+		 * A count of zero must not be reported as success: the caller
+		 * would then use *opp_table although it was never set.
+		 */
+		return max_opps ? max_opps : -ENODATA;
+	}
+
+	/* Disable all opps and cross-validate against LUT later */
+	for (rate = 0; ; rate++) {
+		opp = dev_pm_opp_find_freq_ceil(cpu_dev, &rate);
+		if (IS_ERR(opp))
+			break;
+
+		dev_pm_opp_put(opp);
+		dev_pm_opp_disable(cpu_dev, rate);
+	}
+
+	freq_table = kcalloc((max_opps + 1), sizeof(*freq_table), GFP_KERNEL);
+	if (!freq_table)
+		return -ENOMEM;
+
+	/*
+	 * Cross check the frequencies from BPMP-FW LUT against the OPP's present in DT.
+	 * Enable only those DT OPP's which are present in LUT also.
+	 */
+	cpufreq_for_each_valid_entry(pos, bpmp_lut) {
+		opp = dev_pm_opp_find_freq_exact(cpu_dev, pos->frequency * KHZ, false);
+		if (IS_ERR(opp))
+			continue;
+
+		/* Only the OPP's existence matters here; drop the lookup reference */
+		dev_pm_opp_put(opp);
+
+		ret = dev_pm_opp_enable(cpu_dev, pos->frequency * KHZ);
+		if (ret < 0) {
+			/* The table has not been handed to the caller yet */
+			kfree(freq_table);
+			return ret;
+		}
+
+		freq_table[j].driver_data = pos->driver_data;
+		freq_table[j].frequency = pos->frequency;
+		j++;
+	}
+
+	freq_table[j].driver_data = pos->driver_data;
+	freq_table[j].frequency = CPUFREQ_TABLE_END;
+
+	*opp_table = &freq_table[0];
+
+	dev_pm_opp_set_sharing_cpus(cpu_dev, policy->cpus);
+
+	return ret;
+}
+
 static int tegra194_cpufreq_init(struct cpufreq_policy *policy)
 {
 	struct tegra194_cpufreq_data *data = cpufreq_get_driver_data();
 	int maxcpus_per_cluster = data->soc->maxcpus_per_cluster;
+	struct cpufreq_frequency_table *freq_table;
+	struct cpufreq_frequency_table *bpmp_lut;
 	u32 start_cpu, cpu;
 	u32 clusterid;
+	int ret;
 
 	data->soc->ops->get_cpu_cluster_id(policy->cpu, NULL, &clusterid);
-
-	if (clusterid >= data->soc->num_clusters || !data->tables[clusterid])
+	if (clusterid >= data->soc->num_clusters || !data->bpmp_luts[clusterid])
 		return -EINVAL;
 
 	start_cpu = rounddown(policy->cpu, maxcpus_per_cluster);
@@ -387,9 +485,22 @@  static int tegra194_cpufreq_init(struct cpufreq_policy *policy)
 		if (cpu_possible(cpu))
 			cpumask_set_cpu(cpu, policy->cpus);
 	}
-	policy->freq_table = data->tables[clusterid];
 	policy->cpuinfo.transition_latency = TEGRA_CPUFREQ_TRANSITION_LATENCY;
 
+	bpmp_lut = data->bpmp_luts[clusterid];
+
+	if (data->icc_dram_bw_scaling) {
+		ret = tegra_cpufreq_init_cpufreq_table(policy, bpmp_lut, &freq_table);
+		if (!ret) {
+			policy->freq_table = freq_table;
+			return 0;
+		}
+	}
+
+	data->icc_dram_bw_scaling = false;
+	policy->freq_table = bpmp_lut;
+	pr_info("OPP tables missing from DT, EMC frequency scaling disabled\n");
+
 	return 0;
 }
 
@@ -406,6 +517,9 @@  static int tegra194_cpufreq_set_target(struct cpufreq_policy *policy,
 	 */
 	data->soc->ops->set_cpu_ndiv(policy, (u64)tbl->driver_data);
 
+	if (data->icc_dram_bw_scaling)
+		tegra_cpufreq_set_bw(policy, tbl->frequency);
+
 	return 0;
 }
 
@@ -439,8 +553,8 @@  static void tegra194_cpufreq_free_resources(void)
 }
 
 static struct cpufreq_frequency_table *
-init_freq_table(struct platform_device *pdev, struct tegra_bpmp *bpmp,
-		unsigned int cluster_id)
+tegra_cpufreq_bpmp_read_lut(struct platform_device *pdev, struct tegra_bpmp *bpmp,
+			    unsigned int cluster_id)
 {
 	struct cpufreq_frequency_table *freq_table;
 	struct mrq_cpu_ndiv_limits_response resp;
@@ -515,6 +629,7 @@  static int tegra194_cpufreq_probe(struct platform_device *pdev)
 	const struct tegra_cpufreq_soc *soc;
 	struct tegra194_cpufreq_data *data;
 	struct tegra_bpmp *bpmp;
+	struct device *cpu_dev;
 	int err, i;
 
 	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
@@ -530,9 +645,9 @@  static int tegra194_cpufreq_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	data->tables = devm_kcalloc(&pdev->dev, data->soc->num_clusters,
-				    sizeof(*data->tables), GFP_KERNEL);
-	if (!data->tables)
+	data->bpmp_luts = devm_kcalloc(&pdev->dev, data->soc->num_clusters,
+				       sizeof(*data->bpmp_luts), GFP_KERNEL);
+	if (!data->bpmp_luts)
 		return -ENOMEM;
 
 	if (soc->actmon_cntr_base) {
@@ -556,15 +671,26 @@  static int tegra194_cpufreq_probe(struct platform_device *pdev)
 	}
 
 	for (i = 0; i < data->soc->num_clusters; i++) {
-		data->tables[i] = init_freq_table(pdev, bpmp, i);
-		if (IS_ERR(data->tables[i])) {
-			err = PTR_ERR(data->tables[i]);
+		data->bpmp_luts[i] = tegra_cpufreq_bpmp_read_lut(pdev, bpmp, i);
+		if (IS_ERR(data->bpmp_luts[i])) {
+			err = PTR_ERR(data->bpmp_luts[i]);
 			goto err_free_res;
 		}
 	}
 
 	tegra194_cpufreq_driver.driver_data = data;
 
+	/* Check for optional OPPv2 and interconnect paths on CPU0 to enable ICC scaling */
+	cpu_dev = get_cpu_device(0);
+	if (!cpu_dev)
+		return -EPROBE_DEFER;
+
+	if (dev_pm_opp_of_get_opp_desc_node(cpu_dev)) {
+		err = dev_pm_opp_of_find_icc_paths(cpu_dev, NULL);
+		if (!err)
+			data->icc_dram_bw_scaling = true;
+	}
+
 	err = cpufreq_register_driver(&tegra194_cpufreq_driver);
 	if (!err)
 		goto put_bpmp;