diff mbox series

[v4,5/6] PM / devfreq: imx8m-ddrc: Measure bandwidth with perf

Message ID a25094eac4c0f740e0e33c04af699b39a4226a08.1573252696.git.leonard.crestez@nxp.com (mailing list archive)
State Superseded
Headers show
Series PM / devfreq: Add dynamic scaling for imx8m ddr controller | expand

Commit Message

Leonard Crestez Nov. 8, 2019, 10:39 p.m. UTC
The imx8m ddrc has a performance monitoring block attached which can
be used to measure bandwidth usage and automatically adjust frequency.

There is already a perf driver for that block so instead of implementing
a devfreq events driver use the in-kernel perf API to implement
get_dev_status directly.

Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
---
 drivers/devfreq/imx8m-ddrc.c | 153 +++++++++++++++++++++++++++++++++++
 1 file changed, 153 insertions(+)

Comments

Chanwoo Choi Nov. 11, 2019, 5:18 a.m. UTC | #1
Hi Leonard,

On 11/9/19 7:39 AM, Leonard Crestez wrote:
> The imx8m ddrc has a performance monitoring block attached which can
> be used to measure bandwidth usage and automatically adjust frequency.
> 
> There is already a perf driver for that block so instead of implementing
> a devfreq events driver use the in-kernel perf API to implement
> get_dev_status directly.
> 
> Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
> ---
>  drivers/devfreq/imx8m-ddrc.c | 153 +++++++++++++++++++++++++++++++++++
>  1 file changed, 153 insertions(+)
> 
> diff --git a/drivers/devfreq/imx8m-ddrc.c b/drivers/devfreq/imx8m-ddrc.c
> index 51903fee21a7..6372191f72d7 100644
> --- a/drivers/devfreq/imx8m-ddrc.c
> +++ b/drivers/devfreq/imx8m-ddrc.c
> @@ -11,10 +11,13 @@
>  #include <linux/pm_opp.h>
>  #include <linux/clk.h>
>  #include <linux/clk-provider.h>
>  #include <linux/arm-smccc.h>
>  
> +#include <asm/perf_event.h>
> +#include <linux/perf_event.h>
> +
>  #define IMX_SIP_DDR_DVFS			0xc2000004
>  
>  /* Values starting from 0 switch to specific frequency */
>  #define IMX_SIP_DDR_FREQ_SET_HIGH		0x00
>  
> @@ -78,10 +81,22 @@ struct imx8m_ddrc {
>  	struct clk *dram_alt;
>  	struct clk *dram_apb;
>  
>  	int freq_count;
>  	struct imx8m_ddrc_freq freq_table[IMX8M_DDRC_MAX_FREQ_COUNT];
> +
> +	/* For measuring load with perf events: */
> +	struct platform_device *pmu_pdev;
> +	struct pmu *pmu;
> +
> +	struct perf_event_attr rd_event_attr;
> +	struct perf_event_attr wr_event_attr;
> +	struct perf_event *rd_event;
> +	struct perf_event *wr_event;
> +
> +	u64 last_rd_val, last_rd_ena, last_rd_run;
> +	u64 last_wr_val, last_wr_ena, last_wr_run;
>  };
>  
>  static struct imx8m_ddrc_freq *imx8m_ddrc_find_freq(struct imx8m_ddrc *priv,
>  						    unsigned long rate)
>  {
> @@ -245,10 +260,131 @@ static int imx8m_ddrc_get_cur_freq(struct device *dev, unsigned long *freq)
>  	*freq = clk_get_rate(priv->dram_core);
>  
>  	return 0;
>  }
>  
> +static int imx8m_ddrc_get_dev_status(struct device *dev,
> +				     struct devfreq_dev_status *stat)
> +{
> +	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
> +
> +	stat->current_frequency = clk_get_rate(priv->dram_core);
> +
> +	if (priv->rd_event && priv->wr_event) {
> +		u64 rd_delta, rd_val, rd_ena, rd_run;
> +		u64 wr_delta, wr_val, wr_ena, wr_run;
> +
> +		rd_val = perf_event_read_value(priv->rd_event,
> +					       &rd_ena, &rd_run);
> +		wr_val = perf_event_read_value(priv->wr_event,
> +					       &wr_ena, &wr_run);
> +
> +		rd_delta = (rd_val - priv->last_rd_val) *
> +			   (rd_ena - priv->last_rd_ena);
> +		do_div(rd_delta, rd_run - priv->last_rd_run);
> +		priv->last_rd_val = rd_val;
> +		priv->last_rd_ena = rd_ena;
> +		priv->last_rd_run = rd_run;
> +
> +		wr_delta = (wr_val - priv->last_wr_val) *
> +			   (wr_ena - priv->last_wr_ena);
> +		do_div(wr_delta, wr_run - priv->last_wr_run);
> +		priv->last_wr_val = wr_val;
> +		priv->last_wr_ena = wr_ena;
> +		priv->last_wr_run = wr_run;
> +
> +		/* magic numbers, possibly wrong */
> +		stat->busy_time = 4 * (rd_delta + wr_delta);
> +		stat->total_time = stat->current_frequency;
> +	} else {
> +		stat->busy_time = 0;
> +		stat->total_time = 0;
> +	}
> +
> +	return 0;
> +}
> +
> +static int imx8m_ddrc_perf_disable(struct imx8m_ddrc *priv)
> +{
> +	/* release and set to NULL */
> +	if (!IS_ERR_OR_NULL(priv->rd_event))
> +		perf_event_release_kernel(priv->rd_event);
> +	if (!IS_ERR_OR_NULL(priv->wr_event))
> +		perf_event_release_kernel(priv->wr_event);
> +	priv->rd_event = NULL;
> +	priv->wr_event = NULL;
> +
> +	return 0;
> +}
> +
> +static int imx8m_ddrc_perf_enable(struct imx8m_ddrc *priv)

Actually, this function have to call the just function for enabling
the bandwidth monitoring instead of writing the detailed something.
It looks like that you move the part of the different device driver into here.

> +{
> +	int ret;
> +
> +	priv->rd_event_attr.size = sizeof(priv->rd_event_attr);
> +	priv->rd_event_attr.type = priv->pmu->type;
> +	priv->rd_event_attr.config = 0x2a;
> +
> +	priv->rd_event = perf_event_create_kernel_counter(
> +			&priv->rd_event_attr, 0, NULL, NULL, NULL);
> +	if (IS_ERR(priv->rd_event)) {
> +		ret = PTR_ERR(priv->rd_event);
> +		goto err;
> +	}
> +
> +	priv->wr_event_attr.size = sizeof(priv->wr_event_attr);
> +	priv->wr_event_attr.type = priv->pmu->type;
> +	priv->wr_event_attr.config = 0x2b;
> +
> +	priv->wr_event = perf_event_create_kernel_counter(
> +			&priv->wr_event_attr, 0, NULL, NULL, NULL);
> +	if (IS_ERR(priv->wr_event)) {
> +		ret = PTR_ERR(priv->wr_event);
> +		goto err;
> +	}
> +
> +	return 0;
> +
> +err:
> +	imx8m_ddrc_perf_disable(priv);
> +	return ret;
> +}
> +
> +static int imx8m_ddrc_init_events(struct device *dev,
> +				  struct device_node *events_node)

ditto.

> +{
> +	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
> +	struct device_driver *driver;
> +
> +	/*
> +	 * We need pmu->type for perf_event_attr but there is no API for
> +	 * mapping device_node to pmu. Fetch private data for imx-ddr-pmu and
> +	 * cast that to a struct pmu instead.
> +	 */
> +	priv->pmu_pdev = of_find_device_by_node(events_node);
> +	if (!priv->pmu_pdev)
> +		return -EPROBE_DEFER;
> +	driver = priv->pmu_pdev->dev.driver;
> +	if (!driver)
> +		return -EPROBE_DEFER;
> +	if (strcmp(driver->name, "imx-ddr-pmu")) {
> +		dev_warn(dev, "devfreq-events node %pOF has unexpected driver %s\n",
> +				events_node, driver->name);
> +		return -ENODEV;
> +	}
> +
> +	priv->pmu = platform_get_drvdata(priv->pmu_pdev);

It seems that you access the different device driver without
any standard interface from some linux kernel subsystem.

For the communication or control between different device drivers,
we have to use the standard interface instead of direct access or call.
I think that it make it too tightly coupled driver between them.

So, I developed the devfreq-event subsystem for this reason
in order to remove the non-standard direct access to other device driver.

Unfortunately, I can't agree this approach. I don't prefer to use
the direct access of other device driver(imx-ddr-pmu). You need to
use standard interface provided from subsystem. or You need to make
the new device driver with devfreq-event subsystem.



> +	if (!priv->pmu) {
> +		dev_err(dev, "devfreq-events device missing private data\n");
> +		return -EINVAL;
> +	}
> +
> +	dev_dbg(dev, "events from pmu %s\n", priv->pmu->name);
> +
> +	return imx8m_ddrc_perf_enable(priv);
> +}
> +
>  static int imx8m_ddrc_init_freq_info(struct device *dev)
>  {
>  	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
>  	struct arm_smccc_res res;
>  	int index;
> @@ -328,17 +464,23 @@ static int imx8m_ddrc_check_opps(struct device *dev)
>  	return 0;
>  }
>  
>  static void imx8m_ddrc_exit(struct device *dev)
>  {
> +	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
> +
> +	imx8m_ddrc_perf_disable(priv);
> +	platform_device_put(priv->pmu_pdev);
> +
>  	dev_pm_opp_of_remove_table(dev);
>  }
>  
>  static int imx8m_ddrc_probe(struct platform_device *pdev)
>  {
>  	struct device *dev = &pdev->dev;
>  	struct imx8m_ddrc *priv;
> +	struct device_node *events_node;
>  	const char *gov = DEVFREQ_GOV_USERSPACE;
>  	int ret;
>  
>  	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
>  	if (!priv)
> @@ -350,10 +492,19 @@ static int imx8m_ddrc_probe(struct platform_device *pdev)
>  	if (ret) {
>  		dev_err(dev, "failed to init firmware freq info: %d\n", ret);
>  		return ret;
>  	}
>  
> +	events_node = of_parse_phandle(dev->of_node, "devfreq-events", 0);
> +	if (events_node) {
> +		ret = imx8m_ddrc_init_events(dev, events_node);
> +		of_node_put(events_node);
> +		if (ret)
> +			goto err;
> +		gov = DEVFREQ_GOV_SIMPLE_ONDEMAND;
> +	}
> +
>  	priv->dram_core = devm_clk_get(dev, "core");
>  	priv->dram_pll = devm_clk_get(dev, "pll");
>  	priv->dram_alt = devm_clk_get(dev, "alt");
>  	priv->dram_apb = devm_clk_get(dev, "apb");
>  	if (IS_ERR(priv->dram_core) ||
> @@ -390,10 +541,12 @@ static int imx8m_ddrc_probe(struct platform_device *pdev)
>  	}
>  
>  	return 0;
>  
>  err:
> +	imx8m_ddrc_perf_disable(priv);
> +	platform_device_put(priv->pmu_pdev);
>  	dev_pm_opp_of_remove_table(dev);
>  	return ret;
>  }
>  
>  static const struct of_device_id imx8m_ddrc_of_match[] = {
>
Leonard Crestez Nov. 12, 2019, 1:17 p.m. UTC | #2
On 11.11.2019 07:13, Chanwoo Choi wrote:
> Hi Leonard,
> 
> On 11/9/19 7:39 AM, Leonard Crestez wrote:
>> The imx8m ddrc has a performance monitoring block attached which can
>> be used to measure bandwidth usage and automatically adjust frequency.
>>
>> There is already a perf driver for that block so instead of implementing
>> a devfreq events driver use the in-kernel perf API to implement
>> get_dev_status directly.
>>
>> Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
>> ---
>>   drivers/devfreq/imx8m-ddrc.c | 153 +++++++++++++++++++++++++++++++++++
>>   1 file changed, 153 insertions(+)
>>
>> diff --git a/drivers/devfreq/imx8m-ddrc.c b/drivers/devfreq/imx8m-ddrc.c
>> index 51903fee21a7..6372191f72d7 100644
>> --- a/drivers/devfreq/imx8m-ddrc.c
>> +++ b/drivers/devfreq/imx8m-ddrc.c
>> @@ -11,10 +11,13 @@
>>   #include <linux/pm_opp.h>
>>   #include <linux/clk.h>
>>   #include <linux/clk-provider.h>
>>   #include <linux/arm-smccc.h>
>>   
>> +#include <asm/perf_event.h>
>> +#include <linux/perf_event.h>
>> +
>>   #define IMX_SIP_DDR_DVFS			0xc2000004
>>   
>>   /* Values starting from 0 switch to specific frequency */
>>   #define IMX_SIP_DDR_FREQ_SET_HIGH		0x00
>>   
>> @@ -78,10 +81,22 @@ struct imx8m_ddrc {
>>   	struct clk *dram_alt;
>>   	struct clk *dram_apb;
>>   
>>   	int freq_count;
>>   	struct imx8m_ddrc_freq freq_table[IMX8M_DDRC_MAX_FREQ_COUNT];
>> +
>> +	/* For measuring load with perf events: */
>> +	struct platform_device *pmu_pdev;
>> +	struct pmu *pmu;
>> +
>> +	struct perf_event_attr rd_event_attr;
>> +	struct perf_event_attr wr_event_attr;
>> +	struct perf_event *rd_event;
>> +	struct perf_event *wr_event;
>> +
>> +	u64 last_rd_val, last_rd_ena, last_rd_run;
>> +	u64 last_wr_val, last_wr_ena, last_wr_run;
>>   };
>>   
>>   static struct imx8m_ddrc_freq *imx8m_ddrc_find_freq(struct imx8m_ddrc *priv,
>>   						    unsigned long rate)
>>   {
>> @@ -245,10 +260,131 @@ static int imx8m_ddrc_get_cur_freq(struct device *dev, unsigned long *freq)
>>   	*freq = clk_get_rate(priv->dram_core);
>>   
>>   	return 0;
>>   }
>>   
>> +static int imx8m_ddrc_get_dev_status(struct device *dev,
>> +				     struct devfreq_dev_status *stat)
>> +{
>> +	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
>> +
>> +	stat->current_frequency = clk_get_rate(priv->dram_core);
>> +
>> +	if (priv->rd_event && priv->wr_event) {
>> +		u64 rd_delta, rd_val, rd_ena, rd_run;
>> +		u64 wr_delta, wr_val, wr_ena, wr_run;
>> +
>> +		rd_val = perf_event_read_value(priv->rd_event,
>> +					       &rd_ena, &rd_run);
>> +		wr_val = perf_event_read_value(priv->wr_event,
>> +					       &wr_ena, &wr_run);
>> +
>> +		rd_delta = (rd_val - priv->last_rd_val) *
>> +			   (rd_ena - priv->last_rd_ena);
>> +		do_div(rd_delta, rd_run - priv->last_rd_run);
>> +		priv->last_rd_val = rd_val;
>> +		priv->last_rd_ena = rd_ena;
>> +		priv->last_rd_run = rd_run;
>> +
>> +		wr_delta = (wr_val - priv->last_wr_val) *
>> +			   (wr_ena - priv->last_wr_ena);
>> +		do_div(wr_delta, wr_run - priv->last_wr_run);
>> +		priv->last_wr_val = wr_val;
>> +		priv->last_wr_ena = wr_ena;
>> +		priv->last_wr_run = wr_run;
>> +
>> +		/* magic numbers, possibly wrong */
>> +		stat->busy_time = 4 * (rd_delta + wr_delta);
>> +		stat->total_time = stat->current_frequency;
>> +	} else {
>> +		stat->busy_time = 0;
>> +		stat->total_time = 0;
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +static int imx8m_ddrc_perf_disable(struct imx8m_ddrc *priv)
>> +{
>> +	/* release and set to NULL */
>> +	if (!IS_ERR_OR_NULL(priv->rd_event))
>> +		perf_event_release_kernel(priv->rd_event);
>> +	if (!IS_ERR_OR_NULL(priv->wr_event))
>> +		perf_event_release_kernel(priv->wr_event);
>> +	priv->rd_event = NULL;
>> +	priv->wr_event = NULL;
>> +
>> +	return 0;
>> +}
>> +
>> +static int imx8m_ddrc_perf_enable(struct imx8m_ddrc *priv)
> 
> Actually, this function have to call the just function for enabling
> the bandwidth monitoring instead of writing the detailed something.
> It looks like that you move the part of the different device driver into here.

This is the code for enabling bandwith monitoring: it creates perf 
counters using in-kernel API. The perf api doesn't seem to expose 
anything else to enable/disable the counter after creation and honestly 
just create/destroy seems fine.

As far as I can tell bandwidth monitoring in devfreq is always enabled 
on probe anyway, no matter which governor is in use. It would be nice if 
devfreq drivers could receive a callback and dynamically acquire/release 
monitoring resources only when the ondemand governor is in used.

Until then this driver will permanently allocate 2 (out of 3) counters 
in ddr pmu hardware.

>> +{
>> +	int ret;
>> +
>> +	priv->rd_event_attr.size = sizeof(priv->rd_event_attr);
>> +	priv->rd_event_attr.type = priv->pmu->type;
>> +	priv->rd_event_attr.config = 0x2a;
>> +
>> +	priv->rd_event = perf_event_create_kernel_counter(
>> +			&priv->rd_event_attr, 0, NULL, NULL, NULL);
>> +	if (IS_ERR(priv->rd_event)) {
>> +		ret = PTR_ERR(priv->rd_event);
>> +		goto err;
>> +	}
>> +
>> +	priv->wr_event_attr.size = sizeof(priv->wr_event_attr);
>> +	priv->wr_event_attr.type = priv->pmu->type;
>> +	priv->wr_event_attr.config = 0x2b;
>> +
>> +	priv->wr_event = perf_event_create_kernel_counter(
>> +			&priv->wr_event_attr, 0, NULL, NULL, NULL);
>> +	if (IS_ERR(priv->wr_event)) {
>> +		ret = PTR_ERR(priv->wr_event);
>> +		goto err;
>> +	}
>> +
>> +	return 0;
>> +
>> +err:
>> +	imx8m_ddrc_perf_disable(priv);
>> +	return ret;
>> +}
>> +
>> +static int imx8m_ddrc_init_events(struct device *dev,
>> +				  struct device_node *events_node)
> 
> ditto.
> 
>> +{
>> +	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
>> +	struct device_driver *driver;
>> +
>> +	/*
>> +	 * We need pmu->type for perf_event_attr but there is no API for
>> +	 * mapping device_node to pmu. Fetch private data for imx-ddr-pmu and
>> +	 * cast that to a struct pmu instead.
>> +	 */
>> +	priv->pmu_pdev = of_find_device_by_node(events_node);
>> +	if (!priv->pmu_pdev)
>> +		return -EPROBE_DEFER;
>> +	driver = priv->pmu_pdev->dev.driver;
>> +	if (!driver)
>> +		return -EPROBE_DEFER;
>> +	if (strcmp(driver->name, "imx-ddr-pmu")) {
>> +		dev_warn(dev, "devfreq-events node %pOF has unexpected driver %s\n",
>> +				events_node, driver->name);
>> +		return -ENODEV;
>> +	}
>> +
>> +	priv->pmu = platform_get_drvdata(priv->pmu_pdev);
> 
> It seems that you access the different device driver without
> any standard interface from some linux kernel subsystem.
> 
> For the communication or control between different device drivers,
> we have to use the standard interface instead of direct access or call.
> I think that it make it too tightly coupled driver between them.
> 
> So, I developed the devfreq-event subsystem for this reason
> in order to remove the non-standard direct access to other device driver.
> 
> Unfortunately, I can't agree this approach. I don't prefer to use
> the direct access of other device driver(imx-ddr-pmu). You need to
> use standard interface provided from subsystem. or You need to make
> the new device driver with devfreq-event subsystem.

This could be cleaned-up by adding a new API to "fetch struct pmu* by 
struct device_node*" as available for many other subsystems. The perf 
api is otherwise standard/generic and has a few other in-kernel users.

The perf driver for DDR PMU is already functional and useful for 
profiling and reusing it seem very worthwhile. If you're suggesting I 
implemented an unrelated "devfreq-event" driver then how would it get 
probed? There's only one PMU node in DT.

I wouldn't mind to delay the monitoring part into a second series.

>> +	if (!priv->pmu) {
>> +		dev_err(dev, "devfreq-events device missing private data\n");
>> +		return -EINVAL;
>> +	}
>> +
>> +	dev_dbg(dev, "events from pmu %s\n", priv->pmu->name);
>> +
>> +	return imx8m_ddrc_perf_enable(priv);
>> +}
>> +
>>   static int imx8m_ddrc_init_freq_info(struct device *dev)
>>   {
>>   	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
>>   	struct arm_smccc_res res;
>>   	int index;
>> @@ -328,17 +464,23 @@ static int imx8m_ddrc_check_opps(struct device *dev)
>>   	return 0;
>>   }
>>   
>>   static void imx8m_ddrc_exit(struct device *dev)
>>   {
>> +	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
>> +
>> +	imx8m_ddrc_perf_disable(priv);
>> +	platform_device_put(priv->pmu_pdev);
>> +
>>   	dev_pm_opp_of_remove_table(dev);
>>   }
>>   
>>   static int imx8m_ddrc_probe(struct platform_device *pdev)
>>   {
>>   	struct device *dev = &pdev->dev;
>>   	struct imx8m_ddrc *priv;
>> +	struct device_node *events_node;
>>   	const char *gov = DEVFREQ_GOV_USERSPACE;
>>   	int ret;
>>   
>>   	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
>>   	if (!priv)
>> @@ -350,10 +492,19 @@ static int imx8m_ddrc_probe(struct platform_device *pdev)
>>   	if (ret) {
>>   		dev_err(dev, "failed to init firmware freq info: %d\n", ret);
>>   		return ret;
>>   	}
>>   
>> +	events_node = of_parse_phandle(dev->of_node, "devfreq-events", 0);
>> +	if (events_node) {
>> +		ret = imx8m_ddrc_init_events(dev, events_node);
>> +		of_node_put(events_node);
>> +		if (ret)
>> +			goto err;
>> +		gov = DEVFREQ_GOV_SIMPLE_ONDEMAND;
>> +	}
>> +
>>   	priv->dram_core = devm_clk_get(dev, "core");
>>   	priv->dram_pll = devm_clk_get(dev, "pll");
>>   	priv->dram_alt = devm_clk_get(dev, "alt");
>>   	priv->dram_apb = devm_clk_get(dev, "apb");
>>   	if (IS_ERR(priv->dram_core) ||
>> @@ -390,10 +541,12 @@ static int imx8m_ddrc_probe(struct platform_device *pdev)
>>   	}
>>   
>>   	return 0;
>>   
>>   err:
>> +	imx8m_ddrc_perf_disable(priv);
>> +	platform_device_put(priv->pmu_pdev);
>>   	dev_pm_opp_of_remove_table(dev);
>>   	return ret;
>>   }
>>   
>>   static const struct of_device_id imx8m_ddrc_of_match[] = {
Chanwoo Choi Nov. 13, 2019, 1:43 a.m. UTC | #3
Hi Leonard,

On 11/12/19 10:17 PM, Leonard Crestez wrote:
> On 11.11.2019 07:13, Chanwoo Choi wrote:
>> Hi Leonard,
>>
>> On 11/9/19 7:39 AM, Leonard Crestez wrote:
>>> The imx8m ddrc has a performance monitoring block attached which can
>>> be used to measure bandwidth usage and automatically adjust frequency.
>>>
>>> There is already a perf driver for that block so instead of implementing
>>> a devfreq events driver use the in-kernel perf API to implement
>>> get_dev_status directly.
>>>
>>> Signed-off-by: Leonard Crestez <leonard.crestez@nxp.com>
>>> ---
>>>   drivers/devfreq/imx8m-ddrc.c | 153 +++++++++++++++++++++++++++++++++++
>>>   1 file changed, 153 insertions(+)
>>>
>>> diff --git a/drivers/devfreq/imx8m-ddrc.c b/drivers/devfreq/imx8m-ddrc.c
>>> index 51903fee21a7..6372191f72d7 100644
>>> --- a/drivers/devfreq/imx8m-ddrc.c
>>> +++ b/drivers/devfreq/imx8m-ddrc.c
>>> @@ -11,10 +11,13 @@
>>>   #include <linux/pm_opp.h>
>>>   #include <linux/clk.h>
>>>   #include <linux/clk-provider.h>
>>>   #include <linux/arm-smccc.h>
>>>   
>>> +#include <asm/perf_event.h>
>>> +#include <linux/perf_event.h>
>>> +
>>>   #define IMX_SIP_DDR_DVFS			0xc2000004
>>>   
>>>   /* Values starting from 0 switch to specific frequency */
>>>   #define IMX_SIP_DDR_FREQ_SET_HIGH		0x00
>>>   
>>> @@ -78,10 +81,22 @@ struct imx8m_ddrc {
>>>   	struct clk *dram_alt;
>>>   	struct clk *dram_apb;
>>>   
>>>   	int freq_count;
>>>   	struct imx8m_ddrc_freq freq_table[IMX8M_DDRC_MAX_FREQ_COUNT];
>>> +
>>> +	/* For measuring load with perf events: */
>>> +	struct platform_device *pmu_pdev;
>>> +	struct pmu *pmu;
>>> +
>>> +	struct perf_event_attr rd_event_attr;
>>> +	struct perf_event_attr wr_event_attr;
>>> +	struct perf_event *rd_event;
>>> +	struct perf_event *wr_event;
>>> +
>>> +	u64 last_rd_val, last_rd_ena, last_rd_run;
>>> +	u64 last_wr_val, last_wr_ena, last_wr_run;
>>>   };
>>>   
>>>   static struct imx8m_ddrc_freq *imx8m_ddrc_find_freq(struct imx8m_ddrc *priv,
>>>   						    unsigned long rate)
>>>   {
>>> @@ -245,10 +260,131 @@ static int imx8m_ddrc_get_cur_freq(struct device *dev, unsigned long *freq)
>>>   	*freq = clk_get_rate(priv->dram_core);
>>>   
>>>   	return 0;
>>>   }
>>>   
>>> +static int imx8m_ddrc_get_dev_status(struct device *dev,
>>> +				     struct devfreq_dev_status *stat)
>>> +{
>>> +	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
>>> +
>>> +	stat->current_frequency = clk_get_rate(priv->dram_core);
>>> +
>>> +	if (priv->rd_event && priv->wr_event) {
>>> +		u64 rd_delta, rd_val, rd_ena, rd_run;
>>> +		u64 wr_delta, wr_val, wr_ena, wr_run;
>>> +
>>> +		rd_val = perf_event_read_value(priv->rd_event,
>>> +					       &rd_ena, &rd_run);
>>> +		wr_val = perf_event_read_value(priv->wr_event,
>>> +					       &wr_ena, &wr_run);
>>> +
>>> +		rd_delta = (rd_val - priv->last_rd_val) *
>>> +			   (rd_ena - priv->last_rd_ena);
>>> +		do_div(rd_delta, rd_run - priv->last_rd_run);
>>> +		priv->last_rd_val = rd_val;
>>> +		priv->last_rd_ena = rd_ena;
>>> +		priv->last_rd_run = rd_run;
>>> +
>>> +		wr_delta = (wr_val - priv->last_wr_val) *
>>> +			   (wr_ena - priv->last_wr_ena);
>>> +		do_div(wr_delta, wr_run - priv->last_wr_run);
>>> +		priv->last_wr_val = wr_val;
>>> +		priv->last_wr_ena = wr_ena;
>>> +		priv->last_wr_run = wr_run;
>>> +
>>> +		/* magic numbers, possibly wrong */
>>> +		stat->busy_time = 4 * (rd_delta + wr_delta);
>>> +		stat->total_time = stat->current_frequency;
>>> +	} else {
>>> +		stat->busy_time = 0;
>>> +		stat->total_time = 0;
>>> +	}
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int imx8m_ddrc_perf_disable(struct imx8m_ddrc *priv)
>>> +{
>>> +	/* release and set to NULL */
>>> +	if (!IS_ERR_OR_NULL(priv->rd_event))
>>> +		perf_event_release_kernel(priv->rd_event);
>>> +	if (!IS_ERR_OR_NULL(priv->wr_event))
>>> +		perf_event_release_kernel(priv->wr_event);
>>> +	priv->rd_event = NULL;
>>> +	priv->wr_event = NULL;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int imx8m_ddrc_perf_enable(struct imx8m_ddrc *priv)
>>
>> Actually, this function have to call the just function for enabling
>> the bandwidth monitoring instead of writing the detailed something.
>> It looks like that you move the part of the different device driver into here.
> 
> This is the code for enabling bandwith monitoring: it creates perf 
> counters using in-kernel API. The perf api doesn't seem to expose 
> anything else to enable/disable the counter after creation and honestly 
> just create/destroy seems fine.
> 
> As far as I can tell bandwidth monitoring in devfreq is always enabled 
> on probe anyway, no matter which governor is in use. It would be nice if 
> devfreq drivers could receive a callback and dynamically acquire/release 
> monitoring resources only when the ondemand governor is in used.

As you commented, it doesn't matter to keep the enabling state of perf.
Just, I want to say that I think that it is not proper to access
the different device driver directly without any standard subsystem interface.

> 
> Until then this driver will permanently allocate 2 (out of 3) counters 
> in ddr pmu hardware.
> 
>>> +{
>>> +	int ret;
>>> +
>>> +	priv->rd_event_attr.size = sizeof(priv->rd_event_attr);
>>> +	priv->rd_event_attr.type = priv->pmu->type;
>>> +	priv->rd_event_attr.config = 0x2a;
>>> +
>>> +	priv->rd_event = perf_event_create_kernel_counter(
>>> +			&priv->rd_event_attr, 0, NULL, NULL, NULL);
>>> +	if (IS_ERR(priv->rd_event)) {
>>> +		ret = PTR_ERR(priv->rd_event);
>>> +		goto err;
>>> +	}
>>> +
>>> +	priv->wr_event_attr.size = sizeof(priv->wr_event_attr);
>>> +	priv->wr_event_attr.type = priv->pmu->type;
>>> +	priv->wr_event_attr.config = 0x2b;
>>> +
>>> +	priv->wr_event = perf_event_create_kernel_counter(
>>> +			&priv->wr_event_attr, 0, NULL, NULL, NULL);
>>> +	if (IS_ERR(priv->wr_event)) {
>>> +		ret = PTR_ERR(priv->wr_event);
>>> +		goto err;
>>> +	}
>>> +
>>> +	return 0;
>>> +
>>> +err:
>>> +	imx8m_ddrc_perf_disable(priv);
>>> +	return ret;
>>> +}
>>> +
>>> +static int imx8m_ddrc_init_events(struct device *dev,
>>> +				  struct device_node *events_node)
>>
>> ditto.
>>
>>> +{
>>> +	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
>>> +	struct device_driver *driver;
>>> +
>>> +	/*
>>> +	 * We need pmu->type for perf_event_attr but there is no API for
>>> +	 * mapping device_node to pmu. Fetch private data for imx-ddr-pmu and
>>> +	 * cast that to a struct pmu instead.
>>> +	 */
>>> +	priv->pmu_pdev = of_find_device_by_node(events_node);
>>> +	if (!priv->pmu_pdev)
>>> +		return -EPROBE_DEFER;
>>> +	driver = priv->pmu_pdev->dev.driver;
>>> +	if (!driver)
>>> +		return -EPROBE_DEFER;
>>> +	if (strcmp(driver->name, "imx-ddr-pmu")) {
>>> +		dev_warn(dev, "devfreq-events node %pOF has unexpected driver %s\n",
>>> +				events_node, driver->name);
>>> +		return -ENODEV;
>>> +	}
>>> +
>>> +	priv->pmu = platform_get_drvdata(priv->pmu_pdev);
>>
>> It seems that you access the different device driver without
>> any standard interface from some linux kernel subsystem.
>>
>> For the communication or control between different device drivers,
>> we have to use the standard interface instead of direct access or call.
>> I think that it make it too tightly coupled driver between them.
>>
>> So, I developed the devfreq-event subsystem for this reason
>> in order to remove the non-standard direct access to other device driver.
>>
>> Unfortunately, I can't agree this approach. I don't prefer to use
>> the direct access of other device driver(imx-ddr-pmu). You need to
>> use standard interface provided from subsystem. or You need to make
>> the new device driver with devfreq-event subsystem.
> 
> This could be cleaned-up by adding a new API to "fetch struct pmu* by 
> struct device_node*" as available for many other subsystems.

OK. I'll expect the next version with keeping the rule without
any direct access between two device drivers.

 The perf 
> api is otherwise standard/generic and has a few other in-kernel users.

> 
> The perf driver for DDR PMU is already functional and useful for 
> profiling and reusing it seem very worthwhile. If you're suggesting I 
> implemented an unrelated "devfreq-event" driver then how would it get 
> probed? There's only one PMU node in DT.

No, I don't force to develop the new devfreq-event instead of exiting perf driver.
As I said already,I think that it is not proper to access the different device driver
directly without any standard subsystem interface.

The following style access is not safe. Any device driver might change
the platform data of the other device driver. 
	priv->pmu = platform_get_drvdata(priv->pmu_pdev);

If you support this patch with standard subsystem interface, I'm OK.
Just, I don't want to access the data of different device driver.

> 
> I wouldn't mind to delay the monitoring part into a second series.
> 
>>> +	if (!priv->pmu) {
>>> +		dev_err(dev, "devfreq-events device missing private data\n");
>>> +		return -EINVAL;
>>> +	}
>>> +
>>> +	dev_dbg(dev, "events from pmu %s\n", priv->pmu->name);
>>> +
>>> +	return imx8m_ddrc_perf_enable(priv);
>>> +}
>>> +
>>>   static int imx8m_ddrc_init_freq_info(struct device *dev)
>>>   {
>>>   	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
>>>   	struct arm_smccc_res res;
>>>   	int index;
>>> @@ -328,17 +464,23 @@ static int imx8m_ddrc_check_opps(struct device *dev)
>>>   	return 0;
>>>   }
>>>   
>>>   static void imx8m_ddrc_exit(struct device *dev)
>>>   {
>>> +	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
>>> +
>>> +	imx8m_ddrc_perf_disable(priv);
>>> +	platform_device_put(priv->pmu_pdev);
>>> +
>>>   	dev_pm_opp_of_remove_table(dev);
>>>   }
>>>   
>>>   static int imx8m_ddrc_probe(struct platform_device *pdev)
>>>   {
>>>   	struct device *dev = &pdev->dev;
>>>   	struct imx8m_ddrc *priv;
>>> +	struct device_node *events_node;
>>>   	const char *gov = DEVFREQ_GOV_USERSPACE;
>>>   	int ret;
>>>   
>>>   	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
>>>   	if (!priv)
>>> @@ -350,10 +492,19 @@ static int imx8m_ddrc_probe(struct platform_device *pdev)
>>>   	if (ret) {
>>>   		dev_err(dev, "failed to init firmware freq info: %d\n", ret);
>>>   		return ret;
>>>   	}
>>>   
>>> +	events_node = of_parse_phandle(dev->of_node, "devfreq-events", 0);
>>> +	if (events_node) {
>>> +		ret = imx8m_ddrc_init_events(dev, events_node);
>>> +		of_node_put(events_node);
>>> +		if (ret)
>>> +			goto err;
>>> +		gov = DEVFREQ_GOV_SIMPLE_ONDEMAND;
>>> +	}
>>> +
>>>   	priv->dram_core = devm_clk_get(dev, "core");
>>>   	priv->dram_pll = devm_clk_get(dev, "pll");
>>>   	priv->dram_alt = devm_clk_get(dev, "alt");
>>>   	priv->dram_apb = devm_clk_get(dev, "apb");
>>>   	if (IS_ERR(priv->dram_core) ||
>>> @@ -390,10 +541,12 @@ static int imx8m_ddrc_probe(struct platform_device *pdev)
>>>   	}
>>>   
>>>   	return 0;
>>>   
>>>   err:
>>> +	imx8m_ddrc_perf_disable(priv);
>>> +	platform_device_put(priv->pmu_pdev);
>>>   	dev_pm_opp_of_remove_table(dev);
>>>   	return ret;
>>>   }
>>>   
>>>   static const struct of_device_id imx8m_ddrc_of_match[] = {
> 
>
diff mbox series

Patch

diff --git a/drivers/devfreq/imx8m-ddrc.c b/drivers/devfreq/imx8m-ddrc.c
index 51903fee21a7..6372191f72d7 100644
--- a/drivers/devfreq/imx8m-ddrc.c
+++ b/drivers/devfreq/imx8m-ddrc.c
@@ -11,10 +11,13 @@ 
 #include <linux/pm_opp.h>
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/arm-smccc.h>
 
+#include <asm/perf_event.h>
+#include <linux/perf_event.h>
+
 #define IMX_SIP_DDR_DVFS			0xc2000004
 
 /* Values starting from 0 switch to specific frequency */
 #define IMX_SIP_DDR_FREQ_SET_HIGH		0x00
 
@@ -78,10 +81,22 @@  struct imx8m_ddrc {
 	struct clk *dram_alt;
 	struct clk *dram_apb;
 
 	int freq_count;
 	struct imx8m_ddrc_freq freq_table[IMX8M_DDRC_MAX_FREQ_COUNT];
+
+	/* For measuring load with perf events: */
+	struct platform_device *pmu_pdev;
+	struct pmu *pmu;
+
+	struct perf_event_attr rd_event_attr;
+	struct perf_event_attr wr_event_attr;
+	struct perf_event *rd_event;
+	struct perf_event *wr_event;
+
+	u64 last_rd_val, last_rd_ena, last_rd_run;
+	u64 last_wr_val, last_wr_ena, last_wr_run;
 };
 
 static struct imx8m_ddrc_freq *imx8m_ddrc_find_freq(struct imx8m_ddrc *priv,
 						    unsigned long rate)
 {
@@ -245,10 +260,131 @@  static int imx8m_ddrc_get_cur_freq(struct device *dev, unsigned long *freq)
 	*freq = clk_get_rate(priv->dram_core);
 
 	return 0;
 }
 
+static int imx8m_ddrc_get_dev_status(struct device *dev,
+				     struct devfreq_dev_status *stat)
+{
+	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
+
+	stat->current_frequency = clk_get_rate(priv->dram_core);
+
+	if (priv->rd_event && priv->wr_event) {
+		u64 rd_delta, rd_val, rd_ena, rd_run;
+		u64 wr_delta, wr_val, wr_ena, wr_run;
+
+		rd_val = perf_event_read_value(priv->rd_event,
+					       &rd_ena, &rd_run);
+		wr_val = perf_event_read_value(priv->wr_event,
+					       &wr_ena, &wr_run);
+
+		rd_delta = (rd_val - priv->last_rd_val) *
+			   (rd_ena - priv->last_rd_ena);
+		do_div(rd_delta, rd_run - priv->last_rd_run);
+		priv->last_rd_val = rd_val;
+		priv->last_rd_ena = rd_ena;
+		priv->last_rd_run = rd_run;
+
+		wr_delta = (wr_val - priv->last_wr_val) *
+			   (wr_ena - priv->last_wr_ena);
+		do_div(wr_delta, wr_run - priv->last_wr_run);
+		priv->last_wr_val = wr_val;
+		priv->last_wr_ena = wr_ena;
+		priv->last_wr_run = wr_run;
+
+		/* magic numbers, possibly wrong */
+		stat->busy_time = 4 * (rd_delta + wr_delta);
+		stat->total_time = stat->current_frequency;
+	} else {
+		stat->busy_time = 0;
+		stat->total_time = 0;
+	}
+
+	return 0;
+}
+
+static int imx8m_ddrc_perf_disable(struct imx8m_ddrc *priv)
+{
+	/* release and set to NULL */
+	if (!IS_ERR_OR_NULL(priv->rd_event))
+		perf_event_release_kernel(priv->rd_event);
+	if (!IS_ERR_OR_NULL(priv->wr_event))
+		perf_event_release_kernel(priv->wr_event);
+	priv->rd_event = NULL;
+	priv->wr_event = NULL;
+
+	return 0;
+}
+
+static int imx8m_ddrc_perf_enable(struct imx8m_ddrc *priv)
+{
+	int ret;
+
+	priv->rd_event_attr.size = sizeof(priv->rd_event_attr);
+	priv->rd_event_attr.type = priv->pmu->type;
+	priv->rd_event_attr.config = 0x2a;
+
+	priv->rd_event = perf_event_create_kernel_counter(
+			&priv->rd_event_attr, 0, NULL, NULL, NULL);
+	if (IS_ERR(priv->rd_event)) {
+		ret = PTR_ERR(priv->rd_event);
+		goto err;
+	}
+
+	priv->wr_event_attr.size = sizeof(priv->wr_event_attr);
+	priv->wr_event_attr.type = priv->pmu->type;
+	priv->wr_event_attr.config = 0x2b;
+
+	priv->wr_event = perf_event_create_kernel_counter(
+			&priv->wr_event_attr, 0, NULL, NULL, NULL);
+	if (IS_ERR(priv->wr_event)) {
+		ret = PTR_ERR(priv->wr_event);
+		goto err;
+	}
+
+	return 0;
+
+err:
+	imx8m_ddrc_perf_disable(priv);
+	return ret;
+}
+
+static int imx8m_ddrc_init_events(struct device *dev,
+				  struct device_node *events_node)
+{
+	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
+	struct device_driver *driver;
+
+	/*
+	 * We need pmu->type for perf_event_attr but there is no API for
+	 * mapping device_node to pmu. Fetch private data for imx-ddr-pmu and
+	 * cast that to a struct pmu instead.
+	 */
+	priv->pmu_pdev = of_find_device_by_node(events_node);
+	if (!priv->pmu_pdev)
+		return -EPROBE_DEFER;
+	driver = priv->pmu_pdev->dev.driver;
+	if (!driver)
+		return -EPROBE_DEFER;
+	if (strcmp(driver->name, "imx-ddr-pmu")) {
+		dev_warn(dev, "devfreq-events node %pOF has unexpected driver %s\n",
+				events_node, driver->name);
+		return -ENODEV;
+	}
+
+	priv->pmu = platform_get_drvdata(priv->pmu_pdev);
+	if (!priv->pmu) {
+		dev_err(dev, "devfreq-events device missing private data\n");
+		return -EINVAL;
+	}
+
+	dev_dbg(dev, "events from pmu %s\n", priv->pmu->name);
+
+	return imx8m_ddrc_perf_enable(priv);
+}
+
 static int imx8m_ddrc_init_freq_info(struct device *dev)
 {
 	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
 	struct arm_smccc_res res;
 	int index;
@@ -328,17 +464,23 @@  static int imx8m_ddrc_check_opps(struct device *dev)
 	return 0;
 }
 
 static void imx8m_ddrc_exit(struct device *dev)
 {
+	struct imx8m_ddrc *priv = dev_get_drvdata(dev);
+
+	imx8m_ddrc_perf_disable(priv);
+	platform_device_put(priv->pmu_pdev);
+
 	dev_pm_opp_of_remove_table(dev);
 }
 
 static int imx8m_ddrc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct imx8m_ddrc *priv;
+	struct device_node *events_node;
 	const char *gov = DEVFREQ_GOV_USERSPACE;
 	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -350,10 +492,19 @@  static int imx8m_ddrc_probe(struct platform_device *pdev)
 	if (ret) {
 		dev_err(dev, "failed to init firmware freq info: %d\n", ret);
 		return ret;
 	}
 
+	events_node = of_parse_phandle(dev->of_node, "devfreq-events", 0);
+	if (events_node) {
+		ret = imx8m_ddrc_init_events(dev, events_node);
+		of_node_put(events_node);
+		if (ret)
+			goto err;
+		gov = DEVFREQ_GOV_SIMPLE_ONDEMAND;
+	}
+
 	priv->dram_core = devm_clk_get(dev, "core");
 	priv->dram_pll = devm_clk_get(dev, "pll");
 	priv->dram_alt = devm_clk_get(dev, "alt");
 	priv->dram_apb = devm_clk_get(dev, "apb");
 	if (IS_ERR(priv->dram_core) ||
@@ -390,10 +541,12 @@  static int imx8m_ddrc_probe(struct platform_device *pdev)
 	}
 
 	return 0;
 
 err:
+	imx8m_ddrc_perf_disable(priv);
+	platform_device_put(priv->pmu_pdev);
 	dev_pm_opp_of_remove_table(dev);
 	return ret;
 }
 
 static const struct of_device_id imx8m_ddrc_of_match[] = {