diff mbox series

[v7,36/47] memory: tegra20-emc: Add devfreq support

Message ID 20201104164923.21238-37-digetx@gmail.com (mailing list archive)
State Not Applicable, archived
Delegated to: Chanwoo Choi
Headers show
Series Introduce memory interconnect for NVIDIA Tegra SoCs | expand

Commit Message

Dmitry Osipenko Nov. 4, 2020, 4:49 p.m. UTC
Add devfreq support to the Tegra20 EMC driver. Memory utilization
statistics will be periodically polled from the memory controller and
appropriate minimum clock rate will be selected by the devfreq governor.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
---
 drivers/memory/tegra/Kconfig       |  2 +
 drivers/memory/tegra/tegra20-emc.c | 92 ++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)

Comments

Chanwoo Choi Nov. 5, 2020, 2:30 a.m. UTC | #1
On 11/5/20 1:49 AM, Dmitry Osipenko wrote:
> Add devfreq support to the Tegra20 EMC driver. Memory utilization
> statistics will be periodically polled from the memory controller and
> appropriate minimum clock rate will be selected by the devfreq governor.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/memory/tegra/Kconfig       |  2 +
>  drivers/memory/tegra/tegra20-emc.c | 92 ++++++++++++++++++++++++++++++
>  2 files changed, 94 insertions(+)
> 
> diff --git a/drivers/memory/tegra/Kconfig b/drivers/memory/tegra/Kconfig
> index ac3dfe155505..76e9a3b10839 100644
> --- a/drivers/memory/tegra/Kconfig
> +++ b/drivers/memory/tegra/Kconfig
> @@ -12,6 +12,8 @@ config TEGRA20_EMC
>  	tristate "NVIDIA Tegra20 External Memory Controller driver"
>  	default y
>  	depends on TEGRA_MC && ARCH_TEGRA_2x_SOC
> +	select DEVFREQ_GOV_SIMPLE_ONDEMAND
> +	select PM_DEVFREQ
>  	select PM_OPP

nitpick. If you select PM_DEVFREQ, you don't need to select 'PM_OPP'
because PM_DEVFREQ uses OPP as mandatory with 'select PM_OPP' in Kconfig.


>  	help
>  	  This driver is for the External Memory Controller (EMC) found on
> diff --git a/drivers/memory/tegra/tegra20-emc.c b/drivers/memory/tegra/tegra20-emc.c
> index 5e10aa97809f..9946b957bb01 100644
> --- a/drivers/memory/tegra/tegra20-emc.c
> +++ b/drivers/memory/tegra/tegra20-emc.c
> @@ -8,6 +8,7 @@
>  #include <linux/clk.h>
>  #include <linux/clk/tegra.h>
>  #include <linux/debugfs.h>
> +#include <linux/devfreq.h>
>  #include <linux/err.h>
>  #include <linux/interconnect-provider.h>
>  #include <linux/interrupt.h>
> @@ -102,6 +103,10 @@
>  
>  #define EMC_FBIO_CFG5_DRAM_WIDTH_X16		BIT(4)
>  
> +#define EMC_PWR_GATHER_CLEAR			(1 << 8)
> +#define EMC_PWR_GATHER_DISABLE			(2 << 8)
> +#define EMC_PWR_GATHER_ENABLE			(3 << 8)
> +
>  static const u16 emc_timing_registers[] = {
>  	EMC_RC,
>  	EMC_RFC,
> @@ -157,6 +162,7 @@ struct emc_timing {
>  };
>  
>  enum emc_rate_request_type {
> +	EMC_RATE_DEVFREQ,
>  	EMC_RATE_DEBUG,
>  	EMC_RATE_ICC,
>  	EMC_RATE_TYPE_MAX,
> @@ -193,6 +199,9 @@ struct tegra_emc {
>  
>  	/* protect shared rate-change code path */
>  	struct mutex rate_lock;
> +
> +	struct devfreq_simple_ondemand_data ondemand_data;
> +	struct devfreq *devfreq;
>  };
>  
>  static irqreturn_t tegra_emc_isr(int irq, void *data)
> @@ -952,6 +961,88 @@ static int tegra_emc_opp_table_init(struct tegra_emc *emc)
>  	return err;
>  }
>  
> +static int tegra_emc_devfreq_target(struct device *dev, unsigned long *freq,
> +				    u32 flags)
> +{
> +	struct tegra_emc *emc = dev_get_drvdata(dev);
> +	struct dev_pm_opp *opp;
> +	unsigned long rate;
> +
> +	opp = devfreq_recommended_opp(dev, freq, flags);
> +	if (IS_ERR(opp)) {
> +		dev_err(dev, "failed to find opp for %lu Hz\n", *freq);
> +		return PTR_ERR(opp);
> +	}
> +
> +	rate = dev_pm_opp_get_freq(opp);
> +	dev_pm_opp_put(opp);
> +
> +	return emc_set_min_rate(emc, rate, EMC_RATE_DEVFREQ);
> +}
> +
> +static int tegra_emc_devfreq_get_dev_status(struct device *dev,
> +					    struct devfreq_dev_status *stat)
> +{
> +	struct tegra_emc *emc = dev_get_drvdata(dev);
> +
> +	/* freeze counters */
> +	writel_relaxed(EMC_PWR_GATHER_DISABLE, emc->regs + EMC_STAT_CONTROL);
> +
> +	/*
> +	 * busy_time:  number of clocks EMC request was accepted
> +	 * total_time: number of clocks PWR_GATHER control was set to ENABLE
> +	 */
> +	stat->busy_time = readl_relaxed(emc->regs + EMC_STAT_PWR_COUNT);
> +	stat->total_time = readl_relaxed(emc->regs + EMC_STAT_PWR_CLOCKS);
> +	stat->current_frequency = clk_get_rate(emc->clk);
> +
> +	/* clear counters and restart */
> +	writel_relaxed(EMC_PWR_GATHER_CLEAR, emc->regs + EMC_STAT_CONTROL);
> +	writel_relaxed(EMC_PWR_GATHER_ENABLE, emc->regs + EMC_STAT_CONTROL);
> +
> +	return 0;
> +}
> +
> +static struct devfreq_dev_profile tegra_emc_devfreq_profile = {
> +	.polling_ms	= 30,
> +	.target		= tegra_emc_devfreq_target,
> +	.get_dev_status	= tegra_emc_devfreq_get_dev_status,
> +};
> +
> +static int tegra_emc_devfreq_init(struct tegra_emc *emc)
> +{
> +	int err;
> +
> +	/*
> +	 * PWR_COUNT is 1/2 of PWR_CLOCKS at max, and thus, the up-threshold
> +	 * should be less than 50.  Secondly, multiple active memory clients
> +	 * may cause over 20% of lost clock cycles due to stalls caused by
> +	 * competing memory accesses.  This means that threshold should be
> +	 * set to a less than 30 in order to have a properly working governor.
> +	 */
> +	emc->ondemand_data.upthreshold = 20;
> +
> +	/*
> +	 * Reset statistic gathers state, select global bandwidth for the
> +	 * statistics collection mode and set clocks counter saturation
> +	 * limit to maximum.
> +	 */
> +	writel_relaxed(0x00000000, emc->regs + EMC_STAT_CONTROL);
> +	writel_relaxed(0x00000000, emc->regs + EMC_STAT_LLMC_CONTROL);
> +	writel_relaxed(0xffffffff, emc->regs + EMC_STAT_PWR_CLOCK_LIMIT);
> +
> +	emc->devfreq = devfreq_add_device(emc->dev, &tegra_emc_devfreq_profile,
> +					  DEVFREQ_GOV_SIMPLE_ONDEMAND,
> +					  &emc->ondemand_data);

Do you want to use 'devfreq_add_device' instead of
'devm_devfreq_add_device()'? If you have to use 'devfreq_add_device'
due to some reason, you need to call 'devfreq_remove_device' on exit.

> +	if (IS_ERR(emc->devfreq)) {
> +		err = PTR_ERR(emc->devfreq);
> +		dev_err(emc->dev, "failed to initialize devfreq: %d", err);
> +		return err;
> +	}
> +
> +	return 0;
> +}
> +
>  static int tegra_emc_probe(struct platform_device *pdev)
>  {
>  	struct device_node *np;
> @@ -1019,6 +1110,7 @@ static int tegra_emc_probe(struct platform_device *pdev)
>  	tegra_emc_rate_requests_init(emc);
>  	tegra_emc_debugfs_init(emc);
>  	tegra_emc_interconnect_init(emc);
> +	tegra_emc_devfreq_init(emc);
>  
>  	/*
>  	 * Don't allow the kernel module to be unloaded. Unloading adds some
>
Dmitry Osipenko Nov. 5, 2020, 1:50 p.m. UTC | #2
05.11.2020 05:30, Chanwoo Choi пишет:
> On 11/5/20 1:49 AM, Dmitry Osipenko wrote:
>> Add devfreq support to the Tegra20 EMC driver. Memory utilization
>> statistics will be periodically polled from the memory controller and
>> appropriate minimum clock rate will be selected by the devfreq governor.
>>
>> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
>> ---
>>  drivers/memory/tegra/Kconfig       |  2 +
>>  drivers/memory/tegra/tegra20-emc.c | 92 ++++++++++++++++++++++++++++++
>>  2 files changed, 94 insertions(+)
>>
>> diff --git a/drivers/memory/tegra/Kconfig b/drivers/memory/tegra/Kconfig
>> index ac3dfe155505..76e9a3b10839 100644
>> --- a/drivers/memory/tegra/Kconfig
>> +++ b/drivers/memory/tegra/Kconfig
>> @@ -12,6 +12,8 @@ config TEGRA20_EMC
>>  	tristate "NVIDIA Tegra20 External Memory Controller driver"
>>  	default y
>>  	depends on TEGRA_MC && ARCH_TEGRA_2x_SOC
>> +	select DEVFREQ_GOV_SIMPLE_ONDEMAND
>> +	select PM_DEVFREQ
>>  	select PM_OPP
> 
> nitpick. If you select PM_DEVFREQ, you don't need to select 'PM_OPP'
> because PM_DEVFREQ uses OPP as mandatory with 'select PM_OPP' in Kconfig.

Ok

...
>> +static int tegra_emc_devfreq_init(struct tegra_emc *emc)
>> +{
>> +	int err;
>> +
>> +	/*
>> +	 * PWR_COUNT is 1/2 of PWR_CLOCKS at max, and thus, the up-threshold
>> +	 * should be less than 50.  Secondly, multiple active memory clients
>> +	 * may cause over 20% of lost clock cycles due to stalls caused by
>> +	 * competing memory accesses.  This means that threshold should be
>> +	 * set to a less than 30 in order to have a properly working governor.
>> +	 */
>> +	emc->ondemand_data.upthreshold = 20;
>> +
>> +	/*
>> +	 * Reset statistic gathers state, select global bandwidth for the
>> +	 * statistics collection mode and set clocks counter saturation
>> +	 * limit to maximum.
>> +	 */
>> +	writel_relaxed(0x00000000, emc->regs + EMC_STAT_CONTROL);
>> +	writel_relaxed(0x00000000, emc->regs + EMC_STAT_LLMC_CONTROL);
>> +	writel_relaxed(0xffffffff, emc->regs + EMC_STAT_PWR_CLOCK_LIMIT);
>> +
>> +	emc->devfreq = devfreq_add_device(emc->dev, &tegra_emc_devfreq_profile,
>> +					  DEVFREQ_GOV_SIMPLE_ONDEMAND,
>> +					  &emc->ondemand_data);
> 
> Do you want to use 'devfreq_add_device' instead of
> 'devm_devfreq_add_device()'? If you have to use 'devfreq_add_device'
> due to some reason, you need to call 'devfreq_remove_device' on exit.

The reason I didn't use the devm here is because the EMC-clk callback
should be unregistered *after* devfreq is removed.

Thinking a bit more about it, I guess the best variant will be to add
devm support to the clk callback registration and then it should be
possible to use devm for the devfreq. I'll try to implement it in v8,
thanks.
Krzysztof Kozlowski Nov. 6, 2020, 7:13 p.m. UTC | #3
On Wed, Nov 04, 2020 at 07:49:12PM +0300, Dmitry Osipenko wrote:
> Add devfreq support to the Tegra20 EMC driver. Memory utilization
> statistics will be periodically polled from the memory controller and
> appropriate minimum clock rate will be selected by the devfreq governor.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/memory/tegra/Kconfig       |  2 +
>  drivers/memory/tegra/tegra20-emc.c | 92 ++++++++++++++++++++++++++++++
>  2 files changed, 94 insertions(+)
> 

I see this one still received comments. I skipped the DTS patches and
applied everything till patch #35. I understand you will send v8, so in
such case please skip the applied ones (you can rebase on my for-next or
on Monday's linux-next).

Best regards,
Krzysztof
Dmitry Osipenko Nov. 6, 2020, 9:53 p.m. UTC | #4
06.11.2020 22:13, Krzysztof Kozlowski пишет:
> On Wed, Nov 04, 2020 at 07:49:12PM +0300, Dmitry Osipenko wrote:
>> Add devfreq support to the Tegra20 EMC driver. Memory utilization
>> statistics will be periodically polled from the memory controller and
>> appropriate minimum clock rate will be selected by the devfreq governor.
>>
>> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
>> ---
>>  drivers/memory/tegra/Kconfig       |  2 +
>>  drivers/memory/tegra/tegra20-emc.c | 92 ++++++++++++++++++++++++++++++
>>  2 files changed, 94 insertions(+)
>>
> 
> I see this one still received comments. I skipped the DTS patches and
> applied everything till patch #35. I understand you will send v8, so in
> such case please skip the applied ones (you can rebase on my for-next or
> on Monday's linux-next).

Thank you! I'll also need to wait for a reply from Viresh Kumar in the other
thread regarding dev_pm_opp_get_opp_table() usage, and then I will probably
need to correct patch #35+ as well now, since it turned out that it may be
wrong for drivers to use dev_pm_opp_get_opp_table().
diff mbox series

Patch

diff --git a/drivers/memory/tegra/Kconfig b/drivers/memory/tegra/Kconfig
index ac3dfe155505..76e9a3b10839 100644
--- a/drivers/memory/tegra/Kconfig
+++ b/drivers/memory/tegra/Kconfig
@@ -12,6 +12,8 @@  config TEGRA20_EMC
 	tristate "NVIDIA Tegra20 External Memory Controller driver"
 	default y
 	depends on TEGRA_MC && ARCH_TEGRA_2x_SOC
+	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select PM_DEVFREQ
 	select PM_OPP
 	help
 	  This driver is for the External Memory Controller (EMC) found on
diff --git a/drivers/memory/tegra/tegra20-emc.c b/drivers/memory/tegra/tegra20-emc.c
index 5e10aa97809f..9946b957bb01 100644
--- a/drivers/memory/tegra/tegra20-emc.c
+++ b/drivers/memory/tegra/tegra20-emc.c
@@ -8,6 +8,7 @@ 
 #include <linux/clk.h>
 #include <linux/clk/tegra.h>
 #include <linux/debugfs.h>
+#include <linux/devfreq.h>
 #include <linux/err.h>
 #include <linux/interconnect-provider.h>
 #include <linux/interrupt.h>
@@ -102,6 +103,10 @@ 
 
 #define EMC_FBIO_CFG5_DRAM_WIDTH_X16		BIT(4)
 
+#define EMC_PWR_GATHER_CLEAR			(1 << 8)
+#define EMC_PWR_GATHER_DISABLE			(2 << 8)
+#define EMC_PWR_GATHER_ENABLE			(3 << 8)
+
 static const u16 emc_timing_registers[] = {
 	EMC_RC,
 	EMC_RFC,
@@ -157,6 +162,7 @@  struct emc_timing {
 };
 
 enum emc_rate_request_type {
+	EMC_RATE_DEVFREQ,
 	EMC_RATE_DEBUG,
 	EMC_RATE_ICC,
 	EMC_RATE_TYPE_MAX,
@@ -193,6 +199,9 @@  struct tegra_emc {
 
 	/* protect shared rate-change code path */
 	struct mutex rate_lock;
+
+	struct devfreq_simple_ondemand_data ondemand_data;
+	struct devfreq *devfreq;
 };
 
 static irqreturn_t tegra_emc_isr(int irq, void *data)
@@ -952,6 +961,88 @@  static int tegra_emc_opp_table_init(struct tegra_emc *emc)
 	return err;
 }
 
+static int tegra_emc_devfreq_target(struct device *dev, unsigned long *freq,
+				    u32 flags)
+{
+	struct tegra_emc *emc = dev_get_drvdata(dev);
+	struct dev_pm_opp *opp;
+	unsigned long rate;
+
+	opp = devfreq_recommended_opp(dev, freq, flags);
+	if (IS_ERR(opp)) {
+		dev_err(dev, "failed to find opp for %lu Hz\n", *freq);
+		return PTR_ERR(opp);
+	}
+
+	rate = dev_pm_opp_get_freq(opp);
+	dev_pm_opp_put(opp);
+
+	return emc_set_min_rate(emc, rate, EMC_RATE_DEVFREQ);
+}
+
+static int tegra_emc_devfreq_get_dev_status(struct device *dev,
+					    struct devfreq_dev_status *stat)
+{
+	struct tegra_emc *emc = dev_get_drvdata(dev);
+
+	/* freeze counters */
+	writel_relaxed(EMC_PWR_GATHER_DISABLE, emc->regs + EMC_STAT_CONTROL);
+
+	/*
+	 * busy_time:  number of clocks EMC request was accepted
+	 * total_time: number of clocks PWR_GATHER control was set to ENABLE
+	 */
+	stat->busy_time = readl_relaxed(emc->regs + EMC_STAT_PWR_COUNT);
+	stat->total_time = readl_relaxed(emc->regs + EMC_STAT_PWR_CLOCKS);
+	stat->current_frequency = clk_get_rate(emc->clk);
+
+	/* clear counters and restart */
+	writel_relaxed(EMC_PWR_GATHER_CLEAR, emc->regs + EMC_STAT_CONTROL);
+	writel_relaxed(EMC_PWR_GATHER_ENABLE, emc->regs + EMC_STAT_CONTROL);
+
+	return 0;
+}
+
+static struct devfreq_dev_profile tegra_emc_devfreq_profile = {
+	.polling_ms	= 30,
+	.target		= tegra_emc_devfreq_target,
+	.get_dev_status	= tegra_emc_devfreq_get_dev_status,
+};
+
+static int tegra_emc_devfreq_init(struct tegra_emc *emc)
+{
+	int err;
+
+	/*
+	 * PWR_COUNT is 1/2 of PWR_CLOCKS at max, and thus, the up-threshold
+	 * should be less than 50.  Secondly, multiple active memory clients
+	 * may cause over 20% of lost clock cycles due to stalls caused by
+	 * competing memory accesses.  This means that threshold should be
+	 * set to a less than 30 in order to have a properly working governor.
+	 */
+	emc->ondemand_data.upthreshold = 20;
+
+	/*
+	 * Reset statistic gathers state, select global bandwidth for the
+	 * statistics collection mode and set clocks counter saturation
+	 * limit to maximum.
+	 */
+	writel_relaxed(0x00000000, emc->regs + EMC_STAT_CONTROL);
+	writel_relaxed(0x00000000, emc->regs + EMC_STAT_LLMC_CONTROL);
+	writel_relaxed(0xffffffff, emc->regs + EMC_STAT_PWR_CLOCK_LIMIT);
+
+	emc->devfreq = devfreq_add_device(emc->dev, &tegra_emc_devfreq_profile,
+					  DEVFREQ_GOV_SIMPLE_ONDEMAND,
+					  &emc->ondemand_data);
+	if (IS_ERR(emc->devfreq)) {
+		err = PTR_ERR(emc->devfreq);
+		dev_err(emc->dev, "failed to initialize devfreq: %d", err);
+		return err;
+	}
+
+	return 0;
+}
+
 static int tegra_emc_probe(struct platform_device *pdev)
 {
 	struct device_node *np;
@@ -1019,6 +1110,7 @@  static int tegra_emc_probe(struct platform_device *pdev)
 	tegra_emc_rate_requests_init(emc);
 	tegra_emc_debugfs_init(emc);
 	tegra_emc_interconnect_init(emc);
+	tegra_emc_devfreq_init(emc);
 
 	/*
 	 * Don't allow the kernel module to be unloaded. Unloading adds some