diff mbox series

[v6,33/52] memory: tegra20: Support interconnect framework

Message ID 20201025221735.3062-34-digetx@gmail.com (mailing list archive)
State Not Applicable, archived
Headers show
Series Introduce memory interconnect for NVIDIA Tegra SoCs | expand

Commit Message

Dmitry Osipenko Oct. 25, 2020, 10:17 p.m. UTC
Now Internal and External Memory Controllers are memory interconnection
providers. This allows us to use interconnect API for tuning of memory
configuration. EMC driver now supports OPPs and DVFS.

Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
---
 drivers/memory/tegra/Kconfig       |   3 +-
 drivers/memory/tegra/mc.h          |  12 ++
 drivers/memory/tegra/tegra20-emc.c | 176 +++++++++++++++++++++++++++++
 drivers/memory/tegra/tegra20.c     |  34 ++++++
 4 files changed, 224 insertions(+), 1 deletion(-)

Comments

Krzysztof Kozlowski Oct. 27, 2020, 10:09 a.m. UTC | #1
On Mon, Oct 26, 2020 at 01:17:16AM +0300, Dmitry Osipenko wrote:
> Now Internal and External Memory Controllers are memory interconnection
> providers. This allows us to use interconnect API for tuning of memory
> configuration. EMC driver now supports OPPs and DVFS.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/memory/tegra/Kconfig       |   3 +-
>  drivers/memory/tegra/mc.h          |  12 ++
>  drivers/memory/tegra/tegra20-emc.c | 176 +++++++++++++++++++++++++++++
>  drivers/memory/tegra/tegra20.c     |  34 ++++++
>  4 files changed, 224 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/memory/tegra/Kconfig b/drivers/memory/tegra/Kconfig
> index ff426747cd7d..ac3dfe155505 100644
> --- a/drivers/memory/tegra/Kconfig
> +++ b/drivers/memory/tegra/Kconfig
> @@ -11,7 +11,8 @@ config TEGRA_MC
>  config TEGRA20_EMC
>  	tristate "NVIDIA Tegra20 External Memory Controller driver"
>  	default y
> -	depends on ARCH_TEGRA_2x_SOC
> +	depends on TEGRA_MC && ARCH_TEGRA_2x_SOC
> +	select PM_OPP
>  	help
>  	  This driver is for the External Memory Controller (EMC) found on
>  	  Tegra20 chips. The EMC controls the external DRAM on the board.
> diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
> index abeb6a2cc36a..531fb4fb7b17 100644
> --- a/drivers/memory/tegra/mc.h
> +++ b/drivers/memory/tegra/mc.h
> @@ -78,6 +78,18 @@
>  
>  #define MC_TIMING_UPDATE				BIT(0)
>  
> +static inline u32 tegra_mc_scale_percents(u64 val, unsigned int percents)
> +{
> +	val = val * percents;
> +	do_div(val, 100);
> +
> +	/*
> +	 * High freq + high boosting percent + large polling interval are
> +	 * resulting in integer overflow when watermarks are calculated.
> +	 */
> +	return min_t(u64, val, U32_MAX);
> +}
> +
>  static inline u32 mc_readl(struct tegra_mc *mc, unsigned long offset)
>  {
>  	return readl_relaxed(mc->regs + offset);
> diff --git a/drivers/memory/tegra/tegra20-emc.c b/drivers/memory/tegra/tegra20-emc.c
> index 34085e26dced..69ccb3fe5b0b 100644
> --- a/drivers/memory/tegra/tegra20-emc.c
> +++ b/drivers/memory/tegra/tegra20-emc.c
> @@ -9,6 +9,7 @@
>  #include <linux/clk/tegra.h>
>  #include <linux/debugfs.h>
>  #include <linux/err.h>
> +#include <linux/interconnect-provider.h>
>  #include <linux/interrupt.h>
>  #include <linux/io.h>
>  #include <linux/iopoll.h>
> @@ -16,11 +17,15 @@
>  #include <linux/module.h>
>  #include <linux/of.h>
>  #include <linux/platform_device.h>
> +#include <linux/pm_opp.h>
> +#include <linux/slab.h>
>  #include <linux/sort.h>
>  #include <linux/types.h>
>  
>  #include <soc/tegra/fuse.h>
>  
> +#include "mc.h"
> +
>  #define EMC_INTSTATUS				0x000
>  #define EMC_INTMASK				0x004
>  #define EMC_DBG					0x008
> @@ -144,6 +149,9 @@ struct emc_timing {
>  
>  struct tegra_emc {
>  	struct device *dev;
> +	struct tegra_mc *mc;
> +	struct opp_table *opp_table;
> +	struct icc_provider provider;
>  	struct notifier_block clk_nb;
>  	struct clk *clk;
>  	void __iomem *regs;
> @@ -658,6 +666,166 @@ static void tegra_emc_debugfs_init(struct tegra_emc *emc)
>  			    emc, &tegra_emc_debug_max_rate_fops);
>  }
>  
> +static inline struct tegra_emc *
> +to_tegra_emc_provider(struct icc_provider *provider)
> +{
> +	return container_of(provider, struct tegra_emc, provider);
> +}
> +
> +static struct icc_node_data *
> +emc_of_icc_xlate_extended(struct of_phandle_args *spec, void *data)
> +{
> +	struct icc_provider *provider = data;
> +	struct icc_node_data *ndata;
> +	struct icc_node *node;
> +
> +	/* External Memory is the only possible ICC route */
> +	list_for_each_entry(node, &provider->nodes, node_list) {
> +		if (node->id != TEGRA_ICC_EMEM)
> +			continue;
> +
> +		ndata = kzalloc(sizeof(*ndata), GFP_KERNEL);
> +		if (!ndata)
> +			return ERR_PTR(-ENOMEM);
> +
> +		/*
> +		 * SRC and DST nodes should have matching TAG in order to have
> +		 * it set by default for a requested path.
> +		 */
> +		ndata->tag = TEGRA_MC_ICC_TAG_ISO;
> +		ndata->node = node;
> +
> +		return ndata;
> +	}
> +
> +	return ERR_PTR(-EINVAL);
> +}
> +
> +static int emc_icc_set(struct icc_node *src, struct icc_node *dst)
> +{
> +	struct tegra_emc *emc = to_tegra_emc_provider(dst->provider);
> +	unsigned long long peak_bw = icc_units_to_bps(dst->peak_bw);
> +	unsigned long long avg_bw = icc_units_to_bps(dst->avg_bw);
> +	unsigned long long rate = max(avg_bw, peak_bw);
> +	unsigned int dram_data_bus_width_bytes = 4;
> +	long rounded_rate;
> +	int err;
> +
> +	/*
> +	 * Tegra20 EMC runs on x2 clock rate of SDRAM bus because DDR data
> +	 * is sampled on both clock edges. This means that EMC clock rate
> +	 * equals to the peak data rate.
> +	 */
> +	do_div(rate, dram_data_bus_width_bytes);
> +	rate = min_t(u64, rate, U32_MAX);
> +
> +	rounded_rate = emc_round_rate(rate, 0, U32_MAX, emc);
> +	if (rounded_rate < 0)
> +		return rounded_rate;
> +
> +	err = dev_pm_opp_set_rate(emc->dev, rounded_rate);
> +	if (err)
> +		return err;
> +
> +	return 0;
> +}
> +
> +static int tegra_emc_interconnect_init(struct tegra_emc *emc)
> +{
> +	const struct tegra_mc_soc *soc;
> +	struct icc_node *node;
> +	int err;
> +
> +	emc->mc = devm_tegra_get_memory_controller(emc->dev);
> +	if (IS_ERR(emc->mc))
> +		return PTR_ERR(emc->mc);
> +
> +	soc = emc->mc->soc;
> +
> +	emc->provider.dev = emc->dev;
> +	emc->provider.set = emc_icc_set;
> +	emc->provider.data = &emc->provider;
> +	emc->provider.aggregate = soc->icc_ops->aggregate;
> +	emc->provider.xlate_extended = emc_of_icc_xlate_extended;
> +
> +	err = icc_provider_add(&emc->provider);
> +	if (err)
> +		goto err_msg;
> +
> +	/* create External Memory Controller node */
> +	node = icc_node_create(TEGRA_ICC_EMC);
> +	err = PTR_ERR_OR_ZERO(node);
> +	if (err)
> +		goto del_provider;
> +
> +	node->name = "External Memory Controller";
> +	icc_node_add(node, &emc->provider);
> +
> +	/* link External Memory Controller to External Memory (DRAM) */
> +	err = icc_link_create(node, TEGRA_ICC_EMEM);
> +	if (err)
> +		goto remove_nodes;
> +
> +	/* create External Memory node */
> +	node = icc_node_create(TEGRA_ICC_EMEM);
> +	err = PTR_ERR_OR_ZERO(node);
> +	if (err)
> +		goto remove_nodes;
> +
> +	node->name = "External Memory (DRAM)";
> +	icc_node_add(node, &emc->provider);
> +
> +	return 0;
> +
> +remove_nodes:
> +	icc_nodes_remove(&emc->provider);
> +del_provider:
> +	icc_provider_del(&emc->provider);
> +err_msg:
> +	dev_err(emc->dev, "failed to initialize ICC: %d\n", err);

You will print such errors on all existing DTBs. Since it is not a
failure of probe (it is actually quite expected, normal situation when
booting with older DTB), let's change it to warning (here and in all
other places and drivers).

> +
> +	return err;
> +}
> +
> +static int tegra_emc_opp_table_init(struct tegra_emc *emc)
> +{
> +	const char *rname = "core";
> +	int err;
> +
> +	/*
> +	 * Legacy device-trees don't have OPP table and EMC driver isn't
> +	 * useful in this case.
> +	 */
> +	if (!device_property_present(emc->dev, "operating-points-v2")) {
> +		dev_err(emc->dev, "OPP table not found\n");
> +		dev_err(emc->dev, "please update your device tree\n");
> +		return -ENODEV;
> +	}
> +
> +	/* voltage scaling is optional */
> +	if (device_property_present(emc->dev, "core-supply"))
> +		emc->opp_table = dev_pm_opp_set_regulators(emc->dev, &rname, 1);
> +	else
> +		emc->opp_table = dev_pm_opp_get_opp_table(emc->dev);
> +
> +	if (IS_ERR(emc->opp_table))
> +		return dev_err_probe(emc->dev, PTR_ERR(emc->opp_table),
> +				     "failed to prepare OPP table\n");
> +
> +	err = dev_pm_opp_of_add_table(emc->dev);
> +	if (err) {
> +		dev_err(emc->dev, "failed to add OPP table: %d\n", err);
> +		goto put_table;
> +	}
> +
> +	return 0;
> +
> +put_table:
> +	dev_pm_opp_put_opp_table(emc->opp_table);
> +
> +	return err;
> +}
> +
>  static int tegra_emc_probe(struct platform_device *pdev)
>  {
>  	struct device_node *np;
> @@ -717,8 +885,13 @@ static int tegra_emc_probe(struct platform_device *pdev)
>  		goto unset_cb;
>  	}
>  
> +	err = tegra_emc_opp_table_init(emc);
> +	if (err)
> +		goto unreg_notifier;

This looks like the ABI break I mentioned around DT bindings. Are the
bindings marked as unstable?

Best regards,
Krzysztof
Thierry Reding Oct. 27, 2020, 2:11 p.m. UTC | #2
On Mon, Oct 26, 2020 at 01:17:16AM +0300, Dmitry Osipenko wrote:
> Now Internal and External Memory Controllers are memory interconnection
> providers. This allows us to use interconnect API for tuning of memory
> configuration. EMC driver now supports OPPs and DVFS.
> 
> Signed-off-by: Dmitry Osipenko <digetx@gmail.com>
> ---
>  drivers/memory/tegra/Kconfig       |   3 +-
>  drivers/memory/tegra/mc.h          |  12 ++
>  drivers/memory/tegra/tegra20-emc.c | 176 +++++++++++++++++++++++++++++
>  drivers/memory/tegra/tegra20.c     |  34 ++++++
>  4 files changed, 224 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/memory/tegra/Kconfig b/drivers/memory/tegra/Kconfig
> index ff426747cd7d..ac3dfe155505 100644
> --- a/drivers/memory/tegra/Kconfig
> +++ b/drivers/memory/tegra/Kconfig
> @@ -11,7 +11,8 @@ config TEGRA_MC
>  config TEGRA20_EMC
>  	tristate "NVIDIA Tegra20 External Memory Controller driver"
>  	default y
> -	depends on ARCH_TEGRA_2x_SOC
> +	depends on TEGRA_MC && ARCH_TEGRA_2x_SOC
> +	select PM_OPP
>  	help
>  	  This driver is for the External Memory Controller (EMC) found on
>  	  Tegra20 chips. The EMC controls the external DRAM on the board.
> diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
> index abeb6a2cc36a..531fb4fb7b17 100644
> --- a/drivers/memory/tegra/mc.h
> +++ b/drivers/memory/tegra/mc.h
> @@ -78,6 +78,18 @@
>  
>  #define MC_TIMING_UPDATE				BIT(0)
>  
> +static inline u32 tegra_mc_scale_percents(u64 val, unsigned int percents)
> +{
> +	val = val * percents;
> +	do_div(val, 100);
> +
> +	/*
> +	 * High freq + high boosting percent + large polling interval are
> +	 * resulting in integer overflow when watermarks are calculated.
> +	 */
> +	return min_t(u64, val, U32_MAX);
> +}
> +
>  static inline u32 mc_readl(struct tegra_mc *mc, unsigned long offset)
>  {
>  	return readl_relaxed(mc->regs + offset);
> diff --git a/drivers/memory/tegra/tegra20-emc.c b/drivers/memory/tegra/tegra20-emc.c
> index 34085e26dced..69ccb3fe5b0b 100644
> --- a/drivers/memory/tegra/tegra20-emc.c
> +++ b/drivers/memory/tegra/tegra20-emc.c
> @@ -9,6 +9,7 @@
>  #include <linux/clk/tegra.h>
>  #include <linux/debugfs.h>
>  #include <linux/err.h>
> +#include <linux/interconnect-provider.h>
>  #include <linux/interrupt.h>
>  #include <linux/io.h>
>  #include <linux/iopoll.h>
> @@ -16,11 +17,15 @@
>  #include <linux/module.h>
>  #include <linux/of.h>
>  #include <linux/platform_device.h>
> +#include <linux/pm_opp.h>
> +#include <linux/slab.h>
>  #include <linux/sort.h>
>  #include <linux/types.h>
>  
>  #include <soc/tegra/fuse.h>
>  
> +#include "mc.h"
> +
>  #define EMC_INTSTATUS				0x000
>  #define EMC_INTMASK				0x004
>  #define EMC_DBG					0x008
> @@ -144,6 +149,9 @@ struct emc_timing {
>  
>  struct tegra_emc {
>  	struct device *dev;
> +	struct tegra_mc *mc;
> +	struct opp_table *opp_table;
> +	struct icc_provider provider;
>  	struct notifier_block clk_nb;
>  	struct clk *clk;
>  	void __iomem *regs;
> @@ -658,6 +666,166 @@ static void tegra_emc_debugfs_init(struct tegra_emc *emc)
>  			    emc, &tegra_emc_debug_max_rate_fops);
>  }
>  
> +static inline struct tegra_emc *
> +to_tegra_emc_provider(struct icc_provider *provider)
> +{
> +	return container_of(provider, struct tegra_emc, provider);
> +}
> +
> +static struct icc_node_data *
> +emc_of_icc_xlate_extended(struct of_phandle_args *spec, void *data)
> +{
> +	struct icc_provider *provider = data;
> +	struct icc_node_data *ndata;
> +	struct icc_node *node;
> +
> +	/* External Memory is the only possible ICC route */
> +	list_for_each_entry(node, &provider->nodes, node_list) {
> +		if (node->id != TEGRA_ICC_EMEM)
> +			continue;
> +
> +		ndata = kzalloc(sizeof(*ndata), GFP_KERNEL);
> +		if (!ndata)
> +			return ERR_PTR(-ENOMEM);
> +
> +		/*
> +		 * SRC and DST nodes should have matching TAG in order to have
> +		 * it set by default for a requested path.
> +		 */
> +		ndata->tag = TEGRA_MC_ICC_TAG_ISO;
> +		ndata->node = node;
> +
> +		return ndata;
> +	}
> +
> +	return ERR_PTR(-EINVAL);
> +}
> +
> +static int emc_icc_set(struct icc_node *src, struct icc_node *dst)
> +{
> +	struct tegra_emc *emc = to_tegra_emc_provider(dst->provider);
> +	unsigned long long peak_bw = icc_units_to_bps(dst->peak_bw);
> +	unsigned long long avg_bw = icc_units_to_bps(dst->avg_bw);
> +	unsigned long long rate = max(avg_bw, peak_bw);
> +	unsigned int dram_data_bus_width_bytes = 4;

Perhaps use something shorter for this variable (like dram_bus_width)? Also,
since it's never modified, perhaps make it const? Or a #define?

> +	long rounded_rate;
> +	int err;
> +
> +	/*
> +	 * Tegra20 EMC runs on x2 clock rate of SDRAM bus because DDR data
> +	 * is sampled on both clock edges. This means that EMC clock rate
> +	 * equals to the peak data rate.
> +	 */
> +	do_div(rate, dram_data_bus_width_bytes);
> +	rate = min_t(u64, rate, U32_MAX);
> +
> +	rounded_rate = emc_round_rate(rate, 0, U32_MAX, emc);
> +	if (rounded_rate < 0)
> +		return rounded_rate;
> +
> +	err = dev_pm_opp_set_rate(emc->dev, rounded_rate);
> +	if (err)
> +		return err;
> +
> +	return 0;
> +}
> +
> +static int tegra_emc_interconnect_init(struct tegra_emc *emc)
> +{
> +	const struct tegra_mc_soc *soc;
> +	struct icc_node *node;
> +	int err;
> +
> +	emc->mc = devm_tegra_get_memory_controller(emc->dev);
> +	if (IS_ERR(emc->mc))
> +		return PTR_ERR(emc->mc);
> +
> +	soc = emc->mc->soc;
> +
> +	emc->provider.dev = emc->dev;
> +	emc->provider.set = emc_icc_set;
> +	emc->provider.data = &emc->provider;
> +	emc->provider.aggregate = soc->icc_ops->aggregate;
> +	emc->provider.xlate_extended = emc_of_icc_xlate_extended;
> +
> +	err = icc_provider_add(&emc->provider);
> +	if (err)
> +		goto err_msg;
> +
> +	/* create External Memory Controller node */
> +	node = icc_node_create(TEGRA_ICC_EMC);
> +	err = PTR_ERR_OR_ZERO(node);
> +	if (err)
> +		goto del_provider;

As far as I can tell, icc_node_create() always returns either a valid
pointer or an ERR_PTR-encoded negative error-code. So I think the more
idiomatic way to write this would be:

	node = icc_node_create(TEGRA_ICC_EMC);
	if (IS_ERR(node)) {
		err = PTR_ERR(node);
		goto del_provider;
	}

> +
> +	node->name = "External Memory Controller";
> +	icc_node_add(node, &emc->provider);
> +
> +	/* link External Memory Controller to External Memory (DRAM) */
> +	err = icc_link_create(node, TEGRA_ICC_EMEM);
> +	if (err)
> +		goto remove_nodes;
> +
> +	/* create External Memory node */
> +	node = icc_node_create(TEGRA_ICC_EMEM);
> +	err = PTR_ERR_OR_ZERO(node);
> +	if (err)
> +		goto remove_nodes;

Same here.

> +
> +	node->name = "External Memory (DRAM)";
> +	icc_node_add(node, &emc->provider);
> +
> +	return 0;
> +
> +remove_nodes:
> +	icc_nodes_remove(&emc->provider);
> +del_provider:
> +	icc_provider_del(&emc->provider);
> +err_msg:
> +	dev_err(emc->dev, "failed to initialize ICC: %d\n", err);

It might be worth duplicating this error message to the failure
locations so that the exact failure can be identified.

> +
> +	return err;
> +}
> +
> +static int tegra_emc_opp_table_init(struct tegra_emc *emc)
> +{
> +	const char *rname = "core";
> +	int err;
> +
> +	/*
> +	 * Legacy device-trees don't have OPP table and EMC driver isn't
> +	 * useful in this case.
> +	 */
> +	if (!device_property_present(emc->dev, "operating-points-v2")) {
> +		dev_err(emc->dev, "OPP table not found\n");
> +		dev_err(emc->dev, "please update your device tree\n");

This should be a single error message. These messages end up in kmsg
records and having this split into two dev_err() calls makes them into
two separate records and that in turn makes it more difficult to
determine whether they belong together or not.

> +		return -ENODEV;
> +	}
> +
> +	/* voltage scaling is optional */
> +	if (device_property_present(emc->dev, "core-supply"))
> +		emc->opp_table = dev_pm_opp_set_regulators(emc->dev, &rname, 1);
> +	else
> +		emc->opp_table = dev_pm_opp_get_opp_table(emc->dev);
> +
> +	if (IS_ERR(emc->opp_table))
> +		return dev_err_probe(emc->dev, PTR_ERR(emc->opp_table),
> +				     "failed to prepare OPP table\n");
> +
> +	err = dev_pm_opp_of_add_table(emc->dev);
> +	if (err) {
> +		dev_err(emc->dev, "failed to add OPP table: %d\n", err);
> +		goto put_table;
> +	}
> +
> +	return 0;
> +
> +put_table:
> +	dev_pm_opp_put_opp_table(emc->opp_table);
> +
> +	return err;
> +}
> +
>  static int tegra_emc_probe(struct platform_device *pdev)
>  {
>  	struct device_node *np;
> @@ -717,8 +885,13 @@ static int tegra_emc_probe(struct platform_device *pdev)
>  		goto unset_cb;
>  	}
>  
> +	err = tegra_emc_opp_table_init(emc);
> +	if (err)
> +		goto unreg_notifier;
> +
>  	platform_set_drvdata(pdev, emc);
>  	tegra_emc_debugfs_init(emc);
> +	tegra_emc_interconnect_init(emc);
>  
>  	/*
>  	 * Don't allow the kernel module to be unloaded. Unloading adds some
> @@ -729,6 +902,8 @@ static int tegra_emc_probe(struct platform_device *pdev)
>  
>  	return 0;
>  
> +unreg_notifier:
> +	clk_notifier_unregister(emc->clk, &emc->clk_nb);
>  unset_cb:
>  	tegra20_clk_set_emc_round_callback(NULL, NULL);
>  
> @@ -747,6 +922,7 @@ static struct platform_driver tegra_emc_driver = {
>  		.name = "tegra20-emc",
>  		.of_match_table = tegra_emc_of_match,
>  		.suppress_bind_attrs = true,
> +		.sync_state = icc_sync_state,
>  	},
>  };
>  module_platform_driver(tegra_emc_driver);
> diff --git a/drivers/memory/tegra/tegra20.c b/drivers/memory/tegra/tegra20.c
> index a8098bff91d9..5127e8e8250f 100644
> --- a/drivers/memory/tegra/tegra20.c
> +++ b/drivers/memory/tegra/tegra20.c
> @@ -280,6 +280,39 @@ static const struct tegra_mc_reset_ops tegra20_mc_reset_ops = {
>  	.reset_status = tegra20_mc_reset_status,
>  };
>  
> +static int tegra20_mc_icc_set(struct icc_node *src, struct icc_node *dst)
> +{
> +	/*
> +	 * Technically, it should be possible to tune arbitration knobs here,
> +	 * but the default values are known to work well on all devices.
> +	 * Hence nothing to do here so far.
> +	 */
> +	return 0;
> +}
> +
> +static int tegra20_mc_icc_aggreate(struct icc_node *node, u32 tag, u32 avg_bw,
> +				   u32 peak_bw, u32 *agg_avg, u32 *agg_peak)
> +{
> +	/*
> +	 * ISO clients need to reserve extra bandwidth up-front because
> +	 * there could high bandwidth pressure during initial fulling-up

"filling of the client's FIFO buffers"

> +	 * of the client's FIFO buffers. Secondly, we need to take into
> +	 * account impurities of the memory subsystem.
> +	 */
> +	if (tag == TEGRA_MC_ICC_TAG_ISO)
> +		peak_bw = tegra_mc_scale_percents(peak_bw, 300);

300% sounds a bit excessive. Do we really need that much?

> +
> +	*agg_avg += avg_bw;
> +	*agg_peak = max(*agg_peak, peak_bw);

I'm not very familiar with ICC, but shouldn't the aggregated peak value
be the sum of the current aggregated peak and the new peak bandwidth?
Currently you're selecting the maximum peak bandwidth across all
clients, so isn't that going to be too small if for whatever reason
multiple clients need peak bandwidth at the same time?

Thierry
Dmitry Osipenko Oct. 27, 2020, 8:22 p.m. UTC | #3
27.10.2020 17:11, Thierry Reding пишет:
...
>> +static int emc_icc_set(struct icc_node *src, struct icc_node *dst)
>> +{
>> +	struct tegra_emc *emc = to_tegra_emc_provider(dst->provider);
>> +	unsigned long long peak_bw = icc_units_to_bps(dst->peak_bw);
>> +	unsigned long long avg_bw = icc_units_to_bps(dst->avg_bw);
>> +	unsigned long long rate = max(avg_bw, peak_bw);
>> +	unsigned int dram_data_bus_width_bytes = 4;
> 
> Perhaps use something shorter for this variable (like dram_bus_width)? Also,
> since it's never modified, perhaps make it const? Or a #define?

It actually could be 2, depending on the board configuration, but I don't
know whether a 16-bit bus was ever used in the wild. AFAIK, nv-tegra
kernels assume a 32-bit bus for all devices.

...
>> +err_msg:
>> +	dev_err(emc->dev, "failed to initialize ICC: %d\n", err);
> 
> It might be worth duplicating this error message to the failure
> locations so that the exact failure can be identified.

I think it would be better to extend the error messages on an as-needed
basis. It's very unlikely that we will ever see this error in practice.
Okay?

...
>> +	 * of the client's FIFO buffers. Secondly, we need to take into
>> +	 * account impurities of the memory subsystem.
>> +	 */
>> +	if (tag == TEGRA_MC_ICC_TAG_ISO)
>> +		peak_bw = tegra_mc_scale_percents(peak_bw, 300);
> 
> 300% sounds a bit excessive. Do we really need that much?

It should be possible to drop it to 150% by tuning the priority timers and
hysteresis of the clients, but some of those configurations are located
within the devices' register ranges, so we would need a more complicated
bandwidth manager.

The 300% is an overestimate, but for starters it's better to overestimate
than to have unusable devices. This is what the nv-tegra kernel does
as well, btw.

>> +
>> +	*agg_avg += avg_bw;
>> +	*agg_peak = max(*agg_peak, peak_bw);
> 
> I'm not very familiar with ICC, but shouldn't the aggregated peak value
> be the sum of the current aggregated peak and the new peak bandwidth?
> Currently you're selecting the maximum peak bandwidth across all
> clients, so isn't that going to be too small if for whatever reason
> multiple clients need peak bandwidth at the same time?

It's up to the platform drivers to decide how to interpret and use the
avg and peak values.

Please see emc_icc_set() above, which selects the max of the (avg, peak)
values, but maybe it would also be good to move that out of the ICC set()
callback and into the ICC aggregate() callback:

*agg_peak = max(*agg_peak, *agg_avg);

I'll need to take a closer look.
Dmitry Osipenko Oct. 27, 2020, 8:25 p.m. UTC | #4
27.10.2020 13:09, Krzysztof Kozlowski пишет:
...
>> +err_msg:
>> +	dev_err(emc->dev, "failed to initialize ICC: %d\n", err);
> 
> You will print such errors on all existing DTBs. Since it is not a
> failure of probe (it is actually quite expected, normal situation when
> booting with older DTB), let's change it to warning (here and in all
> other places and drivers).

The existing DTBs will be stopped on the error message below.

>> +
>> +	return err;
>> +}
>> +
>> +static int tegra_emc_opp_table_init(struct tegra_emc *emc)
>> +{
>> +	const char *rname = "core";
>> +	int err;
>> +
>> +	/*
>> +	 * Legacy device-trees don't have OPP table and EMC driver isn't
>> +	 * useful in this case.
>> +	 */
>> +	if (!device_property_present(emc->dev, "operating-points-v2")) {
>> +		dev_err(emc->dev, "OPP table not found\n");
>> +		dev_err(emc->dev, "please update your device tree\n");
>> +		return -ENODEV;
>> +	}

The existing DTBs are stopped here.

...
>> +	err = tegra_emc_opp_table_init(emc);
>> +	if (err)
>> +		goto unreg_notifier;
> 
> This looks like the ABI break I mentioned around DT bindings. Are the
> bindings marked as unstable?

This T20 EMC driver has never actually been used so far, and this series
makes it useful. Hence I think it should be fine to assume that the T20 EMC
ABI is unstable.
Dmitry Osipenko Oct. 27, 2020, 9:12 p.m. UTC | #5
27.10.2020 23:22, Dmitry Osipenko пишет:
...
>>> +
>>> +	*agg_avg += avg_bw;
>>> +	*agg_peak = max(*agg_peak, peak_bw);
>>
>> I'm not very familiar with ICC, but shouldn't the aggregated peak value
>> be the sum of the current aggregated peak and the new peak bandwidth?
>> Currently you're selecting the maximum peak bandwidth across all
>> clients, so isn't that going to be too small if for whatever reason
>> multiple clients need peak bandwidth at the same time?

The current variant with max-peak selection should be okay since it
takes into account the competing ISO bandwidths of other devices by
overestimating the bandwidth.

For now we only have display ISO clients, and it won't be a problem to
tune the algorithm later on if it doesn't work well for other ISO clients.
diff mbox series

Patch

diff --git a/drivers/memory/tegra/Kconfig b/drivers/memory/tegra/Kconfig
index ff426747cd7d..ac3dfe155505 100644
--- a/drivers/memory/tegra/Kconfig
+++ b/drivers/memory/tegra/Kconfig
@@ -11,7 +11,8 @@  config TEGRA_MC
 config TEGRA20_EMC
 	tristate "NVIDIA Tegra20 External Memory Controller driver"
 	default y
-	depends on ARCH_TEGRA_2x_SOC
+	depends on TEGRA_MC && ARCH_TEGRA_2x_SOC
+	select PM_OPP
 	help
 	  This driver is for the External Memory Controller (EMC) found on
 	  Tegra20 chips. The EMC controls the external DRAM on the board.
diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h
index abeb6a2cc36a..531fb4fb7b17 100644
--- a/drivers/memory/tegra/mc.h
+++ b/drivers/memory/tegra/mc.h
@@ -78,6 +78,18 @@ 
 
 #define MC_TIMING_UPDATE				BIT(0)
 
+static inline u32 tegra_mc_scale_percents(u64 val, unsigned int percents)
+{
+	val = val * percents;
+	do_div(val, 100);
+
+	/*
+	 * High freq + high boosting percent + large polling interval are
+	 * resulting in integer overflow when watermarks are calculated.
+	 */
+	return min_t(u64, val, U32_MAX);
+}
+
 static inline u32 mc_readl(struct tegra_mc *mc, unsigned long offset)
 {
 	return readl_relaxed(mc->regs + offset);
diff --git a/drivers/memory/tegra/tegra20-emc.c b/drivers/memory/tegra/tegra20-emc.c
index 34085e26dced..69ccb3fe5b0b 100644
--- a/drivers/memory/tegra/tegra20-emc.c
+++ b/drivers/memory/tegra/tegra20-emc.c
@@ -9,6 +9,7 @@ 
 #include <linux/clk/tegra.h>
 #include <linux/debugfs.h>
 #include <linux/err.h>
+#include <linux/interconnect-provider.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
@@ -16,11 +17,15 @@ 
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/pm_opp.h>
+#include <linux/slab.h>
 #include <linux/sort.h>
 #include <linux/types.h>
 
 #include <soc/tegra/fuse.h>
 
+#include "mc.h"
+
 #define EMC_INTSTATUS				0x000
 #define EMC_INTMASK				0x004
 #define EMC_DBG					0x008
@@ -144,6 +149,9 @@  struct emc_timing {
 
 struct tegra_emc {
 	struct device *dev;
+	struct tegra_mc *mc;
+	struct opp_table *opp_table;
+	struct icc_provider provider;
 	struct notifier_block clk_nb;
 	struct clk *clk;
 	void __iomem *regs;
@@ -658,6 +666,166 @@  static void tegra_emc_debugfs_init(struct tegra_emc *emc)
 			    emc, &tegra_emc_debug_max_rate_fops);
 }
 
+static inline struct tegra_emc *
+to_tegra_emc_provider(struct icc_provider *provider)
+{
+	return container_of(provider, struct tegra_emc, provider);
+}
+
+static struct icc_node_data *
+emc_of_icc_xlate_extended(struct of_phandle_args *spec, void *data)
+{
+	struct icc_provider *provider = data;
+	struct icc_node_data *ndata;
+	struct icc_node *node;
+
+	/* External Memory is the only possible ICC route */
+	list_for_each_entry(node, &provider->nodes, node_list) {
+		if (node->id != TEGRA_ICC_EMEM)
+			continue;
+
+		ndata = kzalloc(sizeof(*ndata), GFP_KERNEL);
+		if (!ndata)
+			return ERR_PTR(-ENOMEM);
+
+		/*
+		 * SRC and DST nodes should have matching TAG in order to have
+		 * it set by default for a requested path.
+		 */
+		ndata->tag = TEGRA_MC_ICC_TAG_ISO;
+		ndata->node = node;
+
+		return ndata;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
+static int emc_icc_set(struct icc_node *src, struct icc_node *dst)
+{
+	struct tegra_emc *emc = to_tegra_emc_provider(dst->provider);
+	unsigned long long peak_bw = icc_units_to_bps(dst->peak_bw);
+	unsigned long long avg_bw = icc_units_to_bps(dst->avg_bw);
+	unsigned long long rate = max(avg_bw, peak_bw);
+	unsigned int dram_data_bus_width_bytes = 4;
+	long rounded_rate;
+	int err;
+
+	/*
+	 * Tegra20 EMC runs on x2 clock rate of SDRAM bus because DDR data
+	 * is sampled on both clock edges. This means that EMC clock rate
+	 * equals to the peak data rate.
+	 */
+	do_div(rate, dram_data_bus_width_bytes);
+	rate = min_t(u64, rate, U32_MAX);
+
+	rounded_rate = emc_round_rate(rate, 0, U32_MAX, emc);
+	if (rounded_rate < 0)
+		return rounded_rate;
+
+	err = dev_pm_opp_set_rate(emc->dev, rounded_rate);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int tegra_emc_interconnect_init(struct tegra_emc *emc)
+{
+	const struct tegra_mc_soc *soc;
+	struct icc_node *node;
+	int err;
+
+	emc->mc = devm_tegra_get_memory_controller(emc->dev);
+	if (IS_ERR(emc->mc))
+		return PTR_ERR(emc->mc);
+
+	soc = emc->mc->soc;
+
+	emc->provider.dev = emc->dev;
+	emc->provider.set = emc_icc_set;
+	emc->provider.data = &emc->provider;
+	emc->provider.aggregate = soc->icc_ops->aggregate;
+	emc->provider.xlate_extended = emc_of_icc_xlate_extended;
+
+	err = icc_provider_add(&emc->provider);
+	if (err)
+		goto err_msg;
+
+	/* create External Memory Controller node */
+	node = icc_node_create(TEGRA_ICC_EMC);
+	err = PTR_ERR_OR_ZERO(node);
+	if (err)
+		goto del_provider;
+
+	node->name = "External Memory Controller";
+	icc_node_add(node, &emc->provider);
+
+	/* link External Memory Controller to External Memory (DRAM) */
+	err = icc_link_create(node, TEGRA_ICC_EMEM);
+	if (err)
+		goto remove_nodes;
+
+	/* create External Memory node */
+	node = icc_node_create(TEGRA_ICC_EMEM);
+	err = PTR_ERR_OR_ZERO(node);
+	if (err)
+		goto remove_nodes;
+
+	node->name = "External Memory (DRAM)";
+	icc_node_add(node, &emc->provider);
+
+	return 0;
+
+remove_nodes:
+	icc_nodes_remove(&emc->provider);
+del_provider:
+	icc_provider_del(&emc->provider);
+err_msg:
+	dev_err(emc->dev, "failed to initialize ICC: %d\n", err);
+
+	return err;
+}
+
+static int tegra_emc_opp_table_init(struct tegra_emc *emc)
+{
+	const char *rname = "core";
+	int err;
+
+	/*
+	 * Legacy device-trees don't have OPP table and EMC driver isn't
+	 * useful in this case.
+	 */
+	if (!device_property_present(emc->dev, "operating-points-v2")) {
+		dev_err(emc->dev, "OPP table not found\n");
+		dev_err(emc->dev, "please update your device tree\n");
+		return -ENODEV;
+	}
+
+	/* voltage scaling is optional */
+	if (device_property_present(emc->dev, "core-supply"))
+		emc->opp_table = dev_pm_opp_set_regulators(emc->dev, &rname, 1);
+	else
+		emc->opp_table = dev_pm_opp_get_opp_table(emc->dev);
+
+	if (IS_ERR(emc->opp_table))
+		return dev_err_probe(emc->dev, PTR_ERR(emc->opp_table),
+				     "failed to prepare OPP table\n");
+
+	err = dev_pm_opp_of_add_table(emc->dev);
+	if (err) {
+		dev_err(emc->dev, "failed to add OPP table: %d\n", err);
+		goto put_table;
+	}
+
+	return 0;
+
+put_table:
+	dev_pm_opp_put_opp_table(emc->opp_table);
+
+	return err;
+}
+
 static int tegra_emc_probe(struct platform_device *pdev)
 {
 	struct device_node *np;
@@ -717,8 +885,13 @@  static int tegra_emc_probe(struct platform_device *pdev)
 		goto unset_cb;
 	}
 
+	err = tegra_emc_opp_table_init(emc);
+	if (err)
+		goto unreg_notifier;
+
 	platform_set_drvdata(pdev, emc);
 	tegra_emc_debugfs_init(emc);
+	tegra_emc_interconnect_init(emc);
 
 	/*
 	 * Don't allow the kernel module to be unloaded. Unloading adds some
@@ -729,6 +902,8 @@  static int tegra_emc_probe(struct platform_device *pdev)
 
 	return 0;
 
+unreg_notifier:
+	clk_notifier_unregister(emc->clk, &emc->clk_nb);
 unset_cb:
 	tegra20_clk_set_emc_round_callback(NULL, NULL);
 
@@ -747,6 +922,7 @@  static struct platform_driver tegra_emc_driver = {
 		.name = "tegra20-emc",
 		.of_match_table = tegra_emc_of_match,
 		.suppress_bind_attrs = true,
+		.sync_state = icc_sync_state,
 	},
 };
 module_platform_driver(tegra_emc_driver);
diff --git a/drivers/memory/tegra/tegra20.c b/drivers/memory/tegra/tegra20.c
index a8098bff91d9..5127e8e8250f 100644
--- a/drivers/memory/tegra/tegra20.c
+++ b/drivers/memory/tegra/tegra20.c
@@ -280,6 +280,39 @@  static const struct tegra_mc_reset_ops tegra20_mc_reset_ops = {
 	.reset_status = tegra20_mc_reset_status,
 };
 
+static int tegra20_mc_icc_set(struct icc_node *src, struct icc_node *dst)
+{
+	/*
+	 * Technically, it should be possible to tune arbitration knobs here,
+	 * but the default values are known to work well on all devices.
+	 * Hence nothing to do here so far.
+	 */
+	return 0;
+}
+
+static int tegra20_mc_icc_aggreate(struct icc_node *node, u32 tag, u32 avg_bw,
+				   u32 peak_bw, u32 *agg_avg, u32 *agg_peak)
+{
+	/*
+	 * ISO clients need to reserve extra bandwidth up-front because
+	 * there could high bandwidth pressure during initial fulling-up
+	 * of the client's FIFO buffers. Secondly, we need to take into
+	 * account impurities of the memory subsystem.
+	 */
+	if (tag == TEGRA_MC_ICC_TAG_ISO)
+		peak_bw = tegra_mc_scale_percents(peak_bw, 300);
+
+	*agg_avg += avg_bw;
+	*agg_peak = max(*agg_peak, peak_bw);
+
+	return 0;
+}
+
+static const struct tegra_mc_icc_ops tegra20_mc_icc_ops = {
+	.aggregate = tegra20_mc_icc_aggreate,
+	.set = tegra20_mc_icc_set,
+};
+
 const struct tegra_mc_soc tegra20_mc_soc = {
 	.clients = tegra20_mc_clients,
 	.num_clients = ARRAY_SIZE(tegra20_mc_clients),
@@ -290,4 +323,5 @@  const struct tegra_mc_soc tegra20_mc_soc = {
 	.reset_ops = &tegra20_mc_reset_ops,
 	.resets = tegra20_mc_resets,
 	.num_resets = ARRAY_SIZE(tegra20_mc_resets),
+	.icc_ops = &tegra20_mc_icc_ops,
 };