diff mbox series

[RFC,09/11] devfreq: exynos-bus: Add interconnect functionality to exynos-bus

Message ID 20190723122016.30279-10-a.swigon@partner.samsung.com (mailing list archive)
State Not Applicable
Headers show
Series Simple QoS for exynos-bus driver using interconnect | expand

Commit Message

Artur Świgoń July 23, 2019, 12:20 p.m. UTC
This patch adds interconnect functionality to the exynos-bus devfreq
driver.

The SoC topology is a graph (or, more specifically, a tree) and most of its
edges are taken from the devfreq parent-child hierarchy (cf.
Documentation/devicetree/bindings/devfreq/exynos-bus.txt). The previous
patch adds missing edges to the DT (under the name 'parent'). Due to
unspecified relative probing order, -EPROBE_DEFER may be propagated to
guarantee that a child is probed before its parent.

Each bus is now an interconnect provider and an interconnect node as well
(cf. Documentation/interconnect/interconnect.rst), i.e. every bus registers
itself as a node. Node IDs are not hardcoded but rather assigned at
runtime, in probing order (subject to the above-mentioned exception
regarding relative order). This approach allows for using this driver with
various Exynos SoCs.

The devfreq target() callback provided by exynos-bus now selects either the
frequency calculated by the devfreq governor or the frequency requested via
the interconnect API for the given node, whichever is higher.

Please note that it is not an error when CONFIG_INTERCONNECT is 'n', in
which case all interconnect API functions are no-op.

Signed-off-by: Artur Świgoń <a.swigon@partner.samsung.com>
---
 drivers/devfreq/exynos-bus.c | 145 +++++++++++++++++++++++++++++++++++
 1 file changed, 145 insertions(+)

Comments

Krzysztof Kozlowski July 24, 2019, 6:36 p.m. UTC | #1
On Tue, Jul 23, 2019 at 02:20:14PM +0200, Artur Świgoń wrote:
> This patch adds interconnect functionality to the exynos-bus devfreq
> driver.
> 
> The SoC topology is a graph (or, more specifically, a tree) and most of its
> edges are taken from the devfreq parent-child hierarchy (cf.
> Documentation/devicetree/bindings/devfreq/exynos-bus.txt). The previous
> patch adds missing edges to the DT (under the name 'parent'). Due to

Do not refer to DT patches. They will come through different tree so
"previous" will not be correct anymore. You mentioned dependencies in
cover letter so it is sufficient.

> unspecified relative probing order, -EPROBE_DEFER may be propagated to
> guarantee that a child is probed before its parent.
> 
> Each bus is now an interconnect provider and an interconnect node as well
> (cf. Documentation/interconnect/interconnect.rst), i.e. every bus registers
> itself as a node. Node IDs are not hardcoded but rather assigned at
> runtime, in probing order (subject to the above-mentioned exception
> regarding relative order). This approach allows for using this driver with
> various Exynos SoCs.
> 
> The devfreq target() callback provided by exynos-bus now selects either the
> frequency calculated by the devfreq governor or the frequency requested via
> the interconnect API for the given node, whichever is higher.
> 
> Please note that it is not an error when CONFIG_INTERCONNECT is 'n', in
> which case all interconnect API functions are no-op.
> 
> Signed-off-by: Artur Świgoń <a.swigon@partner.samsung.com>
> ---
>  drivers/devfreq/exynos-bus.c | 145 +++++++++++++++++++++++++++++++++++
>  1 file changed, 145 insertions(+)
> 
> diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
> index 412511ca7703..12fb7c84ae50 100644
> --- a/drivers/devfreq/exynos-bus.c
> +++ b/drivers/devfreq/exynos-bus.c
> @@ -14,6 +14,7 @@
>  #include <linux/devfreq-event.h>
>  #include <linux/device.h>
>  #include <linux/export.h>
> +#include <linux/interconnect-provider.h>
>  #include <linux/module.h>
>  #include <linux/of.h>
>  #include <linux/pm_opp.h>
> @@ -23,6 +24,8 @@
>  #define DEFAULT_SATURATION_RATIO	40
>  #define DEFAULT_VOLTAGE_TOLERANCE	2
>  
> +#define icc_units_to_hz(x) ((x) * 1000UL / 8)
> +
>  struct exynos_bus {
>  	struct device *dev;
>  
> @@ -31,12 +34,17 @@ struct exynos_bus {
>  	unsigned int edev_count;
>  	struct mutex lock;
>  
> +	unsigned long min_freq;
>  	unsigned long curr_freq;
>  
>  	struct regulator *regulator;
>  	struct clk *clk;
>  	unsigned int voltage_tolerance;
>  	unsigned int ratio;
> +
> +	/* One provider per bus, one node per provider */
> +	struct icc_provider provider;
> +	struct icc_node *node;
>  };
>  
>  /*
> @@ -61,6 +69,13 @@ exynos_bus_ops_edev(enable_edev);
>  exynos_bus_ops_edev(disable_edev);
>  exynos_bus_ops_edev(set_event);
>  
> +static int exynos_bus_next_id(void)
> +{
> +	static int exynos_bus_node_id;
> +
> +	return exynos_bus_node_id++;

This does not look robust. Use IDR for IDs. 

> +}
> +
>  static int exynos_bus_get_event(struct exynos_bus *bus,
>  				struct devfreq_event_data *edata)
>  {
> @@ -98,6 +113,8 @@ static int exynos_bus_target(struct device *dev, unsigned long *freq, u32 flags)
>  	unsigned long old_freq, new_freq, new_volt, tol;
>  	int ret = 0;
>  
> +	*freq = max(*freq, bus->min_freq);
> +
>  	/* Get new opp-bus instance according to new bus clock */
>  	new_opp = devfreq_recommended_opp(dev, freq, flags);
>  	if (IS_ERR(new_opp)) {
> @@ -208,6 +225,8 @@ static int exynos_bus_passive_target(struct device *dev, unsigned long *freq,
>  	unsigned long old_freq, new_freq;
>  	int ret = 0;
>  
> +	*freq = max(*freq, bus->min_freq);
> +
>  	/* Get new opp-bus instance according to new bus clock */
>  	new_opp = devfreq_recommended_opp(dev, freq, flags);
>  	if (IS_ERR(new_opp)) {
> @@ -251,6 +270,35 @@ static void exynos_bus_passive_exit(struct device *dev)
>  	clk_disable_unprepare(bus->clk);
>  }
>  
> +static int exynos_bus_icc_set(struct icc_node *src, struct icc_node *dst)
> +{
> +	struct exynos_bus *src_bus = src->data, *dst_bus = dst->data;
> +
> +	src_bus->min_freq = icc_units_to_hz(src->peak_bw);
> +	dst_bus->min_freq = icc_units_to_hz(dst->peak_bw);
> +
> +	return 0;
> +}
> +
> +static int exynos_bus_icc_aggregate(struct icc_node *node, u32 avg_bw,
> +				    u32 peak_bw, u32 *agg_avg, u32 *agg_peak)
> +{
> +	*agg_peak = *agg_avg = peak_bw;
> +
> +	return 0;
> +}
> +
> +static struct icc_node *exynos_bus_icc_xlate(struct of_phandle_args *spec,
> +					     void *data)
> +{
> +	struct exynos_bus *bus = data;
> +
> +	if (spec->np != bus->dev->of_node)
> +		return ERR_PTR(-EINVAL);
> +
> +	return bus->node;
> +}
> +
>  static int exynos_bus_parent_parse_of(struct device_node *np,
>  					struct exynos_bus *bus)
>  {
> @@ -469,6 +517,95 @@ static int exynos_bus_profile_init_passive(struct exynos_bus *bus,
>  	return ret;
>  }
>  
> +static int exynos_bus_icc_connect(struct exynos_bus *bus)
> +{
> +	struct device_node *np = bus->dev->of_node;
> +	struct devfreq *parent_devfreq;
> +	struct icc_node *parent_node = NULL;
> +	struct of_phandle_args args;
> +	int ret = 0;
> +
> +	parent_devfreq = devfreq_get_devfreq_by_phandle(bus->dev, 0);
> +	if (!IS_ERR(parent_devfreq)) {
> +		struct exynos_bus *parent_bus;

What if someone unbinds this parent devfreq? I guess everything in
devfreq starts exploding... however it's not the problem of this patch.

Do you also need suspend/resume order (device links)? I guess the other
side, e.g.  mixer, should resume before the bus?

> +
> +		parent_bus = dev_get_drvdata(parent_devfreq->dev.parent);
> +		parent_node = parent_bus->node;
> +	} else {
> +		/* Look for parent in DT */
> +		int num = of_count_phandle_with_args(np, "parent",
> +						     "#interconnect-cells");
> +		if (num != 1)

You will return here 0 but isn't it an error?

Best regards,
Krzysztof
Georgi Djakov July 26, 2019, 8:05 a.m. UTC | #2
Hi Artur,

On 7/23/19 15:20, Artur Świgoń wrote:
> This patch adds interconnect functionality to the exynos-bus devfreq
> driver.
> 
> The SoC topology is a graph (or, more specifically, a tree) and most of its
> edges are taken from the devfreq parent-child hierarchy (cf.
> Documentation/devicetree/bindings/devfreq/exynos-bus.txt). The previous
> patch adds missing edges to the DT (under the name 'parent'). Due to
> unspecified relative probing order, -EPROBE_DEFER may be propagated to
> guarantee that a child is probed before its parent.
> 
> Each bus is now an interconnect provider and an interconnect node as well
> (cf. Documentation/interconnect/interconnect.rst), i.e. every bus registers
> itself as a node. Node IDs are not hardcoded but rather assigned at
> runtime, in probing order (subject to the above-mentioned exception
> regarding relative order). This approach allows for using this driver with
> various Exynos SoCs.

I am not familiar with the Exynos bus topology, but it seems to me that it's not
represented correctly. An interconnect provider with just a single node (port)
is odd. I would expect that each provider consists of multiple master and slave
nodes. This data would be used by a framework to understand what are the links
and how the traffic flows between the IP blocks and through which buses.

> The devfreq target() callback provided by exynos-bus now selects either the
> frequency calculated by the devfreq governor or the frequency requested via
> the interconnect API for the given node, whichever is higher.

This completely makes sense. We just need to be sure that the interconnect
framework is used correctly.

Thanks,
Georgi
Chanwoo Choi July 29, 2019, 1:52 a.m. UTC | #3
Hi,

On 19. 7. 23. 오후 9:20, Artur Świgoń wrote:
> This patch adds interconnect functionality to the exynos-bus devfreq
> driver.
> 
> The SoC topology is a graph (or, more specifically, a tree) and most of its
> edges are taken from the devfreq parent-child hierarchy (cf.
> Documentation/devicetree/bindings/devfreq/exynos-bus.txt). The previous
> patch adds missing edges to the DT (under the name 'parent'). Due to
> unspecified relative probing order, -EPROBE_DEFER may be propagated to
> guarantee that a child is probed before its parent.
> 
> Each bus is now an interconnect provider and an interconnect node as well
> (cf. Documentation/interconnect/interconnect.rst), i.e. every bus registers
> itself as a node. Node IDs are not hardcoded but rather assigned at
> runtime, in probing order (subject to the above-mentioned exception
> regarding relative order). This approach allows for using this driver with
> various Exynos SoCs.
> 
> The devfreq target() callback provided by exynos-bus now selects either the
> frequency calculated by the devfreq governor or the frequency requested via
> the interconnect API for the given node, whichever is higher.

Basically, I agree to support the QoS requirement between devices.
But, I think that need to consider the multiple cases.


1. When changing the devfreq governor by user,
For example of the connection between bus_dmc/leftbus/display on patch8,
there are possible multiple cases with various devfreq governor
which is changed on the runtime by user through sysfs interface.

If users changes the devfreq governor as following:
Before,
- bus_dmc (simple_ondemand, available frequency 100/200/300/400 MHz)
--> bus_leftbus(simple_ondemand, available frequency 100/200/300/400 MHz)
----> bus_display(passive)

After changed governor of bus_dmc,
if the min_freq by interconnect requirement is 400Mhz,
- bus_dmc (powersave) : min_freq and max_freq and cur_freq is 100MHz
--> bus_leftbus(simple_ondemand) : cur_freq is 400Mhz
----> bus_display(passive)

The final frequency is 400MHz of bus_dmc
even if the min_freq/max_freq/cur_freq is 100MHz.
It cannot show the correct min_freq/max_freq through
devfreq sysfs interface.


2. When disabling the some frequency by devfreq-thermal throttling,
This patch checks the min_freq of interconnect requirement
in the exynos_bus_target() and exynos_bus_passive_target().
Also, it cannot show the correct min_freq/max_freq through
devfreq sysfs interface.

For example of bus_dmc bus,
- The available frequencies are 100MHz, 200MHz, 300MHz, 400MHz
- Disable 400MHz by devfreq-thermal throttling 
- min_freq is 100MHz
- max_freq is 300MHz
- min_freq of interconnect is 400MHz

In result, the final frequency is 400MHz by exynos_bus_target()
There are no problem for working. But, the user cannot know
reason why cur_freq is 400MHz even if max_freq is 300MHz.

Basically, update_devfreq() considers the all constraints
of min_freq/max_freq to decide the proper target frequency.


3.
I think that the exynos_bus_passive_target() is used for devfreq device
using 'passive' governor. The frequency already depends on the parent device.

If already the parent devfreq device like bus_leftbus consider
the minimum frequency of QoS requirement like interconnect,
it is not necessary. The next frequency of devfreq device
with 'passive' governor, it will apply the QoS requirement
without any additional code.

> 
> Please note that it is not an error when CONFIG_INTERCONNECT is 'n', in
> which case all interconnect API functions are no-op.
> 
> Signed-off-by: Artur Świgoń <a.swigon@partner.samsung.com>
> ---
>  drivers/devfreq/exynos-bus.c | 145 +++++++++++++++++++++++++++++++++++
>  1 file changed, 145 insertions(+)
> 
> diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
> index 412511ca7703..12fb7c84ae50 100644
> --- a/drivers/devfreq/exynos-bus.c
> +++ b/drivers/devfreq/exynos-bus.c
> @@ -14,6 +14,7 @@
>  #include <linux/devfreq-event.h>
>  #include <linux/device.h>
>  #include <linux/export.h>
> +#include <linux/interconnect-provider.h>
>  #include <linux/module.h>
>  #include <linux/of.h>
>  #include <linux/pm_opp.h>
> @@ -23,6 +24,8 @@
>  #define DEFAULT_SATURATION_RATIO	40
>  #define DEFAULT_VOLTAGE_TOLERANCE	2
>  
> +#define icc_units_to_hz(x) ((x) * 1000UL / 8)
> +
>  struct exynos_bus {
>  	struct device *dev;
>  
> @@ -31,12 +34,17 @@ struct exynos_bus {
>  	unsigned int edev_count;
>  	struct mutex lock;
>  
> +	unsigned long min_freq;
>  	unsigned long curr_freq;
>  
>  	struct regulator *regulator;
>  	struct clk *clk;
>  	unsigned int voltage_tolerance;
>  	unsigned int ratio;
> +
> +	/* One provider per bus, one node per provider */
> +	struct icc_provider provider;
> +	struct icc_node *node;
>  };
>  
>  /*
> @@ -61,6 +69,13 @@ exynos_bus_ops_edev(enable_edev);
>  exynos_bus_ops_edev(disable_edev);
>  exynos_bus_ops_edev(set_event);
>  
> +static int exynos_bus_next_id(void)
> +{
> +	static int exynos_bus_node_id;
> +
> +	return exynos_bus_node_id++;
> +}
> +
>  static int exynos_bus_get_event(struct exynos_bus *bus,
>  				struct devfreq_event_data *edata)
>  {
> @@ -98,6 +113,8 @@ static int exynos_bus_target(struct device *dev, unsigned long *freq, u32 flags)
>  	unsigned long old_freq, new_freq, new_volt, tol;
>  	int ret = 0;
>  
> +	*freq = max(*freq, bus->min_freq);
> +
>  	/* Get new opp-bus instance according to new bus clock */
>  	new_opp = devfreq_recommended_opp(dev, freq, flags);
>  	if (IS_ERR(new_opp)) {
> @@ -208,6 +225,8 @@ static int exynos_bus_passive_target(struct device *dev, unsigned long *freq,
>  	unsigned long old_freq, new_freq;
>  	int ret = 0;
>  
> +	*freq = max(*freq, bus->min_freq);
> +
>  	/* Get new opp-bus instance according to new bus clock */
>  	new_opp = devfreq_recommended_opp(dev, freq, flags);
>  	if (IS_ERR(new_opp)) {
> @@ -251,6 +270,35 @@ static void exynos_bus_passive_exit(struct device *dev)
>  	clk_disable_unprepare(bus->clk);
>  }
>  
> +static int exynos_bus_icc_set(struct icc_node *src, struct icc_node *dst)
> +{
> +	struct exynos_bus *src_bus = src->data, *dst_bus = dst->data;
> +
> +	src_bus->min_freq = icc_units_to_hz(src->peak_bw);
> +	dst_bus->min_freq = icc_units_to_hz(dst->peak_bw);
> +
> +	return 0;
> +}
> +
> +static int exynos_bus_icc_aggregate(struct icc_node *node, u32 avg_bw,
> +				    u32 peak_bw, u32 *agg_avg, u32 *agg_peak)
> +{
> +	*agg_peak = *agg_avg = peak_bw;
> +
> +	return 0;
> +}
> +
> +static struct icc_node *exynos_bus_icc_xlate(struct of_phandle_args *spec,
> +					     void *data)
> +{
> +	struct exynos_bus *bus = data;
> +
> +	if (spec->np != bus->dev->of_node)
> +		return ERR_PTR(-EINVAL);
> +
> +	return bus->node;
> +}
> +
>  static int exynos_bus_parent_parse_of(struct device_node *np,
>  					struct exynos_bus *bus)
>  {
> @@ -469,6 +517,95 @@ static int exynos_bus_profile_init_passive(struct exynos_bus *bus,
>  	return ret;
>  }
>  
> +static int exynos_bus_icc_connect(struct exynos_bus *bus)
> +{
> +	struct device_node *np = bus->dev->of_node;
> +	struct devfreq *parent_devfreq;
> +	struct icc_node *parent_node = NULL;
> +	struct of_phandle_args args;
> +	int ret = 0;
> +
> +	parent_devfreq = devfreq_get_devfreq_by_phandle(bus->dev, 0);
> +	if (!IS_ERR(parent_devfreq)) {
> +		struct exynos_bus *parent_bus;
> +
> +		parent_bus = dev_get_drvdata(parent_devfreq->dev.parent);
> +		parent_node = parent_bus->node;
> +	} else {
> +		/* Look for parent in DT */
> +		int num = of_count_phandle_with_args(np, "parent",
> +						     "#interconnect-cells");
> +		if (num != 1)
> +			goto out;
> +
> +		ret = of_parse_phandle_with_args(np, "parent",
> +						 "#interconnect-cells",
> +						 0, &args);
> +		if (ret < 0)
> +			goto out;
> +
> +		of_node_put(args.np);
> +
> +		parent_node = of_icc_get_from_provider(&args);
> +		if (IS_ERR(parent_node)) {
> +			/* May be -EPROBE_DEFER */
> +			ret = PTR_ERR(parent_node);
> +			goto out;
> +		}
> +	}
> +
> +	ret = icc_link_create(bus->node, parent_node->id);
> +
> +out:
> +	return ret;
> +}
> +
> +static int exynos_bus_icc_init(struct exynos_bus *bus)
> +{
> +	struct device *dev = bus->dev;
> +	struct icc_provider *provider = &bus->provider;
> +	struct icc_node *node;
> +	int id, ret;
> +
> +	/* Initialize the interconnect provider */
> +	provider->set = exynos_bus_icc_set;
> +	provider->aggregate = exynos_bus_icc_aggregate;
> +	provider->xlate = exynos_bus_icc_xlate;
> +	provider->dev = dev;
> +	provider->data = bus;
> +
> +	ret = icc_provider_add(provider);
> +	if (ret < 0)
> +		goto out;
> +
> +	id = exynos_bus_next_id();
> +	node = icc_node_create(id);
> +	if (IS_ERR(node)) {
> +		ret = PTR_ERR(node);
> +		goto err_node;
> +	}
> +
> +	bus->node = node;
> +	node->name = dev->of_node->name;
> +	node->data = bus;
> +	icc_node_add(node, provider);
> +
> +	ret = exynos_bus_icc_connect(bus);
> +	if (ret < 0)
> +		goto err_connect;
> +
> +out:
> +	return ret;
> +
> +err_connect:
> +	icc_node_del(node);
> +	icc_node_destroy(id);
> +err_node:
> +	icc_provider_del(provider);
> +
> +	return ret;
> +}
> +
>  static int exynos_bus_probe(struct platform_device *pdev)
>  {
>  	struct device *dev = &pdev->dev;
> @@ -517,6 +654,14 @@ static int exynos_bus_probe(struct platform_device *pdev)
>  			goto err;
>  	}
>  
> +	/*
> +	 * Initialize interconnect provider. A return value of -ENOTSUPP means
> +	 * that CONFIG_INTERCONNECT is disabled.
> +	 */
> +	ret = exynos_bus_icc_init(bus);
> +	if (ret < 0 && ret != -ENOTSUPP)
> +		goto err;
> +
>  	max_state = bus->devfreq->profile->max_state;
>  	min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
>  	max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
>
Artur Świgoń July 31, 2019, 1:01 p.m. UTC | #4
On Wed, 2019-07-24 at 20:36 +0200, Krzysztof Kozlowski wrote:
> On Tue, Jul 23, 2019 at 02:20:14PM +0200, Artur Świgoń wrote:
> > This patch adds interconnect functionality to the exynos-bus devfreq
> > driver.
> > 
> > The SoC topology is a graph (or, more specifically, a tree) and most of its
> > edges are taken from the devfreq parent-child hierarchy (cf.
> > Documentation/devicetree/bindings/devfreq/exynos-bus.txt). The previous
> > patch adds missing edges to the DT (under the name 'parent'). Due to
> 
> Do not refer to DT patches. They will come through different tree so
> "previous" will not be correct anymore. You mentioned dependencies in
> cover letter so it is sufficient.

OK.
 
> >  /*
> > @@ -61,6 +69,13 @@ exynos_bus_ops_edev(enable_edev);
> >  exynos_bus_ops_edev(disable_edev);
> >  exynos_bus_ops_edev(set_event);
> >  
> > +static int exynos_bus_next_id(void)
> > +{
> > +	static int exynos_bus_node_id;
> > +
> > +	return exynos_bus_node_id++;
> 
> This does not look robust. Use IDR for IDs. 

OK.

> > +static int exynos_bus_icc_connect(struct exynos_bus *bus)
> > +{
> > +	struct device_node *np = bus->dev->of_node;
> > +	struct devfreq *parent_devfreq;
> > +	struct icc_node *parent_node = NULL;
> > +	struct of_phandle_args args;
> > +	int ret = 0;
> > +
> > +	parent_devfreq = devfreq_get_devfreq_by_phandle(bus->dev, 0);
> > +	if (!IS_ERR(parent_devfreq)) {
> > +		struct exynos_bus *parent_bus;
> 
> What if someone unbinds this parent devfreq? I guess everything in
> devfreq starts exploding... however it's not the problem of this patch.
> 
> Do you also need suspend/resume order (device links)? I guess the other
> side, e.g.  mixer, should resume before the bus?

Actually, I think that the bus (devfreq) should resume before mixer. However,
suspend/resume order is another issue that applies to this driver regardless of
using the interconnect framework, although device links could probably also be
implemented in the interconnect framework itself.

> > +		parent_bus = dev_get_drvdata(parent_devfreq->dev.parent);
> > +		parent_node = parent_bus->node;
> > +	} else {
> > +		/* Look for parent in DT */
> > +		int num = of_count_phandle_with_args(np, "parent",
> > +						     "#interconnect-cells");
> > +		if (num != 1)
> 
> You will return here 0 but isn't it an error?

It is definitely not an error when 'parent' does not exist in DT (for buses that
are parents themselves). I can extend the comment in the code to explicitly
state that.

Best regards,
Artur Świgoń Aug. 1, 2019, 7:59 a.m. UTC | #5
Hi Georgi,

On Fri, 2019-07-26 at 11:05 +0300, Georgi Djakov wrote:
> Hi Artur,
> 
> On 7/23/19 15:20, Artur Świgoń wrote:
> > This patch adds interconnect functionality to the exynos-bus devfreq
> > driver.
> > 
> > The SoC topology is a graph (or, more specifically, a tree) and most of its
> > edges are taken from the devfreq parent-child hierarchy (cf.
> > Documentation/devicetree/bindings/devfreq/exynos-bus.txt). The previous
> > patch adds missing edges to the DT (under the name 'parent'). Due to
> > unspecified relative probing order, -EPROBE_DEFER may be propagated to
> > guarantee that a child is probed before its parent.
> > 
> > Each bus is now an interconnect provider and an interconnect node as well
> > (cf. Documentation/interconnect/interconnect.rst), i.e. every bus registers
> > itself as a node. Node IDs are not hardcoded but rather assigned at
> > runtime, in probing order (subject to the above-mentioned exception
> > regarding relative order). This approach allows for using this driver with
> > various Exynos SoCs.
> 
> I am not familiar with the Exynos bus topology, but it seems to me that it's not
> represented correctly. An interconnect provider with just a single node (port)
> is odd. I would expect that each provider consists of multiple master and slave
> nodes. This data would be used by a framework to understand what are the links
> and how the traffic flows between the IP blocks and through which buses.

To summarize the exynos-bus topology[1] used by the devfreq driver: There are
many data buses for data transfer in Samsung Exynos SoC. Every bus has its own
clock. Buses often share power lines, in which case one of the buses on the
power line is referred to as 'parent' (or as 'devfreq' in the DT). In the
particular case of Exynos4412[1][2], the topology can be expressed as follows:

bus_dmc
-- bus_acp
-- bus_c2c

bus_leftbus
-- bus_rightbus
-- bus_display
-- bus_fsys
-- bus_peri
-- bus_mfc

Where bus_dmc and bus_leftbus probably could be referred to as masters, and the
following indented nodes as slaves. Patch 08/11 of this RFC additionally adds
the following to the DT:

bus_dmc
-- bus_leftbus

Which makes the topology a valid tree.

The exynos-bus concept in devfreq[3] is designed in such a way that every bus is
probed separately as a platform device, and is a largely independent entity.
This RFC proposes an extension to the existing devfreq driver that basically
provides a simple QoS to ensure minimum clock frequency for selected buses
(possibly overriding devfreq governor calculations) using the interconnect
framework.

The hierarchy is modelled in such a way that every bus is an interconnect node.
On the other hand, what is considered an interconnect provider here is quite
arbitrary, but for the reasons mentioned in the above paragraph, this RFC
assumes that every bus is a provider of itself as a node. Using an alternative
singleton provider approach was deemed more complicated since the 'dev' field in
'struct icc_provider' has to be set to something meaningful and we are tied to
the 'samsung,exynos-bus' compatible string in the driver (and multiple instances
of exynos-bus probed in indeterminate relative order).

I'm looking forward to hearing any additional thoughts you may have on this
topic.

Best regards,
Leonard Crestez Aug. 6, 2019, 1:41 p.m. UTC | #6
On 23.07.2019 15:21, Artur Świgoń wrote:

> +static int exynos_bus_icc_aggregate(struct icc_node *node, u32 avg_bw,
> +				    u32 peak_bw, u32 *agg_avg, u32 *agg_peak)
> +{
> +	*agg_peak = *agg_avg = peak_bw;
> +
> +	return 0;
> +}

The only current provider aggregates "avg" with "sum" and "peak" with 
"max", any particular reason to do something different? This function 
doesn't even really do aggregation, if there is a second request for "0" 
then the first request is lost.

I didn't find any docs but my interpretation of avg/peak is that "avg" 
is for constant traffic like a display or VPU pushing pixels and "peak" 
is for variable traffic like networking. I assume devices which make 
"peak" requests are aggregated with max because they're not expected to 
all max-out together.

In PATCH 11 you're making a bandwidth request based on resolution, that 
traffic is constant and guaranteed to happend while the display is on so 
it would make sense to request it as an "avg" and aggregate it with "sum".

--
Regards,
Leonard
Georgi Djakov Aug. 7, 2019, 2:21 p.m. UTC | #7
Hi Artur,

On 8/1/19 10:59, Artur Świgoń wrote:
> Hi Georgi,
> 
> On Fri, 2019-07-26 at 11:05 +0300, Georgi Djakov wrote:
>> Hi Artur,
>>
>> On 7/23/19 15:20, Artur Świgoń wrote:
>>> This patch adds interconnect functionality to the exynos-bus devfreq
>>> driver.
>>>
>>> The SoC topology is a graph (or, more specifically, a tree) and most of its
>>> edges are taken from the devfreq parent-child hierarchy (cf.
>>> Documentation/devicetree/bindings/devfreq/exynos-bus.txt). The previous
>>> patch adds missing edges to the DT (under the name 'parent'). Due to
>>> unspecified relative probing order, -EPROBE_DEFER may be propagated to
>>> guarantee that a child is probed before its parent.
>>>
>>> Each bus is now an interconnect provider and an interconnect node as well
>>> (cf. Documentation/interconnect/interconnect.rst), i.e. every bus registers
>>> itself as a node. Node IDs are not hardcoded but rather assigned at
>>> runtime, in probing order (subject to the above-mentioned exception
>>> regarding relative order). This approach allows for using this driver with
>>> various Exynos SoCs.
>>
>> I am not familiar with the Exynos bus topology, but it seems to me that it's not
>> represented correctly. An interconnect provider with just a single node (port)
>> is odd. I would expect that each provider consists of multiple master and slave
>> nodes. This data would be used by a framework to understand what are the links
>> and how the traffic flows between the IP blocks and through which buses.
> 
> To summarize the exynos-bus topology[1] used by the devfreq driver: There are
> many data buses for data transfer in Samsung Exynos SoC. Every bus has its own
> clock. Buses often share power lines, in which case one of the buses on the
> power line is referred to as 'parent' (or as 'devfreq' in the DT). In the
> particular case of Exynos4412[1][2], the topology can be expressed as follows:
> 
> bus_dmc
> -- bus_acp
> -- bus_c2c
> 
> bus_leftbus
> -- bus_rightbus
> -- bus_display
> -- bus_fsys
> -- bus_peri
> -- bus_mfc
> 
> Where bus_dmc and bus_leftbus probably could be referred to as masters, and the
> following indented nodes as slaves. Patch 08/11 of this RFC additionally adds
> the following to the DT:
> 
> bus_dmc
> -- bus_leftbus
> 
> Which makes the topology a valid tree.
> 
> The exynos-bus concept in devfreq[3] is designed in such a way that every bus is
> probed separately as a platform device, and is a largely independent entity.
>
> This RFC proposes an extension to the existing devfreq driver that basically
> provides a simple QoS to ensure minimum clock frequency for selected buses
> (possibly overriding devfreq governor calculations) using the interconnect
> framework.
> 
> The hierarchy is modelled in such a way that every bus is an interconnect node.
> On the other hand, what is considered an interconnect provider here is quite
> arbitrary, but for the reasons mentioned in the above paragraph, this RFC
> assumes that every bus is a provider of itself as a node. Using an alternative

IIUC, in case we want to transfer data between the display and the memory
controller, the path would look like this:

display --> bus_display --> bus_leftbus --> bus_dmc --> memory

But the bus_display for example would have not one, but two nodes (ports),
right?  One representing the link to the display controller and another one
representing the link to bus_leftbus? So i think that all the buses should
have at least two nodes, to represent each end of the wire.

> singleton provider approach was deemed more complicated since the 'dev' field in
> 'struct icc_provider' has to be set to something meaningful and we are tied to
> the 'samsung,exynos-bus' compatible string in the driver (and multiple instances
> of exynos-bus probed in indeterminate relative order).
> 

Sure, the rest makes sense to me.

Thanks,
Georgi
Artur Świgoń Aug. 8, 2019, 1:18 p.m. UTC | #8
Hi,

Thank you for your remarks. I will take them into account while preparing RFCv2.

On Mon, 2019-07-29 at 10:52 +0900, Chanwoo Choi wrote:
> Hi,
> 
> On 19. 7. 23. 오후 9:20, Artur Świgoń wrote:
> > This patch adds interconnect functionality to the exynos-bus devfreq
> > driver.
> > 
> > The SoC topology is a graph (or, more specifically, a tree) and most of its
> > edges are taken from the devfreq parent-child hierarchy (cf.
> > Documentation/devicetree/bindings/devfreq/exynos-bus.txt). The previous
> > patch adds missing edges to the DT (under the name 'parent'). Due to
> > unspecified relative probing order, -EPROBE_DEFER may be propagated to
> > guarantee that a child is probed before its parent.
> > 
> > Each bus is now an interconnect provider and an interconnect node as well
> > (cf. Documentation/interconnect/interconnect.rst), i.e. every bus registers
> > itself as a node. Node IDs are not hardcoded but rather assigned at
> > runtime, in probing order (subject to the above-mentioned exception
> > regarding relative order). This approach allows for using this driver with
> > various Exynos SoCs.
> > 
> > The devfreq target() callback provided by exynos-bus now selects either the
> > frequency calculated by the devfreq governor or the frequency requested via
> > the interconnect API for the given node, whichever is higher.
> 
> Basically, I agree to support the QoS requirement between devices.
> But, I think that need to consider the multiple cases.
> 
> 
> 1. When changing the devfreq governor by user,
> For example of the connection between bus_dmc/leftbus/display on patch8,
> there are possible multiple cases with various devfreq governor
> which is changed on the runtime by user through sysfs interface.
> 
> If users changes the devfreq governor as following:
> Before,
> - bus_dmc (simple_ondemand, available frequency 100/200/300/400 MHz)
> --> bus_leftbus(simple_ondemand, available frequency 100/200/300/400 MHz)
> ----> bus_display(passive)
> 
> After changed governor of bus_dmc,
> if the min_freq by interconnect requirement is 400Mhz,
> - bus_dmc (powersave) : min_freq and max_freq and cur_freq is 100MHz
> --> bus_leftbus(simple_ondemand) : cur_freq is 400Mhz
> ----> bus_display(passive)
> 
> The final frequency is 400MHz of bus_dmc
> even if the min_freq/max_freq/cur_freq is 100MHz.
> It cannot show the correct min_freq/max_freq through
> devfreq sysfs interface.
> 
> 
> 2. When disabling the some frequency by devfreq-thermal throttling,
> This patch checks the min_freq of interconnect requirement
> in the exynos_bus_target() and exynos_bus_passive_target().
> Also, it cannot show the correct min_freq/max_freq through
> devfreq sysfs interface.
> 
> For example of bus_dmc bus,
> - The available frequencies are 100MHz, 200MHz, 300MHz, 400MHz
> - Disable 400MHz by devfreq-thermal throttling 
> - min_freq is 100MHz
> - max_freq is 300MHz
> - min_freq of interconnect is 400MHz
> 
> In result, the final frequency is 400MHz by exynos_bus_target()
> There are no problem for working. But, the user cannot know
> reason why cur_freq is 400MHz even if max_freq is 300MHz.
> 
> Basically, update_devfreq() considers the all constraints
> of min_freq/max_freq to decide the proper target frequency.
> 
> 
> 3.
> I think that the exynos_bus_passive_target() is used for devfreq device
> using 'passive' governor. The frequency already depends on the parent device.
> 
> If already the parent devfreq device like bus_leftbus consider
> the minimum frequency of QoS requirement like interconnect,
> it is not necessary. The next frequency of devfreq device
> with 'passive' governor, it will apply the QoS requirement
> without any additional code.
> 
> > 
> > Please note that it is not an error when CONFIG_INTERCONNECT is 'n', in
> > which case all interconnect API functions are no-op.
> > 
> > Signed-off-by: Artur Świgoń <a.swigon@partner.samsung.com>
> > ---
> >  drivers/devfreq/exynos-bus.c | 145 +++++++++++++++++++++++++++++++++++
> >  1 file changed, 145 insertions(+)
> > 
> > diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
> > index 412511ca7703..12fb7c84ae50 100644
> > --- a/drivers/devfreq/exynos-bus.c
> > +++ b/drivers/devfreq/exynos-bus.c
> > @@ -14,6 +14,7 @@
> >  #include <linux/devfreq-event.h>
> >  #include <linux/device.h>
> >  #include <linux/export.h>
> > +#include <linux/interconnect-provider.h>
> >  #include <linux/module.h>
> >  #include <linux/of.h>
> >  #include <linux/pm_opp.h>
> > @@ -23,6 +24,8 @@
> >  #define DEFAULT_SATURATION_RATIO	40
> >  #define DEFAULT_VOLTAGE_TOLERANCE	2
> >  
> > +#define icc_units_to_hz(x) ((x) * 1000UL / 8)
> > +
> >  struct exynos_bus {
> >  	struct device *dev;
> >  
> > @@ -31,12 +34,17 @@ struct exynos_bus {
> >  	unsigned int edev_count;
> >  	struct mutex lock;
> >  
> > +	unsigned long min_freq;
> >  	unsigned long curr_freq;
> >  
> >  	struct regulator *regulator;
> >  	struct clk *clk;
> >  	unsigned int voltage_tolerance;
> >  	unsigned int ratio;
> > +
> > +	/* One provider per bus, one node per provider */
> > +	struct icc_provider provider;
> > +	struct icc_node *node;
> >  };
> >  
> >  /*
> > @@ -61,6 +69,13 @@ exynos_bus_ops_edev(enable_edev);
> >  exynos_bus_ops_edev(disable_edev);
> >  exynos_bus_ops_edev(set_event);
> >  
> > +static int exynos_bus_next_id(void)
> > +{
> > +	static int exynos_bus_node_id;
> > +
> > +	return exynos_bus_node_id++;
> > +}
> > +
> >  static int exynos_bus_get_event(struct exynos_bus *bus,
> >  				struct devfreq_event_data *edata)
> >  {
> > @@ -98,6 +113,8 @@ static int exynos_bus_target(struct device *dev, unsigned
> > long *freq, u32 flags)
> >  	unsigned long old_freq, new_freq, new_volt, tol;
> >  	int ret = 0;
> >  
> > +	*freq = max(*freq, bus->min_freq);
> > +
> >  	/* Get new opp-bus instance according to new bus clock */
> >  	new_opp = devfreq_recommended_opp(dev, freq, flags);
> >  	if (IS_ERR(new_opp)) {
> > @@ -208,6 +225,8 @@ static int exynos_bus_passive_target(struct device *dev,
> > unsigned long *freq,
> >  	unsigned long old_freq, new_freq;
> >  	int ret = 0;
> >  
> > +	*freq = max(*freq, bus->min_freq);
> > +
> >  	/* Get new opp-bus instance according to new bus clock */
> >  	new_opp = devfreq_recommended_opp(dev, freq, flags);
> >  	if (IS_ERR(new_opp)) {
> > @@ -251,6 +270,35 @@ static void exynos_bus_passive_exit(struct device *dev)
> >  	clk_disable_unprepare(bus->clk);
> >  }
> >  
> > +static int exynos_bus_icc_set(struct icc_node *src, struct icc_node *dst)
> > +{
> > +	struct exynos_bus *src_bus = src->data, *dst_bus = dst->data;
> > +
> > +	src_bus->min_freq = icc_units_to_hz(src->peak_bw);
> > +	dst_bus->min_freq = icc_units_to_hz(dst->peak_bw);
> > +
> > +	return 0;
> > +}
> > +
> > +static int exynos_bus_icc_aggregate(struct icc_node *node, u32 avg_bw,
> > +				    u32 peak_bw, u32 *agg_avg, u32 *agg_peak)
> > +{
> > +	*agg_peak = *agg_avg = peak_bw;
> > +
> > +	return 0;
> > +}
> > +
> > +static struct icc_node *exynos_bus_icc_xlate(struct of_phandle_args *spec,
> > +					     void *data)
> > +{
> > +	struct exynos_bus *bus = data;
> > +
> > +	if (spec->np != bus->dev->of_node)
> > +		return ERR_PTR(-EINVAL);
> > +
> > +	return bus->node;
> > +}
> > +
> >  static int exynos_bus_parent_parse_of(struct device_node *np,
> >  					struct exynos_bus *bus)
> >  {
> > @@ -469,6 +517,95 @@ static int exynos_bus_profile_init_passive(struct
> > exynos_bus *bus,
> >  	return ret;
> >  }
> >  
> > +static int exynos_bus_icc_connect(struct exynos_bus *bus)
> > +{
> > +	struct device_node *np = bus->dev->of_node;
> > +	struct devfreq *parent_devfreq;
> > +	struct icc_node *parent_node = NULL;
> > +	struct of_phandle_args args;
> > +	int ret = 0;
> > +
> > +	parent_devfreq = devfreq_get_devfreq_by_phandle(bus->dev, 0);
> > +	if (!IS_ERR(parent_devfreq)) {
> > +		struct exynos_bus *parent_bus;
> > +
> > +		parent_bus = dev_get_drvdata(parent_devfreq->dev.parent);
> > +		parent_node = parent_bus->node;
> > +	} else {
> > +		/* Look for parent in DT */
> > +		int num = of_count_phandle_with_args(np, "parent",
> > +						     "#interconnect-cells");
> > +		if (num != 1)
> > +			goto out;
> > +
> > +		ret = of_parse_phandle_with_args(np, "parent",
> > +						 "#interconnect-cells",
> > +						 0, &args);
> > +		if (ret < 0)
> > +			goto out;
> > +
> > +		of_node_put(args.np);
> > +
> > +		parent_node = of_icc_get_from_provider(&args);
> > +		if (IS_ERR(parent_node)) {
> > +			/* May be -EPROBE_DEFER */
> > +			ret = PTR_ERR(parent_node);
> > +			goto out;
> > +		}
> > +	}
> > +
> > +	ret = icc_link_create(bus->node, parent_node->id);
> > +
> > +out:
> > +	return ret;
> > +}
> > +
> > +static int exynos_bus_icc_init(struct exynos_bus *bus)
> > +{
> > +	struct device *dev = bus->dev;
> > +	struct icc_provider *provider = &bus->provider;
> > +	struct icc_node *node;
> > +	int id, ret;
> > +
> > +	/* Initialize the interconnect provider */
> > +	provider->set = exynos_bus_icc_set;
> > +	provider->aggregate = exynos_bus_icc_aggregate;
> > +	provider->xlate = exynos_bus_icc_xlate;
> > +	provider->dev = dev;
> > +	provider->data = bus;
> > +
> > +	ret = icc_provider_add(provider);
> > +	if (ret < 0)
> > +		goto out;
> > +
> > +	id = exynos_bus_next_id();
> > +	node = icc_node_create(id);
> > +	if (IS_ERR(node)) {
> > +		ret = PTR_ERR(node);
> > +		goto err_node;
> > +	}
> > +
> > +	bus->node = node;
> > +	node->name = dev->of_node->name;
> > +	node->data = bus;
> > +	icc_node_add(node, provider);
> > +
> > +	ret = exynos_bus_icc_connect(bus);
> > +	if (ret < 0)
> > +		goto err_connect;
> > +
> > +out:
> > +	return ret;
> > +
> > +err_connect:
> > +	icc_node_del(node);
> > +	icc_node_destroy(id);
> > +err_node:
> > +	icc_provider_del(provider);
> > +
> > +	return ret;
> > +}
> > +
> >  static int exynos_bus_probe(struct platform_device *pdev)
> >  {
> >  	struct device *dev = &pdev->dev;
> > @@ -517,6 +654,14 @@ static int exynos_bus_probe(struct platform_device
> > *pdev)
> >  			goto err;
> >  	}
> >  
> > +	/*
> > +	 * Initialize interconnect provider. A return value of -ENOTSUPP means
> > +	 * that CONFIG_INTERCONNECT is disabled.
> > +	 */
> > +	ret = exynos_bus_icc_init(bus);
> > +	if (ret < 0 && ret != -ENOTSUPP)
> > +		goto err;
> > +
> >  	max_state = bus->devfreq->profile->max_state;
> >  	min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
> >  	max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
> > 
> 
>
Artur Świgoń Aug. 8, 2019, 1:19 p.m. UTC | #9
Hi,

Thank you for your comments.

On Tue, 2019-08-06 at 13:41 +0000, Leonard Crestez wrote:
> On 23.07.2019 15:21, Artur Świgoń wrote:
> 
> > +static int exynos_bus_icc_aggregate(struct icc_node *node, u32 avg_bw,
> > +				    u32 peak_bw, u32 *agg_avg, u32 *agg_peak)
> > +{
> > +	*agg_peak = *agg_avg = peak_bw;
> > +
> > +	return 0;
> > +}
> 
> The only current provider aggregates "avg" with "sum" and "peak" with 
> "max", any particular reason to do something different? This function 
> doesn't even really do aggregation, if there is a second request for "0" 
> then the first request is lost.

Yes, you're right. I adopted an oversimplified solution for the purpose of this
RFC (please bear in mind that currently only one icc_path is used, so there is
no aggregation anyway).

> I didn't find any docs but my interpretation of avg/peak is that "avg" 
> is for constant traffic like a display or VPU pushing pixels and "peak" 
> is for variable traffic like networking. I assume devices which make 
> "peak" requests are aggregated with max because they're not expected to 
> all max-out together.

That's correct (according to my understanding).

> In PATCH 11 you're making a bandwidth request based on resolution, that 
> traffic is constant and guaranteed to happend while the display is on so 
> it would make sense to request it as an "avg" and aggregate it with "sum".
> 
> --
> Regards,
> Leonard
>
Artur Świgoń Aug. 8, 2019, 1:28 p.m. UTC | #10
Hi,

On Wed, 2019-08-07 at 17:21 +0300, Georgi Djakov wrote:
> Hi Artur,
> 
> On 8/1/19 10:59, Artur Świgoń wrote:
> > Hi Georgi,
> > 
> > On Fri, 2019-07-26 at 11:05 +0300, Georgi Djakov wrote:
> > > Hi Artur,
> > > 
> > > On 7/23/19 15:20, Artur Świgoń wrote:
> > > > This patch adds interconnect functionality to the exynos-bus devfreq
> > > > driver.
> > > > 
> > > > The SoC topology is a graph (or, more specifically, a tree) and most of
> > > > its
> > > > edges are taken from the devfreq parent-child hierarchy (cf.
> > > > Documentation/devicetree/bindings/devfreq/exynos-bus.txt). The previous
> > > > patch adds missing edges to the DT (under the name 'parent'). Due to
> > > > unspecified relative probing order, -EPROBE_DEFER may be propagated to
> > > > guarantee that a child is probed before its parent.
> > > > 
> > > > Each bus is now an interconnect provider and an interconnect node as
> > > > well
> > > > (cf. Documentation/interconnect/interconnect.rst), i.e. every bus
> > > > registers
> > > > itself as a node. Node IDs are not hardcoded but rather assigned at
> > > > runtime, in probing order (subject to the above-mentioned exception
> > > > regarding relative order). This approach allows for using this driver
> > > > with
> > > > various Exynos SoCs.
> > > 
> > > I am not familiar with the Exynos bus topology, but it seems to me that
> > > it's not
> > > represented correctly. An interconnect provider with just a single node
> > > (port)
> > > is odd. I would expect that each provider consists of multiple master and
> > > slave
> > > nodes. This data would be used by a framework to understand what are the
> > > links
> > > and how the traffic flows between the IP blocks and through which buses.
> > 
> > To summarize the exynos-bus topology[1] used by the devfreq driver: There
> > are
> > many data buses for data transfer in Samsung Exynos SoC. Every bus has its
> > own
> > clock. Buses often share power lines, in which case one of the buses on the
> > power line is referred to as 'parent' (or as 'devfreq' in the DT). In the
> > particular case of Exynos4412[1][2], the topology can be expressed as
> > follows:
> > 
> > bus_dmc
> > -- bus_acp
> > -- bus_c2c
> > 
> > bus_leftbus
> > -- bus_rightbus
> > -- bus_display
> > -- bus_fsys
> > -- bus_peri
> > -- bus_mfc
> > 
> > Where bus_dmc and bus_leftbus probably could be referred to as masters, and
> > the
> > following indented nodes as slaves. Patch 08/11 of this RFC additionally
> > adds
> > the following to the DT:
> > 
> > bus_dmc
> > -- bus_leftbus
> > 
> > Which makes the topology a valid tree.
> > 
> > The exynos-bus concept in devfreq[3] is designed in such a way that every
> > bus is
> > probed separately as a platform device, and is a largely independent entity.
> > 
> > This RFC proposes an extension to the existing devfreq driver that basically
> > provides a simple QoS to ensure minimum clock frequency for selected buses
> > (possibly overriding devfreq governor calculations) using the interconnect
> > framework.
> > 
> > The hierarchy is modelled in such a way that every bus is an interconnect
> > node.
> > On the other hand, what is considered an interconnect provider here is quite
> > arbitrary, but for the reasons mentioned in the above paragraph, this RFC
> > assumes that every bus is a provider of itself as a node. Using an
> > alternative
> 
> IIUC, in case we want to transfer data between the display and the memory
> controller, the path would look like this:
> 
> display --> bus_display --> bus_leftbus --> bus_dmc --> memory
> 
> But the bus_display for example would have not one, but two nodes (ports),
> right?  One representing the link to the display controller and another one
> representing the link to bus_leftbus? So i think that all the buses should
> have at least two nodes, to represent each end of the wire.

I do not think we really need that for our simple tree hierarchy. Of course, I
can split every tree node into two nodes/ports (e.g., 'in' for children and
'out' for parent), but neither 'display' nor 'memory' from your diagram above
are registered with the interconnect framework (only buses are). The devfreq
devices used in the driver are virtual anyway.

> > singleton provider approach was deemed more complicated since the 'dev'
> > field in
> > 'struct icc_provider' has to be set to something meaningful and we are tied
> > to
> > the 'samsung,exynos-bus' compatible string in the driver (and multiple
> > instances
> > of exynos-bus probed in indeterminate relative order).
> > 
> 
> Sure, the rest makes sense to me.
> 
> Thanks,
> Georgi

Regards,
Leonard Crestez Aug. 8, 2019, 3 p.m. UTC | #11
On 29.07.2019 04:49, Chanwoo Choi wrote:
> On 19. 7. 23. 오후 9:20, Artur Świgoń wrote:
>> This patch adds interconnect functionality to the exynos-bus devfreq
>> driver.
>>
>> The devfreq target() callback provided by exynos-bus now selects either the
>> frequency calculated by the devfreq governor or the frequency requested via
>> the interconnect API for the given node, whichever is higher.
> 
> Basically, I agree to support the QoS requirement between devices.
> But, I think that need to consider the multiple cases.
> 
> 1. When changing the devfreq governor by user,
> For example of the connection between bus_dmc/leftbus/display on patch8,
> there are possible multiple cases with various devfreq governor
> which is changed on the runtime by user through sysfs interface.
> 
> If users changes the devfreq governor as following:
> Before,
> - bus_dmc (simple_ondemand, available frequency 100/200/300/400 MHz)
> --> bus_leftbus(simple_ondemand, available frequency 100/200/300/400 MHz)
> ----> bus_display(passive)
> 
> After changed governor of bus_dmc,
> if the min_freq by interconnect requirement is 400Mhz,
> - bus_dmc (powersave) : min_freq and max_freq and cur_freq is 100MHz
> --> bus_leftbus(simple_ondemand) : cur_freq is 400Mhz
> ----> bus_display(passive)
> 
> The final frequency is 400MHz of bus_dmc
> even if the min_freq/max_freq/cur_freq is 100MHz.
> It cannot show the correct min_freq/max_freq through
> devfreq sysfs interface.
> 
> 
> 2. When disabling the some frequency by devfreq-thermal throttling,
> This patch checks the min_freq of interconnect requirement
> in the exynos_bus_target() and exynos_bus_passive_target().
> Also, it cannot show the correct min_freq/max_freq through
> devfreq sysfs interface.
> 
> For example of bus_dmc bus,
> - The available frequencies are 100MHz, 200MHz, 300MHz, 400MHz
> - Disable 400MHz by devfreq-thermal throttling
> - min_freq is 100MHz
> - max_freq is 300MHz
> - min_freq of interconnect is 400MHz
> 
> In result, the final frequency is 400MHz by exynos_bus_target()
> There are no problem for working. But, the user cannot know
> reason why cur_freq is 400MHz even if max_freq is 300MHz.
> 
> Basically, update_devfreq() considers the all constraints
> of min_freq/max_freq to decide the proper target frequency.

I think that applying the interconnect min_freq via dev_pm_qos can help 
with many of these concerns: update_devfreq controls all the min/max 
handling, sysfs is accurate and better decisions can be made regarding 
thermal. Enforcing constraints in the core is definitely better.

This wouldn't even be a very big change, you don't need to actually move 
the interconnect code outside of devfreq. Just make every devfreq/icc 
node register a dev_pm_qos_request for itself during registration and 
call dev_pm_qos_update_request inside exynos_bus_icc_set.

See: https://patchwork.kernel.org/patch/11084279/

--
Regards,
Leonard
Chanwoo Choi Aug. 9, 2019, 2:17 a.m. UTC | #12
Hi,

We need to discuss how to change or do refactoring on v2.
Actually, I don't know the your opinion and how to do it on v2.
You have to reply the answer and then after finished the discussion,
I recommend that you would rework and resend the v2 patches.

On 19. 8. 8. 오후 10:18, Artur Świgoń wrote:
> Hi,
> 
> Thank you for your remarks. I will take them into account while preparing RFCv2.
> 
> On Mon, 2019-07-29 at 10:52 +0900, Chanwoo Choi wrote:
>> Hi,
>>
>> On 19. 7. 23. 오후 9:20, Artur Świgoń wrote:
>>> This patch adds interconnect functionality to the exynos-bus devfreq
>>> driver.
>>>
>>> The SoC topology is a graph (or, more specifically, a tree) and most of its
>>> edges are taken from the devfreq parent-child hierarchy (cf.
>>> Documentation/devicetree/bindings/devfreq/exynos-bus.txt). The previous
>>> patch adds missing edges to the DT (under the name 'parent'). Due to
>>> unspecified relative probing order, -EPROBE_DEFER may be propagated to
>>> guarantee that a child is probed before its parent.
>>>
>>> Each bus is now an interconnect provider and an interconnect node as well
>>> (cf. Documentation/interconnect/interconnect.rst), i.e. every bus registers
>>> itself as a node. Node IDs are not hardcoded but rather assigned at
>>> runtime, in probing order (subject to the above-mentioned exception
>>> regarding relative order). This approach allows for using this driver with
>>> various Exynos SoCs.
>>>
>>> The devfreq target() callback provided by exynos-bus now selects either the
>>> frequency calculated by the devfreq governor or the frequency requested via
>>> the interconnect API for the given node, whichever is higher.
>>
>> Basically, I agree to support the QoS requirement between devices.
>> But, I think that need to consider the multiple cases.
>>
>>
>> 1. When changing the devfreq governor by user,
>> For example of the connection between bus_dmc/leftbus/display on patch8,
>> there are possible multiple cases with various devfreq governor
>> which is changed on the runtime by user through sysfs interface.
>>
>> If users changes the devfreq governor as following:
>> Before,
>> - bus_dmc (simple_ondemand, available frequency 100/200/300/400 MHz)
>> --> bus_leftbus(simple_ondemand, available frequency 100/200/300/400 MHz)
>> ----> bus_display(passive)
>>
>> After changed governor of bus_dmc,
>> if the min_freq by interconnect requirement is 400Mhz,
>> - bus_dmc (powersave) : min_freq and max_freq and cur_freq is 100MHz
>> --> bus_leftbus(simple_ondemand) : cur_freq is 400Mhz
>> ----> bus_display(passive)
>>
>> The final frequency is 400MHz of bus_dmc
>> even if the min_freq/max_freq/cur_freq is 100MHz.
>> It cannot show the correct min_freq/max_freq through
>> devfreq sysfs interface.
>>
>>
>> 2. When disabling the some frequency by devfreq-thermal throttling,
>> This patch checks the min_freq of interconnect requirement
>> in the exynos_bus_target() and exynos_bus_passive_target().
>> Also, it cannot show the correct min_freq/max_freq through
>> devfreq sysfs interface.
>>
>> For example of bus_dmc bus,
>> - The available frequencies are 100MHz, 200MHz, 300MHz, 400MHz
>> - Disable 400MHz by devfreq-thermal throttling 
>> - min_freq is 100MHz
>> - max_freq is 300MHz
>> - min_freq of interconnect is 400MHz
>>
>> In result, the final frequency is 400MHz by exynos_bus_target()
>> There are no problem for working. But, the user cannot know
>> reason why cur_freq is 400MHz even if max_freq is 300MHz.
>>
>> Basically, update_devfreq() considers the all constraints
>> of min_freq/max_freq to decide the proper target frequency.
>>
>>
>> 3.
>> I think that the exynos_bus_passive_target() is used for devfreq device
>> using 'passive' governor. The frequency already depends on the parent device.
>>
>> If already the parent devfreq device like bus_leftbus consider
>> the minimum frequency of QoS requirement like interconnect,
>> it is not necessary. The next frequency of devfreq device
>> with 'passive' governor, it will apply the QoS requirement
>> without any additional code.
>>
>>>
>>> Please note that it is not an error when CONFIG_INTERCONNECT is 'n', in
>>> which case all interconnect API functions are no-op.
>>>
>>> Signed-off-by: Artur Świgoń <a.swigon@partner.samsung.com>
>>> ---
>>>  drivers/devfreq/exynos-bus.c | 145 +++++++++++++++++++++++++++++++++++
>>>  1 file changed, 145 insertions(+)
>>>
>>> diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
>>> index 412511ca7703..12fb7c84ae50 100644
>>> --- a/drivers/devfreq/exynos-bus.c
>>> +++ b/drivers/devfreq/exynos-bus.c
>>> @@ -14,6 +14,7 @@
>>>  #include <linux/devfreq-event.h>
>>>  #include <linux/device.h>
>>>  #include <linux/export.h>
>>> +#include <linux/interconnect-provider.h>
>>>  #include <linux/module.h>
>>>  #include <linux/of.h>
>>>  #include <linux/pm_opp.h>
>>> @@ -23,6 +24,8 @@
>>>  #define DEFAULT_SATURATION_RATIO	40
>>>  #define DEFAULT_VOLTAGE_TOLERANCE	2
>>>  
>>> +#define icc_units_to_hz(x) ((x) * 1000UL / 8)
>>> +
>>>  struct exynos_bus {
>>>  	struct device *dev;
>>>  
>>> @@ -31,12 +34,17 @@ struct exynos_bus {
>>>  	unsigned int edev_count;
>>>  	struct mutex lock;
>>>  
>>> +	unsigned long min_freq;
>>>  	unsigned long curr_freq;
>>>  
>>>  	struct regulator *regulator;
>>>  	struct clk *clk;
>>>  	unsigned int voltage_tolerance;
>>>  	unsigned int ratio;
>>> +
>>> +	/* One provider per bus, one node per provider */
>>> +	struct icc_provider provider;
>>> +	struct icc_node *node;
>>>  };
>>>  
>>>  /*
>>> @@ -61,6 +69,13 @@ exynos_bus_ops_edev(enable_edev);
>>>  exynos_bus_ops_edev(disable_edev);
>>>  exynos_bus_ops_edev(set_event);
>>>  
>>> +static int exynos_bus_next_id(void)
>>> +{
>>> +	static int exynos_bus_node_id;
>>> +
>>> +	return exynos_bus_node_id++;
>>> +}
>>> +
>>>  static int exynos_bus_get_event(struct exynos_bus *bus,
>>>  				struct devfreq_event_data *edata)
>>>  {
>>> @@ -98,6 +113,8 @@ static int exynos_bus_target(struct device *dev, unsigned
>>> long *freq, u32 flags)
>>>  	unsigned long old_freq, new_freq, new_volt, tol;
>>>  	int ret = 0;
>>>  
>>> +	*freq = max(*freq, bus->min_freq);
>>> +
>>>  	/* Get new opp-bus instance according to new bus clock */
>>>  	new_opp = devfreq_recommended_opp(dev, freq, flags);
>>>  	if (IS_ERR(new_opp)) {
>>> @@ -208,6 +225,8 @@ static int exynos_bus_passive_target(struct device *dev,
>>> unsigned long *freq,
>>>  	unsigned long old_freq, new_freq;
>>>  	int ret = 0;
>>>  
>>> +	*freq = max(*freq, bus->min_freq);
>>> +
>>>  	/* Get new opp-bus instance according to new bus clock */
>>>  	new_opp = devfreq_recommended_opp(dev, freq, flags);
>>>  	if (IS_ERR(new_opp)) {
>>> @@ -251,6 +270,35 @@ static void exynos_bus_passive_exit(struct device *dev)
>>>  	clk_disable_unprepare(bus->clk);
>>>  }
>>>  
>>> +static int exynos_bus_icc_set(struct icc_node *src, struct icc_node *dst)
>>> +{
>>> +	struct exynos_bus *src_bus = src->data, *dst_bus = dst->data;
>>> +
>>> +	src_bus->min_freq = icc_units_to_hz(src->peak_bw);
>>> +	dst_bus->min_freq = icc_units_to_hz(dst->peak_bw);
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static int exynos_bus_icc_aggregate(struct icc_node *node, u32 avg_bw,
>>> +				    u32 peak_bw, u32 *agg_avg, u32 *agg_peak)
>>> +{
>>> +	*agg_peak = *agg_avg = peak_bw;
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static struct icc_node *exynos_bus_icc_xlate(struct of_phandle_args *spec,
>>> +					     void *data)
>>> +{
>>> +	struct exynos_bus *bus = data;
>>> +
>>> +	if (spec->np != bus->dev->of_node)
>>> +		return ERR_PTR(-EINVAL);
>>> +
>>> +	return bus->node;
>>> +}
>>> +
>>>  static int exynos_bus_parent_parse_of(struct device_node *np,
>>>  					struct exynos_bus *bus)
>>>  {
>>> @@ -469,6 +517,95 @@ static int exynos_bus_profile_init_passive(struct
>>> exynos_bus *bus,
>>>  	return ret;
>>>  }
>>>  
>>> +static int exynos_bus_icc_connect(struct exynos_bus *bus)
>>> +{
>>> +	struct device_node *np = bus->dev->of_node;
>>> +	struct devfreq *parent_devfreq;
>>> +	struct icc_node *parent_node = NULL;
>>> +	struct of_phandle_args args;
>>> +	int ret = 0;
>>> +
>>> +	parent_devfreq = devfreq_get_devfreq_by_phandle(bus->dev, 0);
>>> +	if (!IS_ERR(parent_devfreq)) {
>>> +		struct exynos_bus *parent_bus;
>>> +
>>> +		parent_bus = dev_get_drvdata(parent_devfreq->dev.parent);
>>> +		parent_node = parent_bus->node;
>>> +	} else {
>>> +		/* Look for parent in DT */
>>> +		int num = of_count_phandle_with_args(np, "parent",
>>> +						     "#interconnect-cells");
>>> +		if (num != 1)
>>> +			goto out;
>>> +
>>> +		ret = of_parse_phandle_with_args(np, "parent",
>>> +						 "#interconnect-cells",
>>> +						 0, &args);
>>> +		if (ret < 0)
>>> +			goto out;
>>> +
>>> +		of_node_put(args.np);
>>> +
>>> +		parent_node = of_icc_get_from_provider(&args);
>>> +		if (IS_ERR(parent_node)) {
>>> +			/* May be -EPROBE_DEFER */
>>> +			ret = PTR_ERR(parent_node);
>>> +			goto out;
>>> +		}
>>> +	}
>>> +
>>> +	ret = icc_link_create(bus->node, parent_node->id);
>>> +
>>> +out:
>>> +	return ret;
>>> +}
>>> +
>>> +static int exynos_bus_icc_init(struct exynos_bus *bus)
>>> +{
>>> +	struct device *dev = bus->dev;
>>> +	struct icc_provider *provider = &bus->provider;
>>> +	struct icc_node *node;
>>> +	int id, ret;
>>> +
>>> +	/* Initialize the interconnect provider */
>>> +	provider->set = exynos_bus_icc_set;
>>> +	provider->aggregate = exynos_bus_icc_aggregate;
>>> +	provider->xlate = exynos_bus_icc_xlate;
>>> +	provider->dev = dev;
>>> +	provider->data = bus;
>>> +
>>> +	ret = icc_provider_add(provider);
>>> +	if (ret < 0)
>>> +		goto out;
>>> +
>>> +	id = exynos_bus_next_id();
>>> +	node = icc_node_create(id);
>>> +	if (IS_ERR(node)) {
>>> +		ret = PTR_ERR(node);
>>> +		goto err_node;
>>> +	}
>>> +
>>> +	bus->node = node;
>>> +	node->name = dev->of_node->name;
>>> +	node->data = bus;
>>> +	icc_node_add(node, provider);
>>> +
>>> +	ret = exynos_bus_icc_connect(bus);
>>> +	if (ret < 0)
>>> +		goto err_connect;
>>> +
>>> +out:
>>> +	return ret;
>>> +
>>> +err_connect:
>>> +	icc_node_del(node);
>>> +	icc_node_destroy(id);
>>> +err_node:
>>> +	icc_provider_del(provider);
>>> +
>>> +	return ret;
>>> +}
>>> +
>>>  static int exynos_bus_probe(struct platform_device *pdev)
>>>  {
>>>  	struct device *dev = &pdev->dev;
>>> @@ -517,6 +654,14 @@ static int exynos_bus_probe(struct platform_device
>>> *pdev)
>>>  			goto err;
>>>  	}
>>>  
>>> +	/*
>>> +	 * Initialize interconnect provider. A return value of -ENOTSUPP means
>>> +	 * that CONFIG_INTERCONNECT is disabled.
>>> +	 */
>>> +	ret = exynos_bus_icc_init(bus);
>>> +	if (ret < 0 && ret != -ENOTSUPP)
>>> +		goto err;
>>> +
>>>  	max_state = bus->devfreq->profile->max_state;
>>>  	min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
>>>  	max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
>>>
>>
>>
Chanwoo Choi Aug. 19, 2019, 11:44 p.m. UTC | #13
Hi Artur and Leonard,

On 19. 8. 9. 오전 12:00, Leonard Crestez wrote:
> On 29.07.2019 04:49, Chanwoo Choi wrote:
>> On 19. 7. 23. 오후 9:20, Artur Świgoń wrote:
>>> This patch adds interconnect functionality to the exynos-bus devfreq
>>> driver.
>>>
>>> The devfreq target() callback provided by exynos-bus now selects either the
>>> frequency calculated by the devfreq governor or the frequency requested via
>>> the interconnect API for the given node, whichever is higher.
>>
>> Basically, I agree to support the QoS requirement between devices.
>> But, I think that need to consider the multiple cases.
>>
>> 1. When changing the devfreq governor by user,
>> For example of the connection between bus_dmc/leftbus/display on patch8,
>> there are possible multiple cases with various devfreq governor
>> which is changed on the runtime by user through sysfs interface.
>>
>> If users changes the devfreq governor as following:
>> Before,
>> - bus_dmc (simple_ondemand, available frequency 100/200/300/400 MHz)
>> --> bus_leftbus(simple_ondemand, available frequency 100/200/300/400 MHz)
>> ----> bus_display(passive)
>>
>> After changed governor of bus_dmc,
>> if the min_freq by interconnect requirement is 400Mhz,
>> - bus_dmc (powersave) : min_freq and max_freq and cur_freq is 100MHz
>> --> bus_leftbus(simple_ondemand) : cur_freq is 400Mhz
>> ----> bus_display(passive)
>>
>> The final frequency is 400MHz of bus_dmc
>> even if the min_freq/max_freq/cur_freq is 100MHz.
>> It cannot show the correct min_freq/max_freq through
>> devfreq sysfs interface.
>>
>>
>> 2. When disabling the some frequency by devfreq-thermal throttling,
>> This patch checks the min_freq of interconnect requirement
>> in the exynos_bus_target() and exynos_bus_passive_target().
>> Also, it cannot show the correct min_freq/max_freq through
>> devfreq sysfs interface.
>>
>> For example of bus_dmc bus,
>> - The available frequencies are 100MHz, 200MHz, 300MHz, 400MHz
>> - Disable 400MHz by devfreq-thermal throttling
>> - min_freq is 100MHz
>> - max_freq is 300MHz
>> - min_freq of interconnect is 400MHz
>>
>> In result, the final frequency is 400MHz by exynos_bus_target()
>> There are no problem for working. But, the user cannot know
>> reason why cur_freq is 400MHz even if max_freq is 300MHz.
>>
>> Basically, update_devfreq() considers the all constraints
>> of min_freq/max_freq to decide the proper target frequency.
> 
> I think that applying the interconnect min_freq via dev_pm_qos can help 
> with many of these concerns: update_devfreq controls all the min/max 
> handling, sysfs is accurate and better decisions can be made regarding 
> thermal. Enforcing constraints in the core is definitely better.
> 
> This wouldn't even be a very big change, you don't need to actually move 
> the interconnect code outside of devfreq. Just make every devfreq/icc 
> node register a dev_pm_qos_request for itself during registration and 
> call dev_pm_qos_update_request inside exynos_bus_icc_set.
> 
> See: https://patchwork.kernel.org/patch/11084279/

I agree this approach of Leonard. If we use the dev_pm_qos[1] feature,
it resolve the issue of my comment1/2.

In summary, when receive the minimum frequency requirement
from interconnect path, the each bus uses the dev_pm_qos interface
in order to inform the minimum frequency from interconnect to devfreq.
And then the devfreq core will execute the update_devfreq()
with all frequency requirements as following:
- the user input (min/max frequency though devfreq sysfs
- the target frequency provided by devfreq governor
- the minimum frequency from interconnect path
diff mbox series

Patch

diff --git a/drivers/devfreq/exynos-bus.c b/drivers/devfreq/exynos-bus.c
index 412511ca7703..12fb7c84ae50 100644
--- a/drivers/devfreq/exynos-bus.c
+++ b/drivers/devfreq/exynos-bus.c
@@ -14,6 +14,7 @@ 
 #include <linux/devfreq-event.h>
 #include <linux/device.h>
 #include <linux/export.h>
+#include <linux/interconnect-provider.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/pm_opp.h>
@@ -23,6 +24,8 @@ 
 #define DEFAULT_SATURATION_RATIO	40
 #define DEFAULT_VOLTAGE_TOLERANCE	2
 
+#define icc_units_to_hz(x) ((x) * 1000UL / 8)
+
 struct exynos_bus {
 	struct device *dev;
 
@@ -31,12 +34,17 @@  struct exynos_bus {
 	unsigned int edev_count;
 	struct mutex lock;
 
+	unsigned long min_freq;
 	unsigned long curr_freq;
 
 	struct regulator *regulator;
 	struct clk *clk;
 	unsigned int voltage_tolerance;
 	unsigned int ratio;
+
+	/* One provider per bus, one node per provider */
+	struct icc_provider provider;
+	struct icc_node *node;
 };
 
 /*
@@ -61,6 +69,13 @@  exynos_bus_ops_edev(enable_edev);
 exynos_bus_ops_edev(disable_edev);
 exynos_bus_ops_edev(set_event);
 
+static int exynos_bus_next_id(void)
+{
+	static int exynos_bus_node_id;
+
+	return exynos_bus_node_id++;
+}
+
 static int exynos_bus_get_event(struct exynos_bus *bus,
 				struct devfreq_event_data *edata)
 {
@@ -98,6 +113,8 @@  static int exynos_bus_target(struct device *dev, unsigned long *freq, u32 flags)
 	unsigned long old_freq, new_freq, new_volt, tol;
 	int ret = 0;
 
+	*freq = max(*freq, bus->min_freq);
+
 	/* Get new opp-bus instance according to new bus clock */
 	new_opp = devfreq_recommended_opp(dev, freq, flags);
 	if (IS_ERR(new_opp)) {
@@ -208,6 +225,8 @@  static int exynos_bus_passive_target(struct device *dev, unsigned long *freq,
 	unsigned long old_freq, new_freq;
 	int ret = 0;
 
+	*freq = max(*freq, bus->min_freq);
+
 	/* Get new opp-bus instance according to new bus clock */
 	new_opp = devfreq_recommended_opp(dev, freq, flags);
 	if (IS_ERR(new_opp)) {
@@ -251,6 +270,35 @@  static void exynos_bus_passive_exit(struct device *dev)
 	clk_disable_unprepare(bus->clk);
 }
 
+static int exynos_bus_icc_set(struct icc_node *src, struct icc_node *dst)
+{
+	struct exynos_bus *src_bus = src->data, *dst_bus = dst->data;
+
+	src_bus->min_freq = icc_units_to_hz(src->peak_bw);
+	dst_bus->min_freq = icc_units_to_hz(dst->peak_bw);
+
+	return 0;
+}
+
+static int exynos_bus_icc_aggregate(struct icc_node *node, u32 avg_bw,
+				    u32 peak_bw, u32 *agg_avg, u32 *agg_peak)
+{
+	*agg_peak = *agg_avg = peak_bw;
+
+	return 0;
+}
+
+static struct icc_node *exynos_bus_icc_xlate(struct of_phandle_args *spec,
+					     void *data)
+{
+	struct exynos_bus *bus = data;
+
+	if (spec->np != bus->dev->of_node)
+		return ERR_PTR(-EINVAL);
+
+	return bus->node;
+}
+
 static int exynos_bus_parent_parse_of(struct device_node *np,
 					struct exynos_bus *bus)
 {
@@ -469,6 +517,95 @@  static int exynos_bus_profile_init_passive(struct exynos_bus *bus,
 	return ret;
 }
 
+static int exynos_bus_icc_connect(struct exynos_bus *bus)
+{
+	struct device_node *np = bus->dev->of_node;
+	struct devfreq *parent_devfreq;
+	struct icc_node *parent_node = NULL;
+	struct of_phandle_args args;
+	int ret = 0;
+
+	parent_devfreq = devfreq_get_devfreq_by_phandle(bus->dev, 0);
+	if (!IS_ERR(parent_devfreq)) {
+		struct exynos_bus *parent_bus;
+
+		parent_bus = dev_get_drvdata(parent_devfreq->dev.parent);
+		parent_node = parent_bus->node;
+	} else {
+		/* Look for parent in DT */
+		int num = of_count_phandle_with_args(np, "parent",
+						     "#interconnect-cells");
+		if (num != 1)
+			goto out;
+
+		ret = of_parse_phandle_with_args(np, "parent",
+						 "#interconnect-cells",
+						 0, &args);
+		if (ret < 0)
+			goto out;
+
+		of_node_put(args.np);
+
+		parent_node = of_icc_get_from_provider(&args);
+		if (IS_ERR(parent_node)) {
+			/* May be -EPROBE_DEFER */
+			ret = PTR_ERR(parent_node);
+			goto out;
+		}
+	}
+
+	ret = icc_link_create(bus->node, parent_node->id);
+
+out:
+	return ret;
+}
+
+static int exynos_bus_icc_init(struct exynos_bus *bus)
+{
+	struct device *dev = bus->dev;
+	struct icc_provider *provider = &bus->provider;
+	struct icc_node *node;
+	int id, ret;
+
+	/* Initialize the interconnect provider */
+	provider->set = exynos_bus_icc_set;
+	provider->aggregate = exynos_bus_icc_aggregate;
+	provider->xlate = exynos_bus_icc_xlate;
+	provider->dev = dev;
+	provider->data = bus;
+
+	ret = icc_provider_add(provider);
+	if (ret < 0)
+		goto out;
+
+	id = exynos_bus_next_id();
+	node = icc_node_create(id);
+	if (IS_ERR(node)) {
+		ret = PTR_ERR(node);
+		goto err_node;
+	}
+
+	bus->node = node;
+	node->name = dev->of_node->name;
+	node->data = bus;
+	icc_node_add(node, provider);
+
+	ret = exynos_bus_icc_connect(bus);
+	if (ret < 0)
+		goto err_connect;
+
+out:
+	return ret;
+
+err_connect:
+	icc_node_del(node);
+	icc_node_destroy(id);
+err_node:
+	icc_provider_del(provider);
+
+	return ret;
+}
+
 static int exynos_bus_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -517,6 +654,14 @@  static int exynos_bus_probe(struct platform_device *pdev)
 			goto err;
 	}
 
+	/*
+	 * Initialize interconnect provider. A return value of -ENOTSUPP means
+	 * that CONFIG_INTERCONNECT is disabled.
+	 */
+	ret = exynos_bus_icc_init(bus);
+	if (ret < 0 && ret != -ENOTSUPP)
+		goto err;
+
 	max_state = bus->devfreq->profile->max_state;
 	min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
 	max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);