
[10/18] drivers: firmware: psci: Add hierarchical domain idle states converter

Message ID 20190513192300.653-11-ulf.hansson@linaro.org (mailing list archive)
State New, archived
Series ARM/ARM64: Support hierarchical CPU arrangement for PSCI

Commit Message

Ulf Hansson May 13, 2019, 7:22 p.m. UTC
If the hierarchical CPU topology is used, but the OS initiated mode isn't
supported, we need to rely solely on the regular cpuidle framework to
manage the idle state selection, rather than using genpd and its governor.

For this reason, introduce a new PSCI DT helper function,
psci_dt_pm_domains_parse_states(), which parses and converts the
hierarchically described domain idle states from DT into regular flattened
cpuidle states. The converted states are added to the existing cpuidle
driver's array of idle states, which makes them available for cpuidle.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---

Changes:
	- Some simplification of the code.

---
 drivers/firmware/psci/psci.h           |   5 ++
 drivers/firmware/psci/psci_pm_domain.c | 118 +++++++++++++++++++++++++
 2 files changed, 123 insertions(+)
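
For illustration only, here is a small, self-contained userspace sketch (all numbers are made-up examples, none are taken from a real DT) of the arithmetic the converter performs per domain idle state: the genpd residency and latencies arrive in nanoseconds and are divided down to microseconds for cpuidle, and the domain state's "arm,psci-suspend-param" value is OR'ed with the CPU-level suspend parameter to form the flattened PSCI state.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Hypothetical example values, standing in for genpd_power_state data. */
	uint64_t residency_ns = 2000000;	/* cluster minimum residency */
	uint64_t on_latency_ns = 500000;	/* power-on latency */
	uint64_t off_latency_ns = 300000;	/* power-off latency */
	uint32_t domain_param = 0x1000000;	/* domain state "arm,psci-suspend-param" */
	uint32_t cpu_param = 0x0010000;		/* CPU-level suspend parameter */

	/* ns -> us conversions, as done with do_div() in the converter. */
	printf("target_residency = %llu us\n",
	       (unsigned long long)(residency_ns / 1000));
	printf("exit_latency     = %llu us\n",
	       (unsigned long long)((on_latency_ns + off_latency_ns) / 1000));

	/* The flattened state carries both the domain and the CPU bits. */
	printf("psci suspend parameter = %#x\n", domain_param | cpu_param);

	return 0;
}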

Comments

Lorenzo Pieralisi July 9, 2019, 3:31 p.m. UTC | #1
On Mon, May 13, 2019 at 09:22:52PM +0200, Ulf Hansson wrote:
> If the hierarchical CPU topology is used, but the OS initiated mode isn't
> supported, we need to rely solely on the regular cpuidle framework to
> manage the idle state selection, rather than using genpd and its governor.
> 
> For this reason, introduce a new PSCI DT helper function,
> psci_dt_pm_domains_parse_states(), which parses and converts the
> hierarchically described domain idle states from DT, into regular flattened
> cpuidle states. The converted states are added to the existing cpuidle
> driver's array of idle states, which make them available for cpuidle.
> 
> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
> ---
> 
> Changes:
> 	- Some simplification of the code.
> 
> ---
>  drivers/firmware/psci/psci.h           |   5 ++
>  drivers/firmware/psci/psci_pm_domain.c | 118 +++++++++++++++++++++++++
>  2 files changed, 123 insertions(+)
> 
> diff --git a/drivers/firmware/psci/psci.h b/drivers/firmware/psci/psci.h
> index 00d2e3dcef49..c36e0e6649e9 100644
> --- a/drivers/firmware/psci/psci.h
> +++ b/drivers/firmware/psci/psci.h
> @@ -3,6 +3,7 @@
>  #ifndef __PSCI_H
>  #define __PSCI_H
>  
> +struct cpuidle_driver;
>  struct device_node;
>  
>  int psci_set_osi_mode(void);
> @@ -13,8 +14,12 @@ void psci_set_domain_state(u32 state);
>  int psci_dt_parse_state_node(struct device_node *np, u32 *state);
>  #ifdef CONFIG_PM_GENERIC_DOMAINS_OF
>  int psci_dt_init_pm_domains(struct device_node *np);
> +int psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
> +		struct device_node *cpu_node, u32 *psci_states);
>  #else
>  static inline int psci_dt_init_pm_domains(struct device_node *np) { return 0; }
> +static inline int psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
> +		struct device_node *cpu_node, u32 *psci_states) { return 0; }
>  #endif
>  #endif
>  
> diff --git a/drivers/firmware/psci/psci_pm_domain.c b/drivers/firmware/psci/psci_pm_domain.c
> index 3c6ca846caf4..3aa645dba81b 100644
> --- a/drivers/firmware/psci/psci_pm_domain.c
> +++ b/drivers/firmware/psci/psci_pm_domain.c
> @@ -14,6 +14,10 @@
>  #include <linux/pm_domain.h>
>  #include <linux/slab.h>
>  #include <linux/string.h>
> +#include <linux/cpuidle.h>
> +#include <linux/cpu_pm.h>
> +
> +#include <asm/cpuidle.h>
>  
>  #include "psci.h"
>  
> @@ -104,6 +108,53 @@ static void psci_pd_free_states(struct genpd_power_state *states,
>  	kfree(states);
>  }
>  
> +static int psci_pd_enter_pc(struct cpuidle_device *dev,
> +			struct cpuidle_driver *drv, int idx)
> +{
> +	return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx);
> +}
> +
> +static void psci_pd_enter_s2idle_pc(struct cpuidle_device *dev,
> +			struct cpuidle_driver *drv, int idx)
> +{
> +	psci_pd_enter_pc(dev, drv, idx);
> +}
> +
> +static void psci_pd_convert_states(struct cpuidle_state *idle_state,
> +			u32 *psci_state, struct genpd_power_state *state)
> +{
> +	u32 *state_data = state->data;
> +	u64 target_residency_us = state->residency_ns;
> +	u64 exit_latency_us = state->power_on_latency_ns +
> +			state->power_off_latency_ns;
> +
> +	*psci_state = *state_data;
> +	do_div(target_residency_us, 1000);
> +	idle_state->target_residency = target_residency_us;
> +	do_div(exit_latency_us, 1000);
> +	idle_state->exit_latency = exit_latency_us;
> +	idle_state->enter = &psci_pd_enter_pc;
> +	idle_state->enter_s2idle = &psci_pd_enter_s2idle_pc;
> +	idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;

This is arbitrary and not necessarily true.

I think that this patch is useful to represent my reservations about the
current approach. As a matter of fact, idle state entry will always be a
CPUidle decision.

You only need PM domain information to understand when all CPUs
in a power domain are actually idle but that's all genPD can do
in this respect.

I think this patchset would be much simpler if both the CPUidle and
genPD governors worked on *one* set of idle states, globally
indexed (and that would be true for the PSCI suspend parameters too).

To work with a unified set of idle states between CPUidle and genPD
(tossing some ideas around):

- We can implement a genPD CPUidle governor that in its select method
  takes into account genPD information (for instance by avoiding
  selection of idle states that require multiple cpus to be in idle
  to be effectively active)
- We can use genPD to enable/disable CPUidle states through runtime
  PM information

There may be other ways. My point is that current code, with two (or
more if the hierarchy grows) sets of idle states across two subsystems
(CPUidle and genPD) is not very well defined and honestly very hard to
grasp and prone to errors.
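
To make the first idea above a bit more concrete, here is a loose sketch of such a select() method. Everything besides the cpuidle governor callback signature is hypothetical: baseline_select() and the two domain helpers do not exist, they just stand in for "ask an existing governor, then consult genPD runtime information".

#include <linux/cpuidle.h>

/* Hypothetical helpers, not existing kernel API. */
extern int baseline_select(struct cpuidle_driver *drv,
			   struct cpuidle_device *dev, bool *stop_tick);
extern bool state_needs_whole_domain_idle(struct cpuidle_driver *drv, int idx);
extern bool other_cpus_in_domain_idle(unsigned int cpu);

static int genpd_aware_select(struct cpuidle_driver *drv,
			      struct cpuidle_device *dev, bool *stop_tick)
{
	/* Start from a baseline per-CPU decision... */
	int idx = baseline_select(drv, dev, stop_tick);

	/*
	 * ...then demote it to a shallower state whenever the chosen one
	 * only takes effect with the whole domain idle and the other CPUs
	 * in the domain are still running.
	 */
	while (idx > 0 && state_needs_whole_domain_idle(drv, idx) &&
	       !other_cpus_in_domain_idle(dev->cpu))
		idx--;

	return idx;
}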

> +
> +	strncpy(idle_state->name, to_of_node(state->fwnode)->name,
> +		CPUIDLE_NAME_LEN - 1);
> +	strncpy(idle_state->desc, to_of_node(state->fwnode)->name,
> +		CPUIDLE_NAME_LEN - 1);
> +}
> +
> +static bool psci_pd_is_provider(struct device_node *np)
> +{
> +	struct psci_pd_provider *pd_prov, *it;
> +
> +	list_for_each_entry_safe(pd_prov, it, &psci_pd_providers, link) {
> +		if (pd_prov->node == np)
> +			return true;
> +	}
> +
> +	return false;
> +}
> +
>  static int psci_pd_init(struct device_node *np)
>  {
>  	struct generic_pm_domain *pd;
> @@ -265,4 +316,71 @@ int psci_dt_init_pm_domains(struct device_node *np)
>  	pr_err("failed to create CPU PM domains ret=%d\n", ret);
>  	return ret;
>  }
> +
> +int psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
> +			struct device_node *cpu_node, u32 *psci_states)
> +{
> +	struct genpd_power_state *pd_states;
> +	struct of_phandle_args args;
> +	int ret, pd_state_count, i, state_idx, psci_idx;
> +	u32 cpu_psci_state = psci_states[drv->state_count - 2];

This (-2) is very dodgy and I doubt it would work on hierarchies going
above "cluster" level.

As I say above, I think we should work towards a single array of
idle states to be selected by a CPUidle governor using genPD
runtime information to bias the results according to the number
of CPUs in a genPD that entered/exit idle.

To be more precise, all idle states should be "domain-idle-state"
compatible, even the CPU ones; the distinction between what CPUidle
and genPD manage is a bit stretched IMO in this patchset.

We will have a chance to talk about this but I thought I would
comment publicly in case anyone else is willing to chime in: this
is not a PSCI problem at all, it is a CPUidle/genPD coexistence
design problem which is much broader.

Lorenzo

> +	struct device_node *np = of_node_get(cpu_node);
> +
> +
> +	/* Walk the CPU topology to find compatible domain idle states. */
> +	while (np) {
> +		ret = of_parse_phandle_with_args(np, "power-domains",
> +					"#power-domain-cells", 0, &args);
> +		of_node_put(np);
> +		if (ret)
> +			return 0;
> +
> +		np = args.np;
> +
> +		/* Verify that the node represents a psci pd provider. */
> +		if (!psci_pd_is_provider(np)) {
> +			of_node_put(np);
> +			return 0;
> +		}
> +
> +		/* Parse for compatible domain idle states. */
> +		ret = psci_pd_parse_states(np, &pd_states, &pd_state_count);
> +		if (ret) {
> +			of_node_put(np);
> +			return ret;
> +		}
> +
> +		i = 0;
> +		while (i < pd_state_count) {
> +
> +			state_idx = drv->state_count;
> +			if (state_idx >= CPUIDLE_STATE_MAX) {
> +				pr_warn("exceeding max cpuidle states\n");
> +				of_node_put(np);
> +				return 0;
> +			}
> +
> +			/* WFI state is not part of psci_states. */
> +			psci_idx = state_idx - 1 + i;
> +			psci_pd_convert_states(&drv->states[state_idx + i],
> +					&psci_states[psci_idx], &pd_states[i]);
> +
> +			/*
> +			 * In the hierarchical CPU topology the master PM domain
> +			 * idle state's DT property, "arm,psci-suspend-param",
> +			 * don't contain the bits for the idle state of the CPU,
> +			 * let's add those here.
> +			 */
> +			psci_states[psci_idx] |= cpu_psci_state;
> +			pr_debug("psci-power-state %#x index %d\n",
> +				psci_states[psci_idx], psci_idx);
> +
> +			drv->state_count++;
> +			i++;
> +		}
> +		psci_pd_free_states(pd_states, pd_state_count);
> +	}
> +
> +	return 0;
> +}
>  #endif
> -- 
> 2.17.1
>
Ulf Hansson July 16, 2019, 8:45 a.m. UTC | #2
Lorenzo,

Just wanted to give some feedback to you in public, even if we have
already discussed this series, offlist, last week.

On Tue, 9 Jul 2019 at 17:31, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
>
> On Mon, May 13, 2019 at 09:22:52PM +0200, Ulf Hansson wrote:
> > If the hierarchical CPU topology is used, but the OS initiated mode isn't
> > supported, we need to rely solely on the regular cpuidle framework to
> > manage the idle state selection, rather than using genpd and its governor.
> >
> > For this reason, introduce a new PSCI DT helper function,
> > psci_dt_pm_domains_parse_states(), which parses and converts the
> > hierarchically described domain idle states from DT, into regular flattened
> > cpuidle states. The converted states are added to the existing cpuidle
> > driver's array of idle states, which make them available for cpuidle.
> >
> > Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
> > ---
> >
> > Changes:
> >       - Some simplification of the code.
> >
> > ---
> >  drivers/firmware/psci/psci.h           |   5 ++
> >  drivers/firmware/psci/psci_pm_domain.c | 118 +++++++++++++++++++++++++
> >  2 files changed, 123 insertions(+)
> >
> > diff --git a/drivers/firmware/psci/psci.h b/drivers/firmware/psci/psci.h
> > index 00d2e3dcef49..c36e0e6649e9 100644
> > --- a/drivers/firmware/psci/psci.h
> > +++ b/drivers/firmware/psci/psci.h
> > @@ -3,6 +3,7 @@
> >  #ifndef __PSCI_H
> >  #define __PSCI_H
> >
> > +struct cpuidle_driver;
> >  struct device_node;
> >
> >  int psci_set_osi_mode(void);
> > @@ -13,8 +14,12 @@ void psci_set_domain_state(u32 state);
> >  int psci_dt_parse_state_node(struct device_node *np, u32 *state);
> >  #ifdef CONFIG_PM_GENERIC_DOMAINS_OF
> >  int psci_dt_init_pm_domains(struct device_node *np);
> > +int psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
> > +             struct device_node *cpu_node, u32 *psci_states);
> >  #else
> >  static inline int psci_dt_init_pm_domains(struct device_node *np) { return 0; }
> > +static inline int psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
> > +             struct device_node *cpu_node, u32 *psci_states) { return 0; }
> >  #endif
> >  #endif
> >
> > diff --git a/drivers/firmware/psci/psci_pm_domain.c b/drivers/firmware/psci/psci_pm_domain.c
> > index 3c6ca846caf4..3aa645dba81b 100644
> > --- a/drivers/firmware/psci/psci_pm_domain.c
> > +++ b/drivers/firmware/psci/psci_pm_domain.c
> > @@ -14,6 +14,10 @@
> >  #include <linux/pm_domain.h>
> >  #include <linux/slab.h>
> >  #include <linux/string.h>
> > +#include <linux/cpuidle.h>
> > +#include <linux/cpu_pm.h>
> > +
> > +#include <asm/cpuidle.h>
> >
> >  #include "psci.h"
> >
> > @@ -104,6 +108,53 @@ static void psci_pd_free_states(struct genpd_power_state *states,
> >       kfree(states);
> >  }
> >
> > +static int psci_pd_enter_pc(struct cpuidle_device *dev,
> > +                     struct cpuidle_driver *drv, int idx)
> > +{
> > +     return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx);
> > +}
> > +
> > +static void psci_pd_enter_s2idle_pc(struct cpuidle_device *dev,
> > +                     struct cpuidle_driver *drv, int idx)
> > +{
> > +     psci_pd_enter_pc(dev, drv, idx);
> > +}
> > +
> > +static void psci_pd_convert_states(struct cpuidle_state *idle_state,
> > +                     u32 *psci_state, struct genpd_power_state *state)
> > +{
> > +     u32 *state_data = state->data;
> > +     u64 target_residency_us = state->residency_ns;
> > +     u64 exit_latency_us = state->power_on_latency_ns +
> > +                     state->power_off_latency_ns;
> > +
> > +     *psci_state = *state_data;
> > +     do_div(target_residency_us, 1000);
> > +     idle_state->target_residency = target_residency_us;
> > +     do_div(exit_latency_us, 1000);
> > +     idle_state->exit_latency = exit_latency_us;
> > +     idle_state->enter = &psci_pd_enter_pc;
> > +     idle_state->enter_s2idle = &psci_pd_enter_s2idle_pc;
> > +     idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;
>
> This is arbitrary and not necessarily true.

The arbitrary thing you refer to here, is it the
CPUIDLE_FLAG_TIMER_STOP flag? Or are you referring to the complete function
psci_pd_convert_states()?

>
> I think that this patch is useful to represent my reservations about the
> current approach. As a matter of fact, idle state entry will always be a
> CPUidle decision.
>
> You only need PM domain information to understand when all CPUs
> in a power domain are actually idle but that's all genPD can do
> in this respect.
>
> I think this patchset would be much simpler if both CPUidle and
> genPD governor would work on *one* set of idle states, globally
> indexed (and that would be true for PSCI suspend parameters too).
>
> To work with a unified set of idle states between CPUidle and genPD
> (tossing some ideas around):
>
> - We can implement a genPD CPUidle governor that in its select method
>   takes into account genPD information (for instance by avoiding
>   selection of idle states that require multiple cpus to be in idle
>   to be effectively active)
> - We can use genPD to enable/disable CPUidle states through runtime
>   PM information

I don't understand how to make this work.

The CPUidle governor works on a per-CPU basis. The genpd governor works
on a per PM domain basis, which typically can be a group of CPUs (and
even other devices) via subdomains, for example.

1.
In case of Linux being in *charge* of what idle state to pick for a
group of CPUs, that decision is best done by the genpd governor as it
operates on "groups" of CPUs. This is used for PSCI OSI mode. Of
course, there are idle states also per CPU, which potentially could be
managed by the genpd governor as well, but at this point I decided to
re-use the cpuidle governor as it's already doing the job (a rough
sketch of this arrangement follows below).

2. In case the decision of what idle state to enter for the group is
done by the FW, we can rely solely on the cpuidle governor and let it
select states on a per-CPU basis. This is used for PSCI PC mode.
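
As a rough sketch of case 1 (the OSI-mode arrangement mentioned above), the enter callback for a CPU could look roughly like this; the per-CPU genpd device array is a made-up name, and the assumption is simply that dropping the runtime PM reference is what lets the genpd governor pick the domain idle state once the last CPU in the domain goes down.

#include <linux/cpuidle.h>
#include <linux/pm_runtime.h>

#include <asm/cpuidle.h>

/* Hypothetical per-CPU device attached to the CPU's PM domain. */
extern struct device *psci_cpu_pd_dev[];

static int psci_enter_domain_idle_state_sketch(struct cpuidle_device *dev,
					struct cpuidle_driver *drv, int idx)
{
	struct device *pd_dev = psci_cpu_pd_dev[dev->cpu];
	int ret;

	/* Last CPU down lets the genpd governor select a domain state. */
	pm_runtime_put_sync_suspend(pd_dev);

	ret = CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx);

	/* Logically power the domain back up on wakeup. */
	pm_runtime_get_sync(pd_dev);

	return ret;
}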

>
> There may be other ways. My point is that current code, with two (or
> more if the hierarchy grows) sets of idle states across two subsystems
> (CPUidle and genPD) is not very well defined and honestly very hard to
> grasp and prone to errors.

The complexity is there, I admit that.

I guess some deeper insight into genpd, its governor and runtime PM is
needed when reviewing this, of course. As an option, you may also have
a look at my slides [1] from OSPM (Pisa) in May this year, which via
flow charts try to describe things in more detail.

In our offlist meeting, we discussed that perhaps moving some of the
new PSCI code introduced in this series into a cpuidle driver
instead could make things clearer. For sure, I can explore that
option, but before I go there, I think we should agree on it publicly.

In principle what it means is to invent a special cpuidle driver for
PSCI, so we would need access to some of the PSCI internal functions,
for example.

One thing though, the initialization of the PSCI PM domain topology is
a separate step, managed via the new initcall, psci_dt_topology_init()
(introduced in patch 11). That part still seems to belong to the
PSCI code, don't you think?

>
> > +
> > +     strncpy(idle_state->name, to_of_node(state->fwnode)->name,
> > +             CPUIDLE_NAME_LEN - 1);
> > +     strncpy(idle_state->desc, to_of_node(state->fwnode)->name,
> > +             CPUIDLE_NAME_LEN - 1);
> > +}
> > +
> > +static bool psci_pd_is_provider(struct device_node *np)
> > +{
> > +     struct psci_pd_provider *pd_prov, *it;
> > +
> > +     list_for_each_entry_safe(pd_prov, it, &psci_pd_providers, link) {
> > +             if (pd_prov->node == np)
> > +                     return true;
> > +     }
> > +
> > +     return false;
> > +}
> > +
> >  static int psci_pd_init(struct device_node *np)
> >  {
> >       struct generic_pm_domain *pd;
> > @@ -265,4 +316,71 @@ int psci_dt_init_pm_domains(struct device_node *np)
> >       pr_err("failed to create CPU PM domains ret=%d\n", ret);
> >       return ret;
> >  }
> > +
> > +int psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
> > +                     struct device_node *cpu_node, u32 *psci_states)
> > +{
> > +     struct genpd_power_state *pd_states;
> > +     struct of_phandle_args args;
> > +     int ret, pd_state_count, i, state_idx, psci_idx;
> > +     u32 cpu_psci_state = psci_states[drv->state_count - 2];
>
> This (-2) is very dodgy and I doubt it would work on hierarchies going
> above "cluster" level.
>
> As I say above, I think we should work towards a single array of
> idle states to be selected by a CPUidle governor using genPD
> runtime information to bias the results according to the number
> of CPUs in a genPD that entered/exit idle.
>
> To be more precise, all idles states should be "domain-idle-state"
> compatible, even the CPU ones, the distinction between what CPUidle
> and genPD manage is a bit stretched IMO in this patchset.
>
> We will have a chance to talk about this but I thought I would
> comment publically if anyone else is willing to chime in, this
> is not a PSCI problem at all, it is a CPUidle/genPD coexistence
> design problem which is much broader.

To move this forward, I think we need to move from vague ideas to
clear and distinct plans. Whatever that means. :-)

I understand you are concerned about the level of changes introduced
to the PSCI code. As I stated somewhere in earlier replies, I picked
that approach as I thought it was better to implement things in a
PSCI-specific manner to start with; then we could move things around
when we realize that it makes sense.

Anyway, as a suggestion to address your concern, how about this:

1. Move some things out to a PSCI cpuidle driver. We need to decide
more exactly on what to move and find the right level for the
interfaces.
2. Don't attach the CPU to the PM domain topology in case the PSCI PC
mode is used. I think this makes it easier, at least as a first step,
to understand when runtime PM needs to be used/enabled.
3. Would it help if I volunteered to help you as a maintainer for
PSCI, at least for the part of the new code that is being introduced?

[...]

Kind regards
Uffe
Lorenzo Pieralisi July 16, 2019, 2:51 p.m. UTC | #3
On Tue, Jul 16, 2019 at 10:45:49AM +0200, Ulf Hansson wrote:

[...]

> > > +static void psci_pd_convert_states(struct cpuidle_state *idle_state,
> > > +                     u32 *psci_state, struct genpd_power_state *state)
> > > +{
> > > +     u32 *state_data = state->data;
> > > +     u64 target_residency_us = state->residency_ns;
> > > +     u64 exit_latency_us = state->power_on_latency_ns +
> > > +                     state->power_off_latency_ns;
> > > +
> > > +     *psci_state = *state_data;
> > > +     do_div(target_residency_us, 1000);
> > > +     idle_state->target_residency = target_residency_us;
> > > +     do_div(exit_latency_us, 1000);
> > > +     idle_state->exit_latency = exit_latency_us;
> > > +     idle_state->enter = &psci_pd_enter_pc;
> > > +     idle_state->enter_s2idle = &psci_pd_enter_s2idle_pc;
> > > +     idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;
> >
> > This is arbitrary and not necessarily true.
> 
> The arbitrary thing you refer to here, is that the
> CPUIDLE_FLAG_TIMER_STOP? Or are you referring to the complete function
> psci_pd_convert_states()?

I refer to CPUIDLE_FLAG_TIMER_STOP. I think that on platform-coordinated
systems we should not bother about the hierarchical representation of the
states (I understand I asked you to make it work but it has become too
complex, I would rather focus on making the hierarchical representation
work for all idle states combination in OSI mode).

Plus side, another level of complexity removed.

> > I think that this patch is useful to represent my reservations about the
> > current approach. As a matter of fact, idle state entry will always be a
> > CPUidle decision.
> >
> > You only need PM domain information to understand when all CPUs
> > in a power domain are actually idle but that's all genPD can do
> > in this respect.
> >
> > I think this patchset would be much simpler if both CPUidle and
> > genPD governor would work on *one* set of idle states, globally
> > indexed (and that would be true for PSCI suspend parameters too).
> >
> > To work with a unified set of idle states between CPUidle and genPD
> > (tossing some ideas around):
> >
> > - We can implement a genPD CPUidle governor that in its select method
> >   takes into account genPD information (for instance by avoiding
> >   selection of idle states that require multiple cpus to be in idle
> >   to be effectively active)
> > - We can use genPD to enable/disable CPUidle states through runtime
> >   PM information
> 
> I don't understand how to make this work.
> 
> The CPUidle governor works on per CPU basis. The genpd governor works
> on per PM domain basis, which typically can be a group of CPUs (and
> even other devices) via subdomains, for example.
> 
> 1.
> In case of Linux being in *charge* of what idle state to pick for a
> group of CPUs, that decision is best done by the genpd governor as it
> operates on "groups" of CPUs. This is used for PSCI OSI mode. Of
> course, there are idle states also per CPU, which potentially could be
> managed by the genpd governor as well, but at this point I decided to
> re-use the cpuidle governor as it's already doing the job.
> 
> 2. In case the decision of what idle state to enter for the group is
> done by the FW, we can rely solely on the cpuidle governor and let it
> select states per CPU basis. This is used for PSCI PC mode.
> 
> >
> > There may be other ways. My point is that current code, with two (or
> > more if the hierarchy grows) sets of idle states across two subsystems
> > (CPUidle and genPD) is not very well defined and honestly very hard to
> > grasp and prone to errors.
> 
> The complexity is there, I admit that.
> 
> I guess some deeper insight about genpd+its governor+runtime PM are
> needed when reviewing this, of course. As an option, you may also have
> a look at my slides [1] from OSPM (Pisa) in May this year, which via
> flow charts try to describes things in more detail.
> 
> In our offlist meeting, we discussed that perhaps moving some of the
> new PSCI code introduced in this series, into a cpuidle driver
> instead, could make things more clear. For sure, I can explore that
> option, but before I go there, I think we should agree on it publicly.

I will do it but given that the generic idle infrastructure basically
is there for PSCI and:

drivers/soc/qcom/spm.c

if we create a PSCI CPUidle driver we can write one for qcom-spm and
remove the generic idle infrastructure; there would not be much
point in keeping it in the kernel. At least on ARM64, not using
PSCI for CPUidle is not even an option.

> In principle what it means is to invent a special cpuidle driver for
> PSCI, so we would need access to some of the PSCI internal functions,
> for example.

Yes.

> One thing though, the initialization of the PSCI PM domain topology is
> a separate step, managed via the new initcall, psci_dt_topology_init()
> (introduced in patch 11). That part still seems to be belong to the
> PSCI code, don't you think?

Yes but at least we can call it from a sensible place (well, sensible,
most likely from an initcall given how idle drivers are initialized).

> > > +     strncpy(idle_state->name, to_of_node(state->fwnode)->name,
> > > +             CPUIDLE_NAME_LEN - 1);
> > > +     strncpy(idle_state->desc, to_of_node(state->fwnode)->name,
> > > +             CPUIDLE_NAME_LEN - 1);
> > > +}
> > > +
> > > +static bool psci_pd_is_provider(struct device_node *np)
> > > +{
> > > +     struct psci_pd_provider *pd_prov, *it;
> > > +
> > > +     list_for_each_entry_safe(pd_prov, it, &psci_pd_providers, link) {
> > > +             if (pd_prov->node == np)
> > > +                     return true;
> > > +     }
> > > +
> > > +     return false;
> > > +}
> > > +
> > >  static int psci_pd_init(struct device_node *np)
> > >  {
> > >       struct generic_pm_domain *pd;
> > > @@ -265,4 +316,71 @@ int psci_dt_init_pm_domains(struct device_node *np)
> > >       pr_err("failed to create CPU PM domains ret=%d\n", ret);
> > >       return ret;
> > >  }
> > > +
> > > +int psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
> > > +                     struct device_node *cpu_node, u32 *psci_states)
> > > +{
> > > +     struct genpd_power_state *pd_states;
> > > +     struct of_phandle_args args;
> > > +     int ret, pd_state_count, i, state_idx, psci_idx;
> > > +     u32 cpu_psci_state = psci_states[drv->state_count - 2];
> >
> > This (-2) is very dodgy and I doubt it would work on hierarchies going
> > above "cluster" level.
> >
> > As I say above, I think we should work towards a single array of
> > idle states to be selected by a CPUidle governor using genPD
> > runtime information to bias the results according to the number
> > of CPUs in a genPD that entered/exit idle.
> >
> > To be more precise, all idles states should be "domain-idle-state"
> > compatible, even the CPU ones, the distinction between what CPUidle
> > and genPD manage is a bit stretched IMO in this patchset.
> >
> > We will have a chance to talk about this but I thought I would
> > comment publically if anyone else is willing to chime in, this
> > is not a PSCI problem at all, it is a CPUidle/genPD coexistence
> > design problem which is much broader.
> 
> To move this forward, I think we need to move from vague ideas to
> clear and distinct plans. Whatever that means. :-)

See above.

> I understand you are concerned about the level of changes introduced
> to the PSCI code. As I stated somewhere in earlier replies, I picked
> that approach as I thought it was better to implement things in a PSCI
> specific manner to start with, then we could move things around, when
> we realize that it make sense.

I am also concerned about how the idle states are managed in
this patchset and I am pretty certain it will break when we
move away from a simple hierarchy with one CPU state and one
cluster state; we will comment on the specifics.

Moving PSCI code into a CPUidle driver will cater for the rest.

> Anyway, as a suggestion to address your concern, how about this:
> 
> 1. Move some things out to a PSCI cpuidle driver. We need to decide
> more exactly on what to move and find the right level for the
> interfaces.

I will do it and post patches asap.

> 2. Don't attach the CPU to the PM domain topology in case the PSCI PC
> mode is used. I think this makes it easier, at least as a first step,
> to understand when runtime PM needs to be used/enabled.

In the PSCI CPUidle driver we can have two distinct struct
cpuidle_state->enter functions for PC and OSI, no overhead
for PC, runtime PM for OSI, decoupling done.

We can choose one or the other depending on whether:

OSI iff:
- OSI is available
- hierarchical idle states are present in DT

otherwise PC.

That's what this patch does but we will do it in a unified file.
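
In code, that choice could boil down to something like the following sketch at driver init time (the enter callback names are made up for illustration, not taken from this series):

#include <linux/cpuidle.h>

/* Hypothetical enter callbacks in the unified PSCI cpuidle driver. */
extern int psci_enter_idle_state(struct cpuidle_device *dev,
				 struct cpuidle_driver *drv, int idx);
extern int psci_enter_domain_idle_state(struct cpuidle_device *dev,
					struct cpuidle_driver *drv, int idx);

static void psci_idle_set_enter(struct cpuidle_driver *drv,
				bool osi_available, bool dt_hierarchical)
{
	bool use_osi = osi_available && dt_hierarchical;
	int i;

	/* State 0 (WFI) never needs the runtime PM based path. */
	for (i = 1; i < drv->state_count; i++)
		drv->states[i].enter = use_osi ? psci_enter_domain_idle_state
					       : psci_enter_idle_state;
}

So the plain PC path keeps exactly the overhead it has today, and only the OSI path pays for runtime PM.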

> 3. Would it help if I volunteer to help you guys as a maintainer for
> PSCI. At least for the part of the new code that becomes introduced?

We will do as described above if that makes sense.

Thanks,
Lorenzo
Ulf Hansson July 18, 2019, 11:43 a.m. UTC | #4
On Tue, 16 Jul 2019 at 16:51, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
>
> On Tue, Jul 16, 2019 at 10:45:49AM +0200, Ulf Hansson wrote:
>
> [...]
>
> > > > +static void psci_pd_convert_states(struct cpuidle_state *idle_state,
> > > > +                     u32 *psci_state, struct genpd_power_state *state)
> > > > +{
> > > > +     u32 *state_data = state->data;
> > > > +     u64 target_residency_us = state->residency_ns;
> > > > +     u64 exit_latency_us = state->power_on_latency_ns +
> > > > +                     state->power_off_latency_ns;
> > > > +
> > > > +     *psci_state = *state_data;
> > > > +     do_div(target_residency_us, 1000);
> > > > +     idle_state->target_residency = target_residency_us;
> > > > +     do_div(exit_latency_us, 1000);
> > > > +     idle_state->exit_latency = exit_latency_us;
> > > > +     idle_state->enter = &psci_pd_enter_pc;
> > > > +     idle_state->enter_s2idle = &psci_pd_enter_s2idle_pc;
> > > > +     idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;
> > >
> > > This is arbitrary and not necessarily true.
> >
> > The arbitrary thing you refer to here, is that the
> > CPUIDLE_FLAG_TIMER_STOP? Or are you referring to the complete function
> > psci_pd_convert_states()?
>
> I refer to CPUIDLE_FLAG_TIMER_STOP. I think that on platform coordinated
> system we should not bother about the hierarchical representation of the
> states (I understand I asked you to make it work but it has become too
> complex, I would rather focus on making the hierarchical representation
> work for all idle states combination in OSI mode).
>
> Plus side, another level of complexity removed.

Oh, well, that's why I left it out in the first place, to start simple. :-)

Anyway, no problem, I will revert back to that option.

>
> > > I think that this patch is useful to represent my reservations about the
> > > current approach. As a matter of fact, idle state entry will always be a
> > > CPUidle decision.
> > >
> > > You only need PM domain information to understand when all CPUs
> > > in a power domain are actually idle but that's all genPD can do
> > > in this respect.
> > >
> > > I think this patchset would be much simpler if both CPUidle and
> > > genPD governor would work on *one* set of idle states, globally
> > > indexed (and that would be true for PSCI suspend parameters too).
> > >
> > > To work with a unified set of idle states between CPUidle and genPD
> > > (tossing some ideas around):
> > >
> > > - We can implement a genPD CPUidle governor that in its select method
> > >   takes into account genPD information (for instance by avoiding
> > >   selection of idle states that require multiple cpus to be in idle
> > >   to be effectively active)
> > > - We can use genPD to enable/disable CPUidle states through runtime
> > >   PM information
> >
> > I don't understand how to make this work.
> >
> > The CPUidle governor works on per CPU basis. The genpd governor works
> > on per PM domain basis, which typically can be a group of CPUs (and
> > even other devices) via subdomains, for example.
> >
> > 1.
> > In case of Linux being in *charge* of what idle state to pick for a
> > group of CPUs, that decision is best done by the genpd governor as it
> > operates on "groups" of CPUs. This is used for PSCI OSI mode. Of
> > course, there are idle states also per CPU, which potentially could be
> > managed by the genpd governor as well, but at this point I decided to
> > re-use the cpuidle governor as it's already doing the job.
> >
> > 2. In case the decision of what idle state to enter for the group is
> > done by the FW, we can rely solely on the cpuidle governor and let it
> > select states per CPU basis. This is used for PSCI PC mode.
> >
> > >
> > > There may be other ways. My point is that current code, with two (or
> > > more if the hierarchy grows) sets of idle states across two subsystems
> > > (CPUidle and genPD) is not very well defined and honestly very hard to
> > > grasp and prone to errors.
> >
> > The complexity is there, I admit that.
> >
> > I guess some deeper insight about genpd+its governor+runtime PM are
> > needed when reviewing this, of course. As an option, you may also have
> > a look at my slides [1] from OSPM (Pisa) in May this year, which via
> > flow charts try to describes things in more detail.
> >
> > In our offlist meeting, we discussed that perhaps moving some of the
> > new PSCI code introduced in this series, into a cpuidle driver
> > instead, could make things more clear. For sure, I can explore that
> > option, but before I go there, I think we should agree on it publicly.
>
> I will do it but given that the generic idle infrastructure basically
> is there for PSCI and:
>
> drivers/soc/qcom/spm.c
>
> if we create a PSCI CPUidle driver we can write one for qcom-spm and
> remove the generic idle infrastructure, there would not be much
> point in keeping it in the kernel; at least on ARM64 not using
> PSCI for CPUidle is not even an option.

To make it clear, I definitely like this idea!

I am not really fond of the current cpuidle backend infrastructure for
ARM/ARM64; it's really hard to follow all the things that happen in
those corresponding callbacks.

>
> > In principle what it means is to invent a special cpuidle driver for
> > PSCI, so we would need access to some of the PSCI internal functions,
> > for example.
>
> Yes.
>
> > One thing though, the initialization of the PSCI PM domain topology is
> > a separate step, managed via the new initcall, psci_dt_topology_init()
> > (introduced in patch 11). That part still seems to be belong to the
> > PSCI code, don't you think?
>
> Yes but at least we can call it from a sensible place (well, sensible,
> most likely from an initcall given how idle drivers are initialized).

Okay.

>
> > > > +     strncpy(idle_state->name, to_of_node(state->fwnode)->name,
> > > > +             CPUIDLE_NAME_LEN - 1);
> > > > +     strncpy(idle_state->desc, to_of_node(state->fwnode)->name,
> > > > +             CPUIDLE_NAME_LEN - 1);
> > > > +}
> > > > +
> > > > +static bool psci_pd_is_provider(struct device_node *np)
> > > > +{
> > > > +     struct psci_pd_provider *pd_prov, *it;
> > > > +
> > > > +     list_for_each_entry_safe(pd_prov, it, &psci_pd_providers, link) {
> > > > +             if (pd_prov->node == np)
> > > > +                     return true;
> > > > +     }
> > > > +
> > > > +     return false;
> > > > +}
> > > > +
> > > >  static int psci_pd_init(struct device_node *np)
> > > >  {
> > > >       struct generic_pm_domain *pd;
> > > > @@ -265,4 +316,71 @@ int psci_dt_init_pm_domains(struct device_node *np)
> > > >       pr_err("failed to create CPU PM domains ret=%d\n", ret);
> > > >       return ret;
> > > >  }
> > > > +
> > > > +int psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
> > > > +                     struct device_node *cpu_node, u32 *psci_states)
> > > > +{
> > > > +     struct genpd_power_state *pd_states;
> > > > +     struct of_phandle_args args;
> > > > +     int ret, pd_state_count, i, state_idx, psci_idx;
> > > > +     u32 cpu_psci_state = psci_states[drv->state_count - 2];
> > >
> > > This (-2) is very dodgy and I doubt it would work on hierarchies going
> > > above "cluster" level.
> > >
> > > As I say above, I think we should work towards a single array of
> > > idle states to be selected by a CPUidle governor using genPD
> > > runtime information to bias the results according to the number
> > > of CPUs in a genPD that entered/exit idle.
> > >
> > > To be more precise, all idles states should be "domain-idle-state"
> > > compatible, even the CPU ones, the distinction between what CPUidle
> > > and genPD manage is a bit stretched IMO in this patchset.
> > >
> > > We will have a chance to talk about this but I thought I would
> > > comment publically if anyone else is willing to chime in, this
> > > is not a PSCI problem at all, it is a CPUidle/genPD coexistence
> > > design problem which is much broader.
> >
> > To move this forward, I think we need to move from vague ideas to
> > clear and distinct plans. Whatever that means. :-)
>
> See above.
>
> > I understand you are concerned about the level of changes introduced
> > to the PSCI code. As I stated somewhere in earlier replies, I picked
> > that approach as I thought it was better to implement things in a PSCI
> > specific manner to start with, then we could move things around, when
> > we realize that it make sense.
>
> I am also concerned about how the idle states are managed in
> this patchset and I am pretty certain it will break when we
> move away from a simple hierarchy with one CPU state and one
> cluster state, we will comment on the specifics.

The intent with the series is that this should be supported, no matter
the number of states or the level of hierarchy.

Well, there are some limitations/bugs in genpd (and the genpd
governor) when supporting more than two levels, but that is on my TODO
list to fix. Again, some of my slides from OSPM Pisa explain this.

More importantly, the current PSCI deployment should remain
unchanged after this series (unless there is a bug somewhere, as was
also pointed out by Sudeep in another reply).

>
> Moving PSCI code into a CPUidle driver will cater for the rest.

Great news, we seem to have a plan!

>
> > Anyway, as a suggestion to address your concern, how about this:
> >
> > 1. Move some things out to a PSCI cpuidle driver. We need to decide
> > more exactly on what to move and find the right level for the
> > interfaces.
>
> I will do it and post patches asap.

Okay, so I will wait for you to convert the cpuidle-arm driver into
a cpuidle-psci driver (and all the changes that come with it) and
then re-base my series on top.

Then, would you mind sharing (even in an early phase) a
branch/git-tree so I can start re-basing my series on top?

>
> > 2. Don't attach the CPU to the PM domain topology in case the PSCI PC
> > mode is used. I think this makes it easier, at least as a first step,
> > to understand when runtime PM needs to be used/enabled.
>
> In the PSCI CPUidle driver we can have two distinct struct
> cpuidle_state->enter functions for PC and OSI, no overhead
> for PC, runtime PM for OSI, decoupling done.

Good idea!

>
> We can choose one or the other depending on whether:
>
> OSI iff:
> - OSI is available
> - hierarchical idle states are present in DT
>
> otherwise PC.
>
> That's what this patch does but we will do it in a unified file.

Sure, it makes sense.

>
> > 3. Would it help if I volunteer to help you guys as a maintainer for
> > PSCI. At least for the part of the new code that becomes introduced?
>
> We will do as described above if that makes sense.

Yep, I am okay with your suggestions, assuming I have understood them correctly.

BTW, have you considered hosting a git tree for PSCI so we can have
changes pre-integrated and tested in Stephen Rothwell's linux-next
tree?

Kind regards
Uffe
Lorenzo Pieralisi July 18, 2019, 1:36 p.m. UTC | #5
On Thu, Jul 18, 2019 at 01:43:44PM +0200, Ulf Hansson wrote:

[...]

> > > Anyway, as a suggestion to address your concern, how about this:
> > >
> > > 1. Move some things out to a PSCI cpuidle driver. We need to decide
> > > more exactly on what to move and find the right level for the
> > > interfaces.
> >
> > I will do it and post patches asap.
> 
> Okay, so I will wait for you to converting the cpuidle-arm driver into
> a cpuidle-psci driver (and all the changes that comes with it) and
> then base my re-base my series on top.
> 
> Then, would you mind sharing (even in an early phase) a
> branch/git-tree so I can start re-basing my series on top?

Sure, I should be able to post at -rc1 and will publish a branch
here [1].

> > > 2. Don't attach the CPU to the PM domain topology in case the PSCI PC
> > > mode is used. I think this makes it easier, at least as a first step,
> > > to understand when runtime PM needs to be used/enabled.
> >
> > In the PSCI CPUidle driver we can have two distinct struct
> > cpuidle_state->enter functions for PC and OSI, no overhead
> > for PC, runtime PM for OSI, decoupling done.
> 
> Good idea!
> 
> >
> > We can choose one or the other depending on whether:
> >
> > OSI iff:
> > - OSI is available
> > - hierarchical idle states are present in DT
> >
> > otherwise PC.
> >
> > That's what this patch does but we will do it in a unified file.
> 
> Sure, it makes sense.
> 
> >
> > > 3. Would it help if I volunteer to help you guys as a maintainer for
> > > PSCI. At least for the part of the new code that becomes introduced?
> >
> > We will do as described above if that makes sense.
> 
> Yep, I am okay with your suggestions, assuming I have understood them correctly.
> 
> BTW, have you considered to host a git tree for PSCI so we can have
> changes pre-integrated and tested in Stephen Rothwell's linux-next
> tree?

I will ask Stephen to pull, when needed, a branch from the tree below [1].

[1] https://git.kernel.org/pub/scm/linux/kernel/git/lpieralisi/linux.git/

Patch

diff --git a/drivers/firmware/psci/psci.h b/drivers/firmware/psci/psci.h
index 00d2e3dcef49..c36e0e6649e9 100644
--- a/drivers/firmware/psci/psci.h
+++ b/drivers/firmware/psci/psci.h
@@ -3,6 +3,7 @@ 
 #ifndef __PSCI_H
 #define __PSCI_H
 
+struct cpuidle_driver;
 struct device_node;
 
 int psci_set_osi_mode(void);
@@ -13,8 +14,12 @@  void psci_set_domain_state(u32 state);
 int psci_dt_parse_state_node(struct device_node *np, u32 *state);
 #ifdef CONFIG_PM_GENERIC_DOMAINS_OF
 int psci_dt_init_pm_domains(struct device_node *np);
+int psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
+		struct device_node *cpu_node, u32 *psci_states);
 #else
 static inline int psci_dt_init_pm_domains(struct device_node *np) { return 0; }
+static inline int psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
+		struct device_node *cpu_node, u32 *psci_states) { return 0; }
 #endif
 #endif
 
diff --git a/drivers/firmware/psci/psci_pm_domain.c b/drivers/firmware/psci/psci_pm_domain.c
index 3c6ca846caf4..3aa645dba81b 100644
--- a/drivers/firmware/psci/psci_pm_domain.c
+++ b/drivers/firmware/psci/psci_pm_domain.c
@@ -14,6 +14,10 @@ 
 #include <linux/pm_domain.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/cpuidle.h>
+#include <linux/cpu_pm.h>
+
+#include <asm/cpuidle.h>
 
 #include "psci.h"
 
@@ -104,6 +108,53 @@  static void psci_pd_free_states(struct genpd_power_state *states,
 	kfree(states);
 }
 
+static int psci_pd_enter_pc(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv, int idx)
+{
+	return CPU_PM_CPU_IDLE_ENTER(arm_cpuidle_suspend, idx);
+}
+
+static void psci_pd_enter_s2idle_pc(struct cpuidle_device *dev,
+			struct cpuidle_driver *drv, int idx)
+{
+	psci_pd_enter_pc(dev, drv, idx);
+}
+
+static void psci_pd_convert_states(struct cpuidle_state *idle_state,
+			u32 *psci_state, struct genpd_power_state *state)
+{
+	u32 *state_data = state->data;
+	u64 target_residency_us = state->residency_ns;
+	u64 exit_latency_us = state->power_on_latency_ns +
+			state->power_off_latency_ns;
+
+	*psci_state = *state_data;
+	do_div(target_residency_us, 1000);
+	idle_state->target_residency = target_residency_us;
+	do_div(exit_latency_us, 1000);
+	idle_state->exit_latency = exit_latency_us;
+	idle_state->enter = &psci_pd_enter_pc;
+	idle_state->enter_s2idle = &psci_pd_enter_s2idle_pc;
+	idle_state->flags |= CPUIDLE_FLAG_TIMER_STOP;
+
+	strncpy(idle_state->name, to_of_node(state->fwnode)->name,
+		CPUIDLE_NAME_LEN - 1);
+	strncpy(idle_state->desc, to_of_node(state->fwnode)->name,
+		CPUIDLE_NAME_LEN - 1);
+}
+
+static bool psci_pd_is_provider(struct device_node *np)
+{
+	struct psci_pd_provider *pd_prov, *it;
+
+	list_for_each_entry_safe(pd_prov, it, &psci_pd_providers, link) {
+		if (pd_prov->node == np)
+			return true;
+	}
+
+	return false;
+}
+
 static int psci_pd_init(struct device_node *np)
 {
 	struct generic_pm_domain *pd;
@@ -265,4 +316,71 @@  int psci_dt_init_pm_domains(struct device_node *np)
 	pr_err("failed to create CPU PM domains ret=%d\n", ret);
 	return ret;
 }
+
+int psci_dt_pm_domains_parse_states(struct cpuidle_driver *drv,
+			struct device_node *cpu_node, u32 *psci_states)
+{
+	struct genpd_power_state *pd_states;
+	struct of_phandle_args args;
+	int ret, pd_state_count, i, state_idx, psci_idx;
+	u32 cpu_psci_state = psci_states[drv->state_count - 2];
+	struct device_node *np = of_node_get(cpu_node);
+
+
+	/* Walk the CPU topology to find compatible domain idle states. */
+	while (np) {
+		ret = of_parse_phandle_with_args(np, "power-domains",
+					"#power-domain-cells", 0, &args);
+		of_node_put(np);
+		if (ret)
+			return 0;
+
+		np = args.np;
+
+		/* Verify that the node represents a psci pd provider. */
+		if (!psci_pd_is_provider(np)) {
+			of_node_put(np);
+			return 0;
+		}
+
+		/* Parse for compatible domain idle states. */
+		ret = psci_pd_parse_states(np, &pd_states, &pd_state_count);
+		if (ret) {
+			of_node_put(np);
+			return ret;
+		}
+
+		i = 0;
+		while (i < pd_state_count) {
+
+			state_idx = drv->state_count;
+			if (state_idx >= CPUIDLE_STATE_MAX) {
+				pr_warn("exceeding max cpuidle states\n");
+				of_node_put(np);
+				return 0;
+			}
+
+			/* WFI state is not part of psci_states. */
+			psci_idx = state_idx - 1 + i;
+			psci_pd_convert_states(&drv->states[state_idx + i],
+					&psci_states[psci_idx], &pd_states[i]);
+
+			/*
+			 * In the hierarchical CPU topology the master PM domain
+			 * idle state's DT property, "arm,psci-suspend-param",
+			 * don't contain the bits for the idle state of the CPU,
+			 * let's add those here.
+			 */
+			psci_states[psci_idx] |= cpu_psci_state;
+			pr_debug("psci-power-state %#x index %d\n",
+				psci_states[psci_idx], psci_idx);
+
+			drv->state_count++;
+			i++;
+		}
+		psci_pd_free_states(pd_states, pd_state_count);
+	}
+
+	return 0;
+}
 #endif