diff mbox series

[v3,10/13] cpuidle: psci: Prepare to use OS initiated suspend mode via PM domains

Message ID 20191127102914.18729-11-ulf.hansson@linaro.org (mailing list archive)
State New, archived
Headers show
Series cpuidle: psci: Support hierarchical CPU arrangement | expand

Commit Message

Ulf Hansson Nov. 27, 2019, 10:29 a.m. UTC
The per CPU variable psci_power_state, contains an array of fixed values,
which reflects the corresponding arm,psci-suspend-param parsed from DT, for
each of the available CPU idle states.

This isn't sufficient when using the hierarchical CPU topology in DT, in
combination with having PSCI OS initiated (OSI) mode enabled. More
precisely, in OSI mode, Linux is responsible for telling the PSCI FW what
idle state the cluster (a group of CPUs) should enter, while in PSCI
Platform Coordinated (PC) mode, each CPU independently votes for an idle
state of the cluster.

For this reason, introduce a per CPU variable called domain_state and
implement two helper functions to read/write its value. Then let the
domain_state take precedence over the regular selected state, when entering
an idle state.

To avoid executing the above OSI specific code in the ->enter() callback,
while operating in the default PSCI Platform Coordinated mode, let's also
add a new enter-function and use it for OSI.

Co-developed-by: Lina Iyer <lina.iyer@linaro.org>
Signed-off-by: Lina Iyer <lina.iyer@linaro.org>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---

Changes in v3:
	- Avoid executing any OSI specific code in psci_enter_idle_state(),
	while operating in the default PSCI Platform Coordinated mode.

---
 drivers/cpuidle/cpuidle-psci.c | 52 ++++++++++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 6 deletions(-)

Comments

Lorenzo Pieralisi Dec. 5, 2019, 6:35 p.m. UTC | #1
On Wed, Nov 27, 2019 at 11:29:11AM +0100, Ulf Hansson wrote:

[...]

> -static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
> +static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv,
> +					struct device_node *cpu_node,
>  					unsigned int state_count, int cpu)
>  {
>  	int i, ret = 0;
> @@ -118,6 +152,11 @@ static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
>  		goto free_mem;
>  	}
>  
> +	/* Manage the deepest state via a dedicated enter-function. */
> +	if (dev)
> +		drv->states[state_count - 1].enter =
> +			psci_enter_domain_idle_state;


It is unfortunate to make this arbitrary choice, it would be best
if you could detect which states are "domain" states aka are governed
by multiple cpus.

This initialization though does not belong in here, it is done at driver
level, it should not be done in this per-cpu path. IIUC the logic the
enter pointer should only be overridden if and only if all cpus managed
by the driver have a corresponding device associated.

To be frank I would even move the psci_has_osi_support() check from
psci_dt_attach_cpu() to this path and prevent calling
psci_dt_attach_cpu() and the tail of the function if
(!psci_has_osi_support()).

>  	data->dev = dev;

I think Sudeep already mentioned that, by using psci_has_osi_support()
as above you can prevent running this code, there is really no point,
the data->dev NULL sentinel is already initialized.

Lorenzo

>  	/* Idle states parsed correctly, store them in the per-cpu struct. */
> @@ -129,7 +168,8 @@ static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
>  	return ret;
>  }
>  
> -static __init int psci_cpu_init_idle(unsigned int cpu, unsigned int state_count)
> +static __init int psci_cpu_init_idle(struct cpuidle_driver *drv,
> +				     unsigned int cpu, unsigned int state_count)
>  {
>  	struct device_node *cpu_node;
>  	int ret;
> @@ -145,7 +185,7 @@ static __init int psci_cpu_init_idle(unsigned int cpu, unsigned int state_count)
>  	if (!cpu_node)
>  		return -ENODEV;
>  
> -	ret = psci_dt_cpu_init_idle(cpu_node, state_count, cpu);
> +	ret = psci_dt_cpu_init_idle(drv, cpu_node, state_count, cpu);
>  
>  	of_node_put(cpu_node);
>  
> @@ -201,7 +241,7 @@ static int __init psci_idle_init_cpu(int cpu)
>  	/*
>  	 * Initialize PSCI idle states.
>  	 */
> -	ret = psci_cpu_init_idle(cpu, ret);
> +	ret = psci_cpu_init_idle(drv, cpu, ret);
>  	if (ret) {
>  		pr_err("CPU %d failed to PSCI idle\n", cpu);
>  		goto out_kfree_drv;
> -- 
> 2.17.1
>
Ulf Hansson Dec. 5, 2019, 8:25 p.m. UTC | #2
On Thu, 5 Dec 2019 at 19:35, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
>
> On Wed, Nov 27, 2019 at 11:29:11AM +0100, Ulf Hansson wrote:
>
> [...]
>
> > -static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
> > +static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv,
> > +                                     struct device_node *cpu_node,
> >                                       unsigned int state_count, int cpu)
> >  {
> >       int i, ret = 0;
> > @@ -118,6 +152,11 @@ static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
> >               goto free_mem;
> >       }
> >
> > +     /* Manage the deepest state via a dedicated enter-function. */
> > +     if (dev)
> > +             drv->states[state_count - 1].enter =
> > +                     psci_enter_domain_idle_state;
>
>
> It is unfortunate to make this arbitrary choice, it would be best
> if you could detect which states are "domain" states aka are governed
> by multiple cpus.

The domain states are managed and selected by the genpd providers, via
using runtime PM reference counting. Please have a closer look at the
code in cpuidle-psci-domain.c and in the generic PM domain, that
should give you the needed details.

I am overriding the enter callback for the *deepest* known idle state
of the CPU, which is according to what you requested [1].

So, unless I am missing your point, I think the above code does
exactly what you want, no?

In regards to the "arbitrary choice" of what cpuidle state to use,
there are more details about why that is, in the changelog.

>
> This initialization though does not belong in here, it is done at driver
> level, it should not be done in this per-cpu path. IIUC the logic the
> enter pointer should only be overridden if and only if all cpus managed
> by the driver have a corresponding device associated.

I think you have overlooked the fact that there is one cpuidle driver
registered per CPU. The above doesn't make sense to me, sorry.

>
> To be frank I would even move the psci_has_osi_support() check from
> psci_dt_attach_cpu() to this path and prevent calling
> psci_dt_attach_cpu() and the tail of the function if
> (!psci_has_osi_support()).
>
> >       data->dev = dev;
>
> I think Sudeep already mentioned that, by using psci_has_osi_support()
> as above you can prevent running this code, there is really no point,
> the data->dev NULL sentinel is already initialized.

Yes, I discussed this with Sudeep, but we didn't reach a consensus.
Let me explain the reasons behind the selected approach, once more.

The data->dev is a pointer within a static declared struct. Are you
sure it's assigned NULL by initialization? Don't we explicitly need to
set it to NULL, else it will be undefined, no?

Of course, I can move the check for psci_has_osi_support() into here
and avoid calling psci_dt_attach_cpu(). Just wondering what that
actually gain us, especially if we need to explicitly set the pointer
to NULL anyway.

That said, can you please confirm your thoughts around this, I will
change to whatever you think is best.

[...]

Kind regards
Uffe

[1] https://www.spinics.net/lists/arm-kernel/msg770558.html
Ulf Hansson Dec. 5, 2019, 8:38 p.m. UTC | #3
On Thu, 5 Dec 2019 at 21:25, Ulf Hansson <ulf.hansson@linaro.org> wrote:
>
> On Thu, 5 Dec 2019 at 19:35, Lorenzo Pieralisi
> <lorenzo.pieralisi@arm.com> wrote:
> >
> > On Wed, Nov 27, 2019 at 11:29:11AM +0100, Ulf Hansson wrote:
> >
> > [...]
> >
> > > -static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
> > > +static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv,
> > > +                                     struct device_node *cpu_node,
> > >                                       unsigned int state_count, int cpu)
> > >  {
> > >       int i, ret = 0;
> > > @@ -118,6 +152,11 @@ static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
> > >               goto free_mem;
> > >       }
> > >
> > > +     /* Manage the deepest state via a dedicated enter-function. */
> > > +     if (dev)
> > > +             drv->states[state_count - 1].enter =
> > > +                     psci_enter_domain_idle_state;
> >
> >
> > It is unfortunate to make this arbitrary choice, it would be best
> > if you could detect which states are "domain" states aka are governed
> > by multiple cpus.
>
> The domain states are managed and selected by the genpd providers, via
> using runtime PM reference counting. Please have a closer look at the
> code in cpuidle-psci-domain.c and in the generic PM domain, that
> should give you the needed details.
>
> I am overriding the enter callback for the *deepest* known idle state
> of the CPU, which is according to what you requested [1].
>
> So, unless I am missing your point, I think the above code does
> exactly what you want, no?
>
> In regards to the "arbitrary choice" of what cpuidle state to use,
> there are more details about why that is, in the changelog.

Correction: Since I have moved patches around, I realized that the
explanation is actually put in the changelog of patch11.

For clarity, let me cut and paste it here as well:

"The triggering point for when runtime PM reference counting should be done,
has been selected to the deepest idle state for the CPU. However, from the
hierarchical point of view, there may be good reasons to do runtime PM
reference counting even on shallower idle states, but at this point this
isn't supported, mainly due to limitations set by the generic PM domain."

Is that good enough, or do you want some of this information also in the
changelog of $subject patch? Or do you have any other idea for how to
make this clearer?

[...]

Kind regards
Uffe
Lorenzo Pieralisi Dec. 6, 2019, 11:25 a.m. UTC | #4
On Thu, Dec 05, 2019 at 09:25:54PM +0100, Ulf Hansson wrote:
> On Thu, 5 Dec 2019 at 19:35, Lorenzo Pieralisi
> <lorenzo.pieralisi@arm.com> wrote:
> >
> > On Wed, Nov 27, 2019 at 11:29:11AM +0100, Ulf Hansson wrote:
> >
> > [...]
> >
> > > -static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
> > > +static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv,
> > > +                                     struct device_node *cpu_node,
> > >                                       unsigned int state_count, int cpu)
> > >  {
> > >       int i, ret = 0;
> > > @@ -118,6 +152,11 @@ static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
> > >               goto free_mem;
> > >       }
> > >
> > > +     /* Manage the deepest state via a dedicated enter-function. */
> > > +     if (dev)
> > > +             drv->states[state_count - 1].enter =
> > > +                     psci_enter_domain_idle_state;
> >
> >
> > It is unfortunate to make this arbitrary choice, it would be best
> > if you could detect which states are "domain" states aka are governed
> > by multiple cpus.
> 
> The domain states are managed and selected by the genpd providers, via
> using runtime PM reference counting. Please have a closer look at the
> code in cpuidle-psci-domain.c and in the generic PM domain, that
> should give you the needed details.
> 
> I am overriding the enter callback for the *deepest* known idle state
> of the CPU, which is according to what you requested [1].

Overriding it, yes, but I have not requested to do it only for the
deepest idle state, which, I repeat, in my opinion is an arbitrary
choice that works for the platform you are testing on but is
not as generic as it should be.

You can merge it as it is but that's how things stand and adding
a comment to the *code* would help understand its logic.

> So, unless I am missing your point, I think the above code does
> exactly what you want, no?
> 
> In regards to the "arbitrary choice" of what cpuidle state to use,
> there are more details about why that is, in the changelog.
> 
> >
> > This initialization though does not belong in here, it is done at driver
> > level, it should not be done in this per-cpu path. IIUC the logic the
> > enter pointer should only be overridden if and only if all cpus managed
> > by the driver have a corresponding device associated.
> 
> I think you have overlooked the fact that there are one cpuidle driver
> registered per CPU. The above doesn't make sense to me, sorry.

You are calling psci_dt_cpu_init_idle() for every possible cpu.

Every time psci_dt_attach_cpu() is called, we check dev and override
the idle driver enter method. There is one driver, what I am saying
is that it is not correct to check dev and override the enter pointer
for *every* cpu that we try to attach to a power domain. This must
be done once for all by checking that *all* devices could be attached
to a power domain.

> > To be frank I would even move the psci_has_osi_support() check from
> > psci_dt_attach_cpu() to this path and prevent calling
> > psci_dt_attach_cpu() and the tail of the function if
> > (!psci_has_osi_support()).
> >
> > >       data->dev = dev;
> >
> > I think Sudeep already mentioned that, by using psci_has_osi_support()
> > as above you can prevent running this code, there is really no point,
> > the data->dev NULL sentinel is already initialized.
> 
> Yes, I discussed this with Sudeep, but we didn't reach a consensus.

Consensus was already reached.

http://www.open-std.org/jtc1/sc22/WG14/www/docs/n1570.pdf

> Let me explain the reasons behind the selected approach, once more.
> 
> The data->dev is a pointer within a static declared struct. Are you
> sure it's assigned NULL by initialization? Don't we explicitly need to
> set it to NULL, else it will be undefined, no?

http://www.open-std.org/jtc1/sc22/WG14/www/docs/n1570.pdf

6.7.9 (10) page 140

> Of course, I can move the check for psci_has_osi_support() into here
> and avoid calling psci_dt_attach_cpu(). Just wondering what that
> actually gain us, especially if we need to explicitly set the pointer
> to NULL anyway.

See above.

Thanks,
Lorenzo
Ulf Hansson Dec. 6, 2019, 2:26 p.m. UTC | #5
On Fri, 6 Dec 2019 at 12:25, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
>
> On Thu, Dec 05, 2019 at 09:25:54PM +0100, Ulf Hansson wrote:
> > On Thu, 5 Dec 2019 at 19:35, Lorenzo Pieralisi
> > <lorenzo.pieralisi@arm.com> wrote:
> > >
> > > On Wed, Nov 27, 2019 at 11:29:11AM +0100, Ulf Hansson wrote:
> > >
> > > [...]
> > >
> > > > -static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
> > > > +static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv,
> > > > +                                     struct device_node *cpu_node,
> > > >                                       unsigned int state_count, int cpu)
> > > >  {
> > > >       int i, ret = 0;
> > > > @@ -118,6 +152,11 @@ static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
> > > >               goto free_mem;
> > > >       }
> > > >
> > > > +     /* Manage the deepest state via a dedicated enter-function. */
> > > > +     if (dev)
> > > > +             drv->states[state_count - 1].enter =
> > > > +                     psci_enter_domain_idle_state;
> > >
> > >
> > > It is unfortunate to make this arbitrary choice, it would be best
> > > if you could detect which states are "domain" states aka are governed
> > > by multiple cpus.
> >
> > The domain states are managed and selected by the genpd providers, via
> > using runtime PM reference counting. Please have a closer look at the
> > code in cpuidle-psci-domain.c and in the generic PM domain, that
> > should give you the needed details.
> >
> > I am overriding the enter callback for the *deepest* known idle state
> > of the CPU, which is according to what you requested [1].
>
> Overriding it yes but I have not requested to do it only for the
> deepest idle state that, I repeat, in my opinion is an arbitrary
> choice that works for the platform you are testing on but is
> not generic as it should.

Right, I agree. I recall we have discussed this already.

>
> You can merge it as it is but that's how things stand and adding
> a comment to the *code* would help understand its logic.

Okay, how about adding a comment along the lines of this:

"Using the deepest state for the CPU to trigger a potential selection
of a shared state for the domain, assumes the domain states are all
deeper states. This assumption may not be true for all platforms, thus
we may consider to revisit this, if it turns out that optimizations
can be made."

>
> > So, unless I am missing your point, I think the above code does
> > exactly what you want, no?
> >
> > In regards to the "arbitrary choice" of what cpuidle state to use,
> > there are more details about why that is, in the changelog.
> >
> > >
> > > This initialization though does not belong in here, it is done at driver
> > > level, it should not be done in this per-cpu path. IIUC the logic the
> > > enter pointer should only be overridden if and only if all cpus managed
> > > by the driver have a corresponding device associated.
> >
> > I think you have overlooked the fact that there are one cpuidle driver
> > registered per CPU. The above doesn't make sense to me, sorry.
>
> You are calling psci_dt_cpu_init_idle() for every possible cpu.
>
> Every time psci_dt_attach_cpu() is called, we check dev and override
> the idle driver enter method. There is one driver, what I am saying
> is that it is not correct to check dev and override the enter pointer
> for *every* cpu that we try to attach to a power domain. This must
> be done once for all by checking that *all* devices could be attached
> to a power domain.

Ah, now I think I get your point.

You want me to re-iterate through all the registered cpuidle drivers,
which means one per CPU - and then override the enter callback for
each of them, but only if all devices were successfully attached to a
PM domain. Is that correct?

My only worry with this is that we have already registered the
cpuidle drivers and I don't think it's a good idea to update the enter
callbacks, beyond that point.

Perhaps another option is to track whether the first CPU gets attached
(and then update the enter callback), but after that require all the
remaining CPUs to be attached as well - else bail out with an error
code, failing to register all the driver instances.

What do you think about that?

>
> > > To be frank I would even move the psci_has_osi_support() check from
> > > psci_dt_attach_cpu() to this path and prevent calling
> > > psci_dt_attach_cpu() and the tail of the function if
> > > (!psci_has_osi_support()).
> > >
> > > >       data->dev = dev;
> > >
> > > I think Sudeep already mentioned that, by using psci_has_osi_support()
> > > as above you can prevent running this code, there is really no point,
> > > the data->dev NULL sentinel is already initialized.
> >
> > Yes, I discussed this with Sudeep, but we didn't reach a consensus.
>
> Consensus was already reached.
>
> http://www.open-std.org/jtc1/sc22/WG14/www/docs/n1570.pdf
>
> > Let me explain the reasons behind the selected approach, once more.
> >
> > The data->dev is a pointer within a static declared struct. Are you
> > sure it's assigned NULL by initialization? Don't we explicitly need to
> > set it to NULL, else it will be undefined, no?
>
> http://www.open-std.org/jtc1/sc22/WG14/www/docs/n1570.pdf
>
> 6.7.9 (10) page 140

Thanks for sharing, didn't know we could rely on this behaviour! Alright!

>
> > Of course, I can move the check for psci_has_osi_support() into here
> > and avoid calling psci_dt_attach_cpu(). Just wondering what that
> > actually gain us, especially if we need to explicitly set the pointer
> > to NULL anyway.
>
> See above.

Yes, makes more sense now. I will adopt your suggestions!

Kind regards
Uffe
Lorenzo Pieralisi Dec. 6, 2019, 3:14 p.m. UTC | #6
On Fri, Dec 06, 2019 at 03:26:16PM +0100, Ulf Hansson wrote:

[...]

> > You can merge it as it is but that's how things stand and adding
> > a comment to the *code* would help understand its logic.
> 
> Okay, how about adding a comment along the lines of this:
> 
> "Using the deepest state for the CPU to trigger a potential selection
> of a shared state for the domain, assumes the domain states are all
> deeper states".

Just this should be fine (I trimmed it a bit).

> > > So, unless I am missing your point, I think the above code does
> > > exactly what you want, no?
> > >
> > > In regards to the "arbitrary choice" of what cpuidle state to use,
> > > there are more details about why that is, in the changelog.
> > >
> > > >
> > > > This initialization though does not belong in here, it is done at driver
> > > > level, it should not be done in this per-cpu path. IIUC the logic the
> > > > enter pointer should only be overridden if and only if all cpus managed
> > > > by the driver have a corresponding device associated.
> > >
> > > I think you have overlooked the fact that there are one cpuidle driver
> > > registered per CPU. The above doesn't make sense to me, sorry.
> >
> > You are calling psci_dt_cpu_init_idle() for every possible cpu.
> >
> > Every time psci_dt_attach_cpu() is called, we check dev and override
> > the idle driver enter method. There is one driver, what I am saying
> > is that it is not correct to check dev and override the enter pointer
> > for *every* cpu that we try to attach to a power domain. This must
> > be done once for all by checking that *all* devices could be attached
> > to a power domain.
> 
> Ah, now I think get your point.
> 
> You want me to re-iterate through all the registered cpuidle drivers,
> which means one per CPU - and then override the enter callback for
> each of them, but only if all devices was successfully attached to a
> PM domain. Is that correct?
> 
> My only worries with this, is that we have already registered the
> cpuidle drivers and I don't think it's a good idea to update the enter
> callbacks, beyond that point.
> 
> Perhaps another option is to track whether the first CPU gets attached
> (and then update the enter callback), but after that require all the
> remaining CPUs to be attached as well - else bail out with an error
> code, failing to register all the driver instances.
> 
> What do you think about that?

I was confused - now we have one cpuidle driver per cpu so this
comment was bogus from this perspective (I was still reasoning
with a *single* cpuidle driver across cpus. Apologies).

Sudeep will follow up on this but please forget this specific
comment - I was wrong.

Thanks,
Lorenzo
Ulf Hansson Dec. 6, 2019, 5:23 p.m. UTC | #7
On Fri, 6 Dec 2019 at 16:14, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
>
> On Fri, Dec 06, 2019 at 03:26:16PM +0100, Ulf Hansson wrote:
>
> [...]
>
> > > You can merge it as it is but that's how things stand and adding
> > > a comment to the *code* would help understand its logic.
> >
> > Okay, how about adding a comment along the lines of this:
> >
> > "Using the deepest state for the CPU to trigger a potential selection
> > of a shared state for the domain, assumes the domain states are all
> > deeper states".
>
> Just this it should be fine (I trimmed it a bit).

Great, I'll add that!

>
> > > > So, unless I am missing your point, I think the above code does
> > > > exactly what you want, no?
> > > >
> > > > In regards to the "arbitrary choice" of what cpuidle state to use,
> > > > there are more details about why that is, in the changelog.
> > > >
> > > > >
> > > > > This initialization though does not belong in here, it is done at driver
> > > > > level, it should not be done in this per-cpu path. IIUC the logic the
> > > > > enter pointer should only be overridden if and only if all cpus managed
> > > > > by the driver have a corresponding device associated.
> > > >
> > > > I think you have overlooked the fact that there are one cpuidle driver
> > > > registered per CPU. The above doesn't make sense to me, sorry.
> > >
> > > You are calling psci_dt_cpu_init_idle() for every possible cpu.
> > >
> > > Every time psci_dt_attach_cpu() is called, we check dev and override
> > > the idle driver enter method. There is one driver, what I am saying
> > > is that it is not correct to check dev and override the enter pointer
> > > for *every* cpu that we try to attach to a power domain. This must
> > > be done once for all by checking that *all* devices could be attached
> > > to a power domain.
> >
> > Ah, now I think get your point.
> >
> > You want me to re-iterate through all the registered cpuidle drivers,
> > which means one per CPU - and then override the enter callback for
> > each of them, but only if all devices was successfully attached to a
> > PM domain. Is that correct?
> >
> > My only worries with this, is that we have already registered the
> > cpuidle drivers and I don't think it's a good idea to update the enter
> > callbacks, beyond that point.
> >
> > Perhaps another option is to track whether the first CPU gets attached
> > (and then update the enter callback), but after that require all the
> > remaining CPUs to be attached as well - else bail out with an error
> > code, failing to register all the driver instances.
> >
> > What do you think about that?
>
> I was confused - now we have one cpuidle driver per cpu so this
> comment was bogus from this perspective (I was still reasoning
> with a *single* cpuidle driver across cpus. Apologies).

No worries!

We agreed on the way forward, so I am happy. :-)

>
> Sudeep will follow up on this but please forget this specific
> comment - I was wrong.

Alright, thanks!

Does that also mean I can add your ack for the rest of the patches in
the series (besides the last hotplug patch) or are there any other
issues you want to raise?

Have a nice weekend!

Kind regards
Uffe
diff mbox series

Patch

diff --git a/drivers/cpuidle/cpuidle-psci.c b/drivers/cpuidle/cpuidle-psci.c
index 167249d0493f..fd664e134c3f 100644
--- a/drivers/cpuidle/cpuidle-psci.c
+++ b/drivers/cpuidle/cpuidle-psci.c
@@ -29,14 +29,47 @@  struct psci_cpuidle_data {
 };
 
 static DEFINE_PER_CPU_READ_MOSTLY(struct psci_cpuidle_data, psci_cpuidle_data);
+static DEFINE_PER_CPU(u32, domain_state);
+
+static inline void psci_set_domain_state(u32 state)
+{
+	__this_cpu_write(domain_state, state);
+}
+
+static inline u32 psci_get_domain_state(void)
+{
+	return __this_cpu_read(domain_state);
+}
+
+static inline int psci_enter_state(int idx, u32 state)
+{
+	return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter, idx, state);
+}
+
+static int psci_enter_domain_idle_state(struct cpuidle_device *dev,
+					struct cpuidle_driver *drv, int idx)
+{
+	struct psci_cpuidle_data *data = this_cpu_ptr(&psci_cpuidle_data);
+	u32 *states = data->psci_states;
+	u32 state = psci_get_domain_state();
+	int ret;
+
+	if (!state)
+		state = states[idx];
+
+	ret = psci_enter_state(idx, state);
+
+	/* Clear the domain state to start fresh when back from idle. */
+	psci_set_domain_state(0);
+	return ret;
+}
 
 static int psci_enter_idle_state(struct cpuidle_device *dev,
 				struct cpuidle_driver *drv, int idx)
 {
 	u32 *state = __this_cpu_read(psci_cpuidle_data.psci_states);
 
-	return CPU_PM_CPU_IDLE_ENTER_PARAM(psci_cpu_suspend_enter,
-					   idx, state[idx]);
+	return psci_enter_state(idx, state[idx]);
 }
 
 static struct cpuidle_driver psci_idle_driver __initdata = {
@@ -79,7 +112,8 @@  static int __init psci_dt_parse_state_node(struct device_node *np, u32 *state)
 	return 0;
 }
 
-static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
+static int __init psci_dt_cpu_init_idle(struct cpuidle_driver *drv,
+					struct device_node *cpu_node,
 					unsigned int state_count, int cpu)
 {
 	int i, ret = 0;
@@ -118,6 +152,11 @@  static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
 		goto free_mem;
 	}
 
+	/* Manage the deepest state via a dedicated enter-function. */
+	if (dev)
+		drv->states[state_count - 1].enter =
+			psci_enter_domain_idle_state;
+
 	data->dev = dev;
 
 	/* Idle states parsed correctly, store them in the per-cpu struct. */
@@ -129,7 +168,8 @@  static int __init psci_dt_cpu_init_idle(struct device_node *cpu_node,
 	return ret;
 }
 
-static __init int psci_cpu_init_idle(unsigned int cpu, unsigned int state_count)
+static __init int psci_cpu_init_idle(struct cpuidle_driver *drv,
+				     unsigned int cpu, unsigned int state_count)
 {
 	struct device_node *cpu_node;
 	int ret;
@@ -145,7 +185,7 @@  static __init int psci_cpu_init_idle(unsigned int cpu, unsigned int state_count)
 	if (!cpu_node)
 		return -ENODEV;
 
-	ret = psci_dt_cpu_init_idle(cpu_node, state_count, cpu);
+	ret = psci_dt_cpu_init_idle(drv, cpu_node, state_count, cpu);
 
 	of_node_put(cpu_node);
 
@@ -201,7 +241,7 @@  static int __init psci_idle_init_cpu(int cpu)
 	/*
 	 * Initialize PSCI idle states.
 	 */
-	ret = psci_cpu_init_idle(cpu, ret);
+	ret = psci_cpu_init_idle(drv, cpu, ret);
 	if (ret) {
 		pr_err("CPU %d failed to PSCI idle\n", cpu);
 		goto out_kfree_drv;