
[v4,REPOST] ARM: vexpress/TC2: Implement MCPM power_down_finish()

Message ID 1385396185-15291-1-git-send-email-Dave.Martin@arm.com (mailing list archive)
State New, archived

Commit Message

Dave Martin Nov. 25, 2013, 4:16 p.m. UTC
This patch implements the power_down_finish() method for TC2, to
enable the kernel to confirm when CPUs are safely powered down.

The information required for determining when a CPU is parked
cannot be obtained from any single place, so a few sources of
information must be combined:

  * mcpm_cpu_power_down() must be pending for the CPU, so that we
    don't get confused by false STANDBYWFI positives arising from
    CPUidle.  This is detected by waiting for the tc2_pm use count
    for the target CPU to reach 0.

  * Either the SPC must report that the CPU has asserted
    STANDBYWFI, or the TC2 tile's reset control logic must be
    holding the CPU in reset.

    Just checking for STANDBYWFI is not sufficient, because this
    signal is not latched when the cluster is clamped off and
    powered down: the relevant status bits just drop to zero.  This
    means that STANDBYWFI status cannot be used for reliable
    detection of the last CPU in a cluster reaching WFI.

This patch is required in order for kexec to work with MCPM on TC2.

Signed-off-by: Dave Martin <Dave.Martin@arm.com>
Acked-by: Pawel Moll <pawel.moll@arm.com>
---

Since v3.13-rc1, mcpm backends must implement an additional method which
the vexpress TC2 implementation doesn't have upstream, resulting in an
ugly WARN_ON() during hotplug and kexec operations.

This patch has received review, but the audience for it is small, so
feedback has been limited.  Pawel has agreed that it is OK for merge,
though, as documented by the following thread:

http://archive.arm.linux.org.uk/lurker/message/20131002.144555.de22a6f8.en.html


Since there is now an interface break, it would be good if this patch
can be merged as a fix in the 3.13 cycle.  This code is specific to
vexpress TC2 and won't affect any other platform.
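
For reference, the condition checked by the new method can be summarised
roughly as follows.  This is only an illustrative sketch (the helper name
cpu_is_safely_parked() is made up for this summary); the real code is
tc2_pm_power_down_finish() in the diff below, which additionally polls
the condition with a timeout:

	/* Illustrative sketch only -- not part of the patch itself. */
	static int cpu_is_safely_parked(unsigned int cpu, unsigned int cluster)
	{
		/* A power-down must actually be pending for this CPU ... */
		if (ACCESS_ONCE(tc2_pm_use_count[cpu][cluster]) != 0)
			return 0;

		/*
		 * ... and the CPU must either have asserted STANDBYWFI or
		 * already be held in reset (the STANDBYWFI status bits drop
		 * to zero once the cluster is clamped off and powered down).
		 */
		return tc2_core_in_reset(cpu, cluster) ||
		       ve_spc_cpu_in_wfi(cpu, cluster);
	}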


 arch/arm/mach-vexpress/spc.c    |   40 ++++++++++++++++++++++++
 arch/arm/mach-vexpress/spc.h    |    1 +
 arch/arm/mach-vexpress/tc2_pm.c |   66 ++++++++++++++++++++++++++++++++++++---
 3 files changed, 102 insertions(+), 5 deletions(-)

Comments

Nicolas Pitre Nov. 25, 2013, 4:44 p.m. UTC | #1
On Mon, 25 Nov 2013, Dave Martin wrote:

> This patch implements the power_down_finish() method for TC2, to
> enable the kernel to confirm when CPUs are safely powered down.
> 
> The information required for determining when a CPU is parked
> cannot be obtained from any single place, so a few sources of
> information must be combined:
> 
>   * mcpm_cpu_power_down() must be pending for the CPU, so that we
>     don't get confused by false STANDBYWFI positives arising from
>     CPUidle.  This is detected by waiting for the tc2_pm use count
>     for the target CPU to reach 0.
> 
>   * Either the SPC must report that the CPU has asserted
>     STANDBYWFI, or the TC2 tile's reset control logic must be
>     holding the CPU in reset.
> 
>     Just checking for STANDBYWFI is not sufficient, because this
>     signal is not latched when the cluster is clamped off and
>     powered down: the relevant status bits just drop to zero.  This
>     means that STANDBYWFI status cannot be used for reliable
>     detection of the last CPU in a cluster reaching WFI.
> 
> This patch is required in order for kexec to work with MCPM on TC2.
> 
> Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> Acked-by: Pawel Moll <pawel.moll@arm.com>

I thought I provided it before, but here it is again:

Reviewed-by: Nicolas Pitre <nico@linaro.org>

> Since there is now an interface break, it would be good if this patch
> can be merged as a fix in the 3.13 cycle.  This code is specific to
> vexpress TC2 and won't affect any other platform.

I agree to this as well.




> 
> 
>  arch/arm/mach-vexpress/spc.c    |   40 ++++++++++++++++++++++++
>  arch/arm/mach-vexpress/spc.h    |    1 +
>  arch/arm/mach-vexpress/tc2_pm.c |   66 ++++++++++++++++++++++++++++++++++++---
>  3 files changed, 102 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c
> index 033d34d..c26ef5b 100644
> --- a/arch/arm/mach-vexpress/spc.c
> +++ b/arch/arm/mach-vexpress/spc.c
> @@ -53,6 +53,11 @@
>  #define A15_BX_ADDR0		0x68
>  #define A7_BX_ADDR0		0x78
>  
> +/* SPC CPU/cluster reset status */
> +#define STANDBYWFI_STAT		0x3c
> +#define STANDBYWFI_STAT_A15_CPU_MASK(cpu)	(1 << (cpu))
> +#define STANDBYWFI_STAT_A7_CPU_MASK(cpu)	(1 << (3 + (cpu)))
> +
>  /* SPC system config interface registers */
>  #define SYSCFG_WDATA		0x70
>  #define SYSCFG_RDATA		0x74
> @@ -213,6 +218,41 @@ void ve_spc_powerdown(u32 cluster, bool enable)
>  	writel_relaxed(enable, info->baseaddr + pwdrn_reg);
>  }
>  
> +static u32 standbywfi_cpu_mask(u32 cpu, u32 cluster)
> +{
> +	return cluster_is_a15(cluster) ?
> +		  STANDBYWFI_STAT_A15_CPU_MASK(cpu)
> +		: STANDBYWFI_STAT_A7_CPU_MASK(cpu);
> +}
> +
> +/**
> + * ve_spc_cpu_in_wfi(u32 cpu, u32 cluster)
> + *
> + * @cpu: mpidr[7:0] bitfield describing CPU affinity level within cluster
> + * @cluster: mpidr[15:8] bitfield describing cluster affinity level
> + *
> + * @return: non-zero if and only if the specified CPU is in WFI
> + *
> + * Take care when interpreting the result of this function: a CPU might
> + * be in WFI temporarily due to idle, and is not necessarily safely
> + * parked.
> + */
> +int ve_spc_cpu_in_wfi(u32 cpu, u32 cluster)
> +{
> +	int ret;
> +	u32 mask = standbywfi_cpu_mask(cpu, cluster);
> +
> +	if (cluster >= MAX_CLUSTERS)
> +		return 1;
> +
> +	ret = readl_relaxed(info->baseaddr + STANDBYWFI_STAT);
> +
> +	pr_debug("%s: PCFGREG[0x%X] = 0x%08X, mask = 0x%X\n",
> +		 __func__, STANDBYWFI_STAT, ret, mask);
> +
> +	return ret & mask;
> +}
> +
>  static int ve_spc_get_performance(int cluster, u32 *freq)
>  {
>  	struct ve_spc_opp *opps = info->opps[cluster];
> diff --git a/arch/arm/mach-vexpress/spc.h b/arch/arm/mach-vexpress/spc.h
> index dbd44c3..793d065 100644
> --- a/arch/arm/mach-vexpress/spc.h
> +++ b/arch/arm/mach-vexpress/spc.h
> @@ -20,5 +20,6 @@ void ve_spc_global_wakeup_irq(bool set);
>  void ve_spc_cpu_wakeup_irq(u32 cluster, u32 cpu, bool set);
>  void ve_spc_set_resume_addr(u32 cluster, u32 cpu, u32 addr);
>  void ve_spc_powerdown(u32 cluster, bool enable);
> +int ve_spc_cpu_in_wfi(u32 cpu, u32 cluster);
>  
>  #endif
> diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
> index 05a364c..29e7785 100644
> --- a/arch/arm/mach-vexpress/tc2_pm.c
> +++ b/arch/arm/mach-vexpress/tc2_pm.c
> @@ -12,6 +12,7 @@
>   * published by the Free Software Foundation.
>   */
>  
> +#include <linux/delay.h>
>  #include <linux/init.h>
>  #include <linux/io.h>
>  #include <linux/kernel.h>
> @@ -32,11 +33,17 @@
>  #include "spc.h"
>  
>  /* SCC conf registers */
> +#define RESET_CTRL		0x018
> +#define RESET_A15_NCORERESET(cpu)	(1 << (2 + (cpu)))
> +#define RESET_A7_NCORERESET(cpu)	(1 << (16 + (cpu)))
> +
>  #define A15_CONF		0x400
>  #define A7_CONF			0x500
>  #define SYS_INFO		0x700
>  #define SPC_BASE		0xb00
>  
> +static void __iomem *scc;
> +
>  /*
>   * We can't use regular spinlocks. In the switcher case, it is possible
>   * for an outbound CPU to call power_down() after its inbound counterpart
> @@ -190,6 +197,55 @@ static void tc2_pm_power_down(void)
>  	tc2_pm_down(0);
>  }
>  
> +static int tc2_core_in_reset(unsigned int cpu, unsigned int cluster)
> +{
> +	u32 mask = cluster ?
> +		  RESET_A7_NCORERESET(cpu)
> +		: RESET_A15_NCORERESET(cpu);
> +
> +	return !(readl_relaxed(scc + RESET_CTRL) & mask);
> +}
> +
> +#define POLL_MSEC 10
> +#define TIMEOUT_MSEC 1000
> +
> +static int tc2_pm_power_down_finish(unsigned int cpu, unsigned int cluster)
> +{
> +	unsigned tries;
> +
> +	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
> +	BUG_ON(cluster >= TC2_CLUSTERS || cpu >= TC2_MAX_CPUS_PER_CLUSTER);
> +
> +	for (tries = 0; tries < TIMEOUT_MSEC / POLL_MSEC; ++tries) {
> +		/*
> +		 * Only examine the hardware state if the target CPU has
> +		 * caught up at least as far as tc2_pm_down():
> +		 */
> +		if (ACCESS_ONCE(tc2_pm_use_count[cpu][cluster]) == 0) {
> +			pr_debug("%s(cpu=%u, cluster=%u): RESET_CTRL = 0x%08X\n",
> +				 __func__, cpu, cluster,
> +				 readl_relaxed(scc + RESET_CTRL));
> +
> +			/*
> +			 * We need the CPU to reach WFI, but the power
> +			 * controller may put the cluster in reset and
> +			 * power it off as soon as that happens, before
> +			 * we have a chance to see STANDBYWFI.
> +			 *
> +			 * So we need to check for both conditions:
> +			 */
> +			if (tc2_core_in_reset(cpu, cluster) ||
> +			    ve_spc_cpu_in_wfi(cpu, cluster))
> +				return 0; /* success: the CPU is halted */
> +		}
> +
> +		/* Otherwise, wait and retry: */
> +		msleep(POLL_MSEC);
> +	}
> +
> +	return -ETIMEDOUT; /* timeout */
> +}
> +
>  static void tc2_pm_suspend(u64 residency)
>  {
>  	unsigned int mpidr, cpu, cluster;
> @@ -232,10 +288,11 @@ static void tc2_pm_powered_up(void)
>  }
>  
>  static const struct mcpm_platform_ops tc2_pm_power_ops = {
> -	.power_up	= tc2_pm_power_up,
> -	.power_down	= tc2_pm_power_down,
> -	.suspend	= tc2_pm_suspend,
> -	.powered_up	= tc2_pm_powered_up,
> +	.power_up		= tc2_pm_power_up,
> +	.power_down		= tc2_pm_power_down,
> +	.power_down_finish	= tc2_pm_power_down_finish,
> +	.suspend		= tc2_pm_suspend,
> +	.powered_up		= tc2_pm_powered_up,
>  };
>  
>  static bool __init tc2_pm_usage_count_init(void)
> @@ -269,7 +326,6 @@ static void __naked tc2_pm_power_up_setup(unsigned int affinity_level)
>  static int __init tc2_pm_init(void)
>  {
>  	int ret, irq;
> -	void __iomem *scc;
>  	u32 a15_cluster_id, a7_cluster_id, sys_info;
>  	struct device_node *np;
>  
> -- 
> 1.7.9.5
>
Dave Martin Nov. 25, 2013, 5:09 p.m. UTC | #2
On Mon, Nov 25, 2013 at 11:44:45AM -0500, Nicolas Pitre wrote:
> On Mon, 25 Nov 2013, Dave Martin wrote:
> 
> > This patch implements the power_down_finish() method for TC2, to
> > enable the kernel to confirm when CPUs are safely powered down.
> > 
> > The information required for determining when a CPU is parked
> > cannot be obtained from any single place, so a few sources of
> > information must be combined:
> > 
> >   * mcpm_cpu_power_down() must be pending for the CPU, so that we
> >     don't get confused by false STANDBYWFI positives arising from
> >     CPUidle.  This is detected by waiting for the tc2_pm use count
> >     for the target CPU to reach 0.
> > 
> >   * Either the SPC must report that the CPU has asserted
> >     STANDBYWFI, or the TC2 tile's reset control logic must be
> >     holding the CPU in reset.
> > 
> >     Just checking for STANDBYWFI is not sufficient, because this
> >     signal is not latched when the cluster is clamped off and
> >     powered down: the relevant status bits just drop to zero.  This
> >     means that STANDBYWFI status cannot be used for reliable
> >     detection of the last CPU in a cluster reaching WFI.
> > 
> > This patch is required in order for kexec to work with MCPM on TC2.
> > 
> > Signed-off-by: Dave Martin <Dave.Martin@arm.com>
> > Acked-by: Pawel Moll <pawel.moll@arm.com>
> 
> I thought I provided it before, but here it is again:
> 
> Reviewed-by: Nicolas Pitre <nico@linaro.org>

Hmm, I may have lost that somewhere ... apologies if so.

To avoid extra noise, I will leave it to the committer to add this if
they want (unless I need to repost for some other reason).

> > Since there is now an interface break, it would be good if this patch
> > can be merged as a fix in the 3.13 cycle.  This code is specific to
> > vexpress TC2 and won't affect any other platform.
> 
> I agree to this as well.

Thanks
---Dave
Olof Johansson Nov. 25, 2013, 5:39 p.m. UTC | #3
Hi,

> Since v3.13-rc1, mcpm backends must implement an additional method which
> the vexpress TC2 implementation doesn't have upstream, resulting in an
> ugly WARN_ON() during hotplug and kexec operations

Please be more specific here. Which change in the 3.13 merge window caused this?


-Olof
Nicolas Pitre Nov. 25, 2013, 6:02 p.m. UTC | #4
On Mon, 25 Nov 2013, Olof Johansson wrote:

> Hi,
> 
> > Since v3.13-rc1, mcpm backends must implement an additional method which
> > the vexpress TC2 implementation doesn't have upstream, resulting in an
> > ugly WARN_ON() during hotplug and kexec operations
> 
> Please be more specific here. Which change in the 3.13 merge window caused this?

Commit 0de0d6467525.

The corresponding change to tc2_pm.c was initially posted at about the 
same time, i.e. before the merge window, but it somehow fell through 
the cracks.


Nicolas
Olof Johansson Nov. 25, 2013, 6:26 p.m. UTC | #5
On Mon, Nov 25, 2013 at 10:02 AM, Nicolas Pitre
<nicolas.pitre@linaro.org> wrote:
> On Mon, 25 Nov 2013, Olof Johansson wrote:
>
>> Hi,
>>
>> > Since v3.13-rc1, mcpm backends must implement an additional method which
>> > the vexpress TC2 implementation doesn't have upstream, resulting in an
>> > ugly WARN_ON() during hotplug and kexec operations
>>
>> Please be more specific here. Which change in the 3.13 merge window caused this?
>
> Commit 0de0d6467525.
>
> The corresponding change to tc2-pm.c was initially posted at about the
> same time i.e. before the merge window, but it somehow fell into a
> crack.

Thanks, I'll add that reference to the patch description when I apply.

Looks like the MCPM-side patch was already posted back on Oct 1. I'm
not going to flame anyone over this, but it'd be nice if we could find
out about these things sooner, ideally when breakage hits -next. Dave,
next time I think I'd prefer to hear about it even if it is during the
merge window. :-)

Anyway, obviously it's needed and I'll apply it.


-Olof
Dave Martin Nov. 25, 2013, 6:59 p.m. UTC | #6
On Mon, Nov 25, 2013 at 06:26:46PM +0000, Olof Johansson wrote:
> On Mon, Nov 25, 2013 at 10:02 AM, Nicolas Pitre
> <nicolas.pitre@linaro.org> wrote:
> > On Mon, 25 Nov 2013, Olof Johansson wrote:
> >
> >> Hi,
> >>
> >> > Since v3.13-rc1, mcpm backends must implement an additional method which
> >> > the vexpress TC2 implementation doesn't have upstream, resulting in an
> >> > ugly WARN_ON() during hotplug and kexec operations
> >>
> >> Please be more specific here. Which change in the 3.13 merge window caused this?
> >
> > Commit 0de0d6467525.
> >
> > The corresponding change to tc2-pm.c was initially posted at about the
> > same time i.e. before the merge window, but it somehow fell into a
> > crack.
> 
> Thanks, I'll add that reference to the patch description when I apply.

The issue here was that the two patches were developed together and to
some extent validate each other, but one is an arch change and the other
is not.  Expecting Russell to accept random board changes through his
tree seemed a bit cheeky at this point, but I was slow sorting this out
before the merge window opened.

Is there a good way to accelerate synchronisation of dependent patches
that must be merged through different trees?  It seemed most reliable to
wait for the mcpm patch to appear in Russell's public tree before posting
the dependent TC2 patch for merging -- the intent was to avoid grief for
maintainers, but this can backfire by causing delays.

If you can handle patches with a dependency that is in-flight via another
maintainer, then I'm happy to send such patches earlier in future, before
the dependency lands (with details about where to watch for it,
obviously).

> Looks like the MCPM-side patch was posted already back in Oct 1. I'm
> not going to flame anyone over this, but it'd be nice if we could find
> out about these things sooner, ideally when breakage hits -next. Dave,
> next time I think I'd prefer to hear about it even if it is during the
> merge window. :-)

OK, noted.  Different people's opinions differ on that sometimes, but
I'll make sure you get the heads-up if a similar situation arises in
the future.

> Anyway, obviously it's needed and I'll apply it.

Thanks.  The kernel "works" without it in practice, though theoretically
it's not 100% safe, and I want to set the right example.

I'll try harder to avoid this kind of hiccup another time.

Cheers
---Dave
Olof Johansson Nov. 25, 2013, 10:11 p.m. UTC | #7
On Mon, Nov 25, 2013 at 06:59:25PM +0000, Dave P Martin wrote:
> On Mon, Nov 25, 2013 at 06:26:46PM +0000, Olof Johansson wrote:
> > On Mon, Nov 25, 2013 at 10:02 AM, Nicolas Pitre
> > <nicolas.pitre@linaro.org> wrote:
> > > On Mon, 25 Nov 2013, Olof Johansson wrote:
> > >
> > >> Hi,
> > >>
> > >> > Since v3.13-rc1, mcpm backends must implement an additional method which
> > >> > the vexpress TC2 implementation doesn't have upstream, resulting in an
> > >> > ugly WARN_ON() during hotplug and kexec operations
> > >>
> > >> Please be more specific here. Which change in the 3.13 merge window caused this?
> > >
> > > Commit 0de0d6467525.
> > >
> > > The corresponding change to tc2-pm.c was initially posted at about the
> > > same time i.e. before the merge window, but it somehow fell into a
> > > crack.
> > 
> > Thanks, I'll add that reference to the patch description when I apply.
> 
> The issue here was that the two patches were developed together and to
> some extent validate each other, but one is an arch change and the other
> is not.  Expecting Russell to accept random board changes through his
> tree seemed a bit cheeky at this point, but I was slow sorting this out
> before the merge window opened.
>
> Is there a good way to accelerate synchronisation of dependent patches
> that must be merged through different trees?  It seemed most reliable to
> wait for the mcpm patch to appear in Russell's public tree before posting
> the dependent TC2 patch for merging -- the intent was to avoid grief for
> maintainers, but this can backfire by causing delays.

I think it might be easier to work on git branches in those cases, since it's
easy for us to share a pull request from you guys (and we'd do the vexpress
piece on top). Without that it becomes more work: Russell has to apply the patch
on a stable branch himself, we have to find out what the branch is, etc.

> If you can handle patches with a dependency that is in-flight via another
> maintainer, then I'm happy to send such patches earlier in future, before
> the dependency lands (with details about where to watch for it,
> obviously).

I think doing a shared branch is by far the easiest way to do this. We want to
avoid pulling in someone else's "main" development branch and keep shared code
to a minimum, since it reduces the merge order dependencies upstream (not
technical merge order, but to avoid us merging the feature from the other
tree).

> > Looks like the MCPM-side patch was posted already back in Oct 1. I'm
> > not going to flame anyone over this, but it'd be nice if we could find
> > out about these things sooner, ideally when breakage hits -next. Dave,
> > next time I think I'd prefer to hear about it even if it is during the
> > merge window. :-)
> 
> OK, noted.  Different people's opinions differ on that sometimes, but
> I'll make sure you get the heads-up if a similar situation arises in
> the future.

Sure. I think a heads up is always OK, btw -- it's just that some maintainers
might choose not to act on it during the merge window.

> > Anyway, obviously it's needed and I'll apply it.
> 
> Thanks.  The kernel "works" without it in practice, though theoretically
> it's not 100% safe, and I want to set the right example.
> 
> I'll try harder to avoid this kind of hiccup another time.

No worries. MCPM+TC2 is hairy since half of it turns out to be core code, half
board code.


-Olof
Dave Martin Nov. 26, 2013, 11:11 a.m. UTC | #8
On Mon, Nov 25, 2013 at 10:11:54PM +0000, Olof Johansson wrote:
> On Mon, Nov 25, 2013 at 06:59:25PM +0000, Dave P Martin wrote:

[...]

> > Is there a good way to accelerate synchronisation of dependent patches
> > that must be merged through different trees?  It seemed most reliable to
> > wait for the mcpm patch to appear in Russell's public tree before posting
> > the dependent TC2 patch for merging -- the intent was to avoid grief for
> > maintainers, but this can backfire by causing delays.
> 
> I think it might be easier to work on git branches in those cases, since it's
> easy for us to share a pull request from you guys (and we'd do the vexpress
> piece on top). Without that it becomes more work for Russell to apply the patch
> on a stable branch himself, us finding out what the branch is, etc.
> 
> > If you can handle patches with a dependency that is in-flight via another
> > maintainer, then I'm happy to send such patches earlier in future, before
> > the dependency lands (with details about where to watch for it,
> > obviously).
> 
> I think doing a shared branch is by far the easiest way to do this. We want to
> avoid pulling in someone else's "main" development branch and keep shared code
> to a minimum, since it reduces the merge order dependencies upstream (not
> technical merge order, but to avoid us merging the feature from the other
> tree).

OK, I'll try and get something set up.

> > > Looks like the MCPM-side patch was posted already back in Oct 1. I'm
> > > not going to flame anyone over this, but it'd be nice if we could find
> > > out about these things sooner, ideally when breakage hits -next. Dave,
> > > next time I think I'd prefer to hear about it even if it is during the
> > > merge window. :-)
> > 
> > OK, noted.  Different people's opinions differ on that sometimes, but
> > I'll make sure you get the heads-up if a similar situation arises in
> > the future.
> 
> Sure. I think a heads up is always OK, btw -- it's just that some maintainers
> might choose not to act on it during the merge window.

Understood.

> > > Anyway, obviously it's needed and I'll apply it.
> > 
> > Thanks.  The kernel "works" without it in practice, though theoretically
> > it's not 100% safe, and I want to set the right example.
> > 
> > I'll try harder to avoid this kind of huccup another time.
> 
> No worries. MCPM+TC2 is hairy since half of it turns out to be core code, half
> board code.

I think this will always tend to happen for the "lead adopter" of a core
framework change?

Cheers
---Dave

Patch

diff --git a/arch/arm/mach-vexpress/spc.c b/arch/arm/mach-vexpress/spc.c
index 033d34d..c26ef5b 100644
--- a/arch/arm/mach-vexpress/spc.c
+++ b/arch/arm/mach-vexpress/spc.c
@@ -53,6 +53,11 @@ 
 #define A15_BX_ADDR0		0x68
 #define A7_BX_ADDR0		0x78
 
+/* SPC CPU/cluster reset status */
+#define STANDBYWFI_STAT		0x3c
+#define STANDBYWFI_STAT_A15_CPU_MASK(cpu)	(1 << (cpu))
+#define STANDBYWFI_STAT_A7_CPU_MASK(cpu)	(1 << (3 + (cpu)))
+
 /* SPC system config interface registers */
 #define SYSCFG_WDATA		0x70
 #define SYSCFG_RDATA		0x74
@@ -213,6 +218,41 @@  void ve_spc_powerdown(u32 cluster, bool enable)
 	writel_relaxed(enable, info->baseaddr + pwdrn_reg);
 }
 
+static u32 standbywfi_cpu_mask(u32 cpu, u32 cluster)
+{
+	return cluster_is_a15(cluster) ?
+		  STANDBYWFI_STAT_A15_CPU_MASK(cpu)
+		: STANDBYWFI_STAT_A7_CPU_MASK(cpu);
+}
+
+/**
+ * ve_spc_cpu_in_wfi(u32 cpu, u32 cluster)
+ *
+ * @cpu: mpidr[7:0] bitfield describing CPU affinity level within cluster
+ * @cluster: mpidr[15:8] bitfield describing cluster affinity level
+ *
+ * @return: non-zero if and only if the specified CPU is in WFI
+ *
+ * Take care when interpreting the result of this function: a CPU might
+ * be in WFI temporarily due to idle, and is not necessarily safely
+ * parked.
+ */
+int ve_spc_cpu_in_wfi(u32 cpu, u32 cluster)
+{
+	int ret;
+	u32 mask = standbywfi_cpu_mask(cpu, cluster);
+
+	if (cluster >= MAX_CLUSTERS)
+		return 1;
+
+	ret = readl_relaxed(info->baseaddr + STANDBYWFI_STAT);
+
+	pr_debug("%s: PCFGREG[0x%X] = 0x%08X, mask = 0x%X\n",
+		 __func__, STANDBYWFI_STAT, ret, mask);
+
+	return ret & mask;
+}
+
 static int ve_spc_get_performance(int cluster, u32 *freq)
 {
 	struct ve_spc_opp *opps = info->opps[cluster];
diff --git a/arch/arm/mach-vexpress/spc.h b/arch/arm/mach-vexpress/spc.h
index dbd44c3..793d065 100644
--- a/arch/arm/mach-vexpress/spc.h
+++ b/arch/arm/mach-vexpress/spc.h
@@ -20,5 +20,6 @@  void ve_spc_global_wakeup_irq(bool set);
 void ve_spc_cpu_wakeup_irq(u32 cluster, u32 cpu, bool set);
 void ve_spc_set_resume_addr(u32 cluster, u32 cpu, u32 addr);
 void ve_spc_powerdown(u32 cluster, bool enable);
+int ve_spc_cpu_in_wfi(u32 cpu, u32 cluster);
 
 #endif
diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
index 05a364c..29e7785 100644
--- a/arch/arm/mach-vexpress/tc2_pm.c
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -12,6 +12,7 @@ 
  * published by the Free Software Foundation.
  */
 
+#include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
@@ -32,11 +33,17 @@ 
 #include "spc.h"
 
 /* SCC conf registers */
+#define RESET_CTRL		0x018
+#define RESET_A15_NCORERESET(cpu)	(1 << (2 + (cpu)))
+#define RESET_A7_NCORERESET(cpu)	(1 << (16 + (cpu)))
+
 #define A15_CONF		0x400
 #define A7_CONF			0x500
 #define SYS_INFO		0x700
 #define SPC_BASE		0xb00
 
+static void __iomem *scc;
+
 /*
  * We can't use regular spinlocks. In the switcher case, it is possible
  * for an outbound CPU to call power_down() after its inbound counterpart
@@ -190,6 +197,55 @@  static void tc2_pm_power_down(void)
 	tc2_pm_down(0);
 }
 
+static int tc2_core_in_reset(unsigned int cpu, unsigned int cluster)
+{
+	u32 mask = cluster ?
+		  RESET_A7_NCORERESET(cpu)
+		: RESET_A15_NCORERESET(cpu);
+
+	return !(readl_relaxed(scc + RESET_CTRL) & mask);
+}
+
+#define POLL_MSEC 10
+#define TIMEOUT_MSEC 1000
+
+static int tc2_pm_power_down_finish(unsigned int cpu, unsigned int cluster)
+{
+	unsigned tries;
+
+	pr_debug("%s: cpu %u cluster %u\n", __func__, cpu, cluster);
+	BUG_ON(cluster >= TC2_CLUSTERS || cpu >= TC2_MAX_CPUS_PER_CLUSTER);
+
+	for (tries = 0; tries < TIMEOUT_MSEC / POLL_MSEC; ++tries) {
+		/*
+		 * Only examine the hardware state if the target CPU has
+		 * caught up at least as far as tc2_pm_down():
+		 */
+		if (ACCESS_ONCE(tc2_pm_use_count[cpu][cluster]) == 0) {
+			pr_debug("%s(cpu=%u, cluster=%u): RESET_CTRL = 0x%08X\n",
+				 __func__, cpu, cluster,
+				 readl_relaxed(scc + RESET_CTRL));
+
+			/*
+			 * We need the CPU to reach WFI, but the power
+			 * controller may put the cluster in reset and
+			 * power it off as soon as that happens, before
+			 * we have a chance to see STANDBYWFI.
+			 *
+			 * So we need to check for both conditions:
+			 */
+			if (tc2_core_in_reset(cpu, cluster) ||
+			    ve_spc_cpu_in_wfi(cpu, cluster))
+				return 0; /* success: the CPU is halted */
+		}
+
+		/* Otherwise, wait and retry: */
+		msleep(POLL_MSEC);
+	}
+
+	return -ETIMEDOUT; /* timeout */
+}
+
 static void tc2_pm_suspend(u64 residency)
 {
 	unsigned int mpidr, cpu, cluster;
@@ -232,10 +288,11 @@  static void tc2_pm_powered_up(void)
 }
 
 static const struct mcpm_platform_ops tc2_pm_power_ops = {
-	.power_up	= tc2_pm_power_up,
-	.power_down	= tc2_pm_power_down,
-	.suspend	= tc2_pm_suspend,
-	.powered_up	= tc2_pm_powered_up,
+	.power_up		= tc2_pm_power_up,
+	.power_down		= tc2_pm_power_down,
+	.power_down_finish	= tc2_pm_power_down_finish,
+	.suspend		= tc2_pm_suspend,
+	.powered_up		= tc2_pm_powered_up,
 };
 
 static bool __init tc2_pm_usage_count_init(void)
@@ -269,7 +326,6 @@  static void __naked tc2_pm_power_up_setup(unsigned int affinity_level)
 static int __init tc2_pm_init(void)
 {
 	int ret, irq;
-	void __iomem *scc;
 	u32 a15_cluster_id, a7_cluster_id, sys_info;
 	struct device_node *np;