diff mbox

[v2,1/2] ARM: shmobile: r8a7740: Add Suspend-To-RAM A3SM

Message ID 1365689264-14410-1-git-send-email-hechtb+renesas@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Bastian Hecht April 11, 2013, 2:07 p.m. UTC
We add 2 Suspend to RAM modes:
- A3SM PLL0 on/off:     Power domain A3SM that contains the ARM core
                        and the 2nd level cache with either PLL0 on
                        or off

As the suspend to memory mechanism we use A3SM PLL off. A3SM PLL on
is included here too, so CPUIdle can use both power down modes (not
included in this patch).

The setup of the SYSC regarding the external IRQs is taken from
pm-sh7372.c from Magnus Damm.

Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
---
This patch relies on
ARM: hw_breakpoint: Do not use __cpuinitdata for dbg_cpu_pm_nb

v2: successor to [PATCH] ARM: shmobile: r8a7740: Add Suspend-To-RAM modes and CPUIdle

- Removed the 2nd L1 flush in the asm shutdown code. See comment in code for explanation.
- Reworked the ifdefs: We can only use CPUIdle if SUSPEND is set as well.
- Removed cpu_do_idle() in r8a7740_enter_suspend: We don't want to sleep twice...

If you want to test this use this workaround in the st1232 driver in
drivers/input/touchscreen/st1232.c:

        error = request_threaded_irq(client->irq, NULL, st1232_ts_irq_handler,
-                                    IRQF_ONESHOT, client->name, ts);
+                                    IRQF_NO_SUSPEND | IRQF_ONESHOT, client->name, ts);

You need it as the current irqpin driver doesn't handle wakeup devices properly yet.

Tested with the 4 possibilities 	L2 on - Suspend on/off - CPUIdle on/off
and with the additional test	L2 off - Suspend on - CPUIdle on

 arch/arm/mach-shmobile/Makefile               |    2 +-
 arch/arm/mach-shmobile/include/mach/r8a7740.h |    3 +
 arch/arm/mach-shmobile/pm-r8a7740.c           |  167 ++++++++++++++++++++++++-
 arch/arm/mach-shmobile/sleep-r8a7740.S        |   72 +++++++++++
 4 files changed, 240 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/mach-shmobile/sleep-r8a7740.S

Comments

Simon Horman April 12, 2013, 12:56 a.m. UTC | #1
On Thu, Apr 11, 2013 at 04:07:43PM +0200, Bastian Hecht wrote:
> We add 2 Suspend to RAM modes:
> - A3SM PLL0 on/off:     Power domain A3SM that contains the ARM core
>                         and the 2nd level cache with either PLL0 on
>                         or off
> 
> As the suspend to memory mechanism we use A3SM PLL off. A3SM PLL on
> is included here too, so CPUIdle can use both power down modes (not
> included in this patch).
> 
> The setup of the SYSC regarding the external IRQs is taken from
> pm-sh7372.c from Magnus Damm.
> 
> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
> ---
> This patch relies on
> ARM: hw_breakpoint: Do not use __cpuinitdata for dbg_cpu_pm_nb
> 
> v2: successor to [PATCH] ARM: shmobile: r8a7740: Add Suspend-To-RAM modes and CPUIdle
> 
> - Removed the 2nd L1 flush in the asm shutdown code. See comment in code for explanation.
> - Reworked the ifdefs: We can only use CPUIdle if SUSPEND is set as well.
> - Removed cpu_do_idle() in r8a7740_enter_suspend: We don't want to sleep twice...
> 
> If you want to test this use this workaround in the st1232 driver in
> drivers/input/touchscreen/st1232.c:
> 
>         error = request_threaded_irq(client->irq, NULL, st1232_ts_irq_handler,
> -                                    IRQF_ONESHOT, client->name, ts);
> +                                    IRQF_NO_SUSPEND | IRQF_ONESHOT, client->name, ts);
> 
> You need it as the current irqpin driver doesn't handle wakeup devices properly yet.
> 
> Tested with the 4 possiblities 	L2 on - Suspend on/off - CPUIdle on/off
> and with the additional test	L2 off - Suspend on - CPUIdle on

Hi Bastian,

are the pre-requisites listed above likely to be in v3.11-rc1 or rc2?

>  arch/arm/mach-shmobile/Makefile               |    2 +-
>  arch/arm/mach-shmobile/include/mach/r8a7740.h |    3 +
>  arch/arm/mach-shmobile/pm-r8a7740.c           |  167 ++++++++++++++++++++++++-
>  arch/arm/mach-shmobile/sleep-r8a7740.S        |   72 +++++++++++
>  4 files changed, 240 insertions(+), 4 deletions(-)
>  create mode 100644 arch/arm/mach-shmobile/sleep-r8a7740.S
> 
> diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile
> index 068f1da..0568894 100644
> --- a/arch/arm/mach-shmobile/Makefile
> +++ b/arch/arm/mach-shmobile/Makefile
> @@ -30,7 +30,7 @@ obj-$(CONFIG_SUSPEND)		+= suspend.o
>  obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
>  obj-$(CONFIG_ARCH_SHMOBILE)	+= pm-rmobile.o
>  obj-$(CONFIG_ARCH_SH7372)	+= pm-sh7372.o sleep-sh7372.o
> -obj-$(CONFIG_ARCH_R8A7740)	+= pm-r8a7740.o
> +obj-$(CONFIG_ARCH_R8A7740)	+= pm-r8a7740.o sleep-r8a7740.o
>  obj-$(CONFIG_ARCH_R8A7779)	+= pm-r8a7779.o
>  obj-$(CONFIG_ARCH_SH73A0)	+= pm-sh73a0.o
>  
> diff --git a/arch/arm/mach-shmobile/include/mach/r8a7740.h b/arch/arm/mach-shmobile/include/mach/r8a7740.h
> index abdc4d4..05551ee 100644
> --- a/arch/arm/mach-shmobile/include/mach/r8a7740.h
> +++ b/arch/arm/mach-shmobile/include/mach/r8a7740.h
> @@ -540,6 +540,9 @@ extern void r8a7740_add_standard_devices(void);
>  extern void r8a7740_clock_init(u8 md_ck);
>  extern void r8a7740_pinmux_init(void);
>  extern void r8a7740_pm_init(void);
> +extern void r8a7740_resume(void);
> +extern void r8a7740_shutdown(void);
> +extern void r8a7740_enter_a3sm_common(int);
>  
>  #ifdef CONFIG_PM
>  extern void __init r8a7740_init_pm_domains(void);
> diff --git a/arch/arm/mach-shmobile/pm-r8a7740.c b/arch/arm/mach-shmobile/pm-r8a7740.c
> index 40b87aa..adadac4 100644
> --- a/arch/arm/mach-shmobile/pm-r8a7740.c
> +++ b/arch/arm/mach-shmobile/pm-r8a7740.c
> @@ -8,10 +8,52 @@
>   * License.  See the file "COPYING" in the main directory of this archive
>   * for more details.
>   */
> +#include <linux/bitrev.h>
>  #include <linux/console.h>
> +#include <linux/module.h>
>  #include <linux/suspend.h>
> +#include <linux/io.h>
> +#include <asm/suspend.h>
> +#include <asm/cacheflush.h>
> +#include <asm/hardware/cache-l2x0.h>
>  #include <mach/pm-rmobile.h>
>  #include <mach/common.h>
> +#include <mach/r8a7740.h>
> +
> +/* CPGA */
> +#define PLLC01STPCR 	IOMEM(0xe61500c8)
> +#define SYSTBCR		IOMEM(0xe6150024)
> +
> +/* SYSC */
> +#define STBCHR		IOMEM(0xe6180000)
> +#define STBCHRB		IOMEM(0xe6180040)
> +#define SPDCR		IOMEM(0xe6180008)
> +#define SBAR		IOMEM(0xe6180020)
> +#define SRSTFR		IOMEM(0xe61800B4)
> +#define WUPSMSK		IOMEM(0xe618002c)
> +#define WUPSMSK2	IOMEM(0xe6180048)
> +#define WUPSFAC		IOMEM(0xe6180098)
> +#define IRQCR 		IOMEM(0xe618022c)
> +#define IRQCR2 		IOMEM(0xe6180238)
> +#define IRQCR3		IOMEM(0xe6180244)
> +#define IRQCR4		IOMEM(0xe6180248)
> +
> +/* SRSTFR flags */
> +#define RAMST		(1 << 19)
> +#define RCLNKA		(1 << 7)
> +#define RCPRES		(1 << 5)
> +#define RCWD1		(1 << 4)
> +#define RPF		(1 << 0)
> +
> +/* INTC */
> +#define ICR1A		IOMEM(0xe6900000)
> +#define ICR2A		IOMEM(0xe6900004)
> +#define ICR3A		IOMEM(0xe6900008)
> +#define ICR4A		IOMEM(0xe690000c)
> +#define INTMSK00A	IOMEM(0xe6900040)
> +#define INTMSK10A	IOMEM(0xe6900044)
> +#define INTMSK20A	IOMEM(0xe6900048)
> +#define INTMSK30A	IOMEM(0xe690004c)
>  
>  #ifdef CONFIG_PM
>  static int r8a7740_pd_a4s_suspend(void)
> @@ -58,13 +100,132 @@ void __init r8a7740_init_pm_domains(void)
>  	rmobile_init_domains(r8a7740_pm_domains, ARRAY_SIZE(r8a7740_pm_domains));
>  	pm_genpd_add_subdomain_names("A4S", "A3SP");
>  }
> -
>  #endif /* CONFIG_PM */
>  
>  #ifdef CONFIG_SUSPEND
> +static void r8a7740_set_reset_vector(unsigned long address)
> +{
> +	__raw_writel(address, SBAR);
> +}
> +
> +static void r8a7740_icr_to_irqcr(unsigned long icr, u16 *irqcr1p, u16 *irqcr2p)
> +{
> +	u16 tmp, irqcr1, irqcr2;
> +	int k;
> +
> +	irqcr1 = 0;
> +	irqcr2 = 0;
> +
> +	/* convert INTCA ICR register layout to SYSC IRQCR+IRQCR2 */
> +	for (k = 0; k <= 7; k++) {
> +		tmp = (icr >> ((7 - k) * 4)) & 0xf;
> +		irqcr1 |= (tmp & 0x03) << (k * 2);
> +		irqcr2 |= (tmp >> 2) << (k * 2);
> +	}
> +
> +	*irqcr1p = irqcr1;
> +	*irqcr2p = irqcr2;
> +}
> +
> +static void r8a7740_setup_sysc(unsigned long msk, unsigned long msk2)
> +{
> +	u16 irqcrx_low, irqcrx_high, irqcry_low, irqcry_high;
> +	unsigned long tmp;
> +
> +	/* read IRQ0A -> IRQ15A mask */
> +	tmp = bitrev8(__raw_readb(INTMSK00A));
> +	tmp |= bitrev8(__raw_readb(INTMSK10A)) << 8;
> +
> +	/* setup WUPSMSK from clocks and external IRQ mask */
> +	msk = (~msk & 0xc030000f) | (tmp << 4);
> +	__raw_writel(msk, WUPSMSK);
> +
> +	/* propage level/edge trigger for external IRQ 0->15 */
> +	r8a7740_icr_to_irqcr(__raw_readl(ICR1A), &irqcrx_low, &irqcry_low);
> +	r8a7740_icr_to_irqcr(__raw_readl(ICR2A), &irqcrx_high, &irqcry_high);
> +	__raw_writel((irqcrx_high << 16) | irqcrx_low, IRQCR);
> +	__raw_writel((irqcry_high << 16) | irqcry_low, IRQCR2);
> +
> +	/* read IRQ16A -> IRQ31A mask */
> +	tmp = bitrev8(__raw_readb(INTMSK20A));
> +	tmp |= bitrev8(__raw_readb(INTMSK30A)) << 8;
> +
> +	/* setup WUPSMSK2 from clocks and external IRQ mask */
> +	msk2 = (~msk2 & 0x00030000) | tmp;
> +	__raw_writel(msk2, WUPSMSK2);
> +
> +	/* propage level/edge trigger for external IRQ 16->31 */
> +	r8a7740_icr_to_irqcr(__raw_readl(ICR3A), &irqcrx_low, &irqcry_low);
> +	r8a7740_icr_to_irqcr(__raw_readl(ICR4A), &irqcrx_high, &irqcry_high);
> +	__raw_writel((irqcrx_high << 16) | irqcrx_low, IRQCR3);
> +	__raw_writel((irqcry_high << 16) | irqcry_low, IRQCR4);
> +}
> +
> +static void r8a7740_prepare_wakeup(void)
> +{
> +	/* clear all flags that lead to a cold boot */
> +	__raw_writel(~(RAMST | RCLNKA | RCPRES | RCWD1 | RPF), SRSTFR);
> +	/* indicate warm boot */
> +	__raw_writel(0x80000000, STBCHRB);
> +	/* clear other flags checked by internal ROM boot loader */
> +	__raw_writel(0x00000000, STBCHR);
> +}
> +
> +static int r8a7740_do_suspend(unsigned long unused)
> +{
> +	/*
> +	 * cpu_suspend() guarantees that all data made it to the L2.
> +	 * Flush it out now and disable the cache controller.
> +	 */
> +	outer_flush_all();
> +	outer_disable();
> +
> +	r8a7740_shutdown();
> +
> +	/* in case WFI fails to enter the low power state, restore things */
> +	outer_resume();
> +
> +	return 0;
> +}
> +
> +void r8a7740_enter_a3sm_common(int pllc0_on)
> +{
> +	u32 reg32;
> +
> +	if (pllc0_on)
> +		__raw_writel(0, PLLC01STPCR);
> +	else
> +		__raw_writel(1 << 28, PLLC01STPCR);
> +
> +	r8a7740_set_reset_vector(__pa(r8a7740_resume));
> +	r8a7740_prepare_wakeup();
> +	r8a7740_setup_sysc(1 << 0, 0);
> +
> +	/* Activate delayed shutdown of A3SM */
> +	reg32 = __raw_readl(SPDCR);
> +	reg32 |= (1 << 31) | (1 << 12);
> +	__raw_writel(reg32, SPDCR);
> +
> +	/* We activate CPU Core Standby as well here */
> +	reg32 = __raw_readl(SYSTBCR);
> +	reg32 |= (1 << 4);
> +	__raw_writel(reg32, SYSTBCR);
> +
> +	/* Clear Wakeup Factors and do suspend */
> +	reg32 = __raw_readl(WUPSFAC);
> +	cpu_suspend(0, r8a7740_do_suspend);
> +	outer_resume();
> +	reg32 = __raw_readl(WUPSFAC);
> +
> +	/* Clear CPU Core Standby flag for other WFI instructions */
> +	reg32 &= ~(1 << 4);
> +	__raw_writel(reg32, SYSTBCR);
> +
> +}
> +
>  static int r8a7740_enter_suspend(suspend_state_t suspend_state)
>  {
> -	cpu_do_idle();
> +	r8a7740_enter_a3sm_common(0);
>  	return 0;
>  }
>  
> @@ -74,7 +235,7 @@ static void r8a7740_suspend_init(void)
>  }
>  #else
>  static void r8a7740_suspend_init(void) {}
> -#endif
> +#endif /* CONFIG_SUSPEND */
>  
>  void __init r8a7740_pm_init(void)
>  {
> diff --git a/arch/arm/mach-shmobile/sleep-r8a7740.S b/arch/arm/mach-shmobile/sleep-r8a7740.S
> new file mode 100644
> index 0000000..762f978
> --- /dev/null
> +++ b/arch/arm/mach-shmobile/sleep-r8a7740.S
> @@ -0,0 +1,72 @@
> +/*
> + * Low level sleep code for the SoC r8a7740
> + *
> + * Copyright (C) 2013 Bastian Hecht
> + *
> + * This file is subject to the terms and conditions of the GNU General Public
> + * License.  See the file "COPYING" in the main directory of this archive
> + * for more details.
> + */
> +
> +#include <linux/linkage.h>
> +#include <linux/init.h>
> +#include <asm/memory.h>
> +
> +#ifdef CONFIG_SUSPEND
> +
> +/* r8a7740_shutdown expects L2 to be flushed */
> +	.text
> +ENTRY(r8a7740_shutdown)
> +	stmfd   sp!, {r4-r12, lr}
> +
> +	/* make sure the stack stays intact */
> +	bl      v7_flush_dcache_all
> +
> +	/*
> +	 * Clear the SCTLR.C bit to prevent further data cache
> +	 * allocation. Clearing SCTLR.C would make all the data accesses
> +	 * strongly ordered and would not hit the cache.
> +	 */
> +	mrc	p15, 0, r0, c1, c0, 0
> +	bic	r0, r0, #(1 << 2)		@ Disable the C bit
> +	mcr	p15, 0, r0, c1, c0, 0
> +	isb
> +
> +	/*
> +	 * We don't issue another v7_flush_dcache_all here as seen in many
> +	 * other places as we have a UP core and the L1 could not soak up
> +	 * data from other L1 caches in the meantime.
> +	 */
> +
> +	bl	cpu_v7_do_idle
> +
> +	/* in rare cases when WFI fails we end up here and restore things */
> +	mrc	p15, 0, r0, c1, c0, 0
> +	orr	r0, r0, #(1 << 2)		@ Enable the C bit
> +	mcr	p15, 0, r0, c1, c0, 0
> +	isb
> +
> +	ldmfd   sp!, {r4-r12, pc}
> +ENDPROC(r8a7740)
> +
> +	.text
> +ENTRY(v7_cpu_resume)
> +	bl	v7_invalidate_l1
> +	b	cpu_resume
> +ENDPROC(v7_cpu_resume)
> +
> +/*
> + * The entry point of a warm reboot, used by wakeup scenarios
> + *
> + * The CPU jumps in this case to (0xfffff000 & SBAR), so we need
> + * to align this function properly.
> + * We use a long jump into the text segment and use the physical
> + * address as the MMU is still turned off.
> + */
> +	.align	12
> +	.text
> +ENTRY(r8a7740_resume)
> +	ldr	pc, 1f
> +1:	.long	v7_cpu_resume - PAGE_OFFSET + PLAT_PHYS_OFFSET
> +ENDPROC(r8a7740_resume_core_standby)
> +#endif
> -- 
> 1.7.9.5
>
Bastian Hecht April 12, 2013, 12:54 p.m. UTC | #2
Hi Simon,

2013/4/12 Simon Horman <horms@verge.net.au>:
> On Thu, Apr 11, 2013 at 04:07:43PM +0200, Bastian Hecht wrote:
>> We add 2 Suspend to RAM modes:
>> - A3SM PLL0 on/off:     Power domain A3SM that contains the ARM core
>>                         and the 2nd level cache with either PLL0 on
>>                         or off
>>
>> As the suspend to memory mechanism we use A3SM PLL off. A3SM PLL on
>> is included here too, so CPUIdle can use both power down modes (not
>> included in this patch).
>>
>> The setup of the SYSC regarding the external IRQs is taken from
>> pm-sh7372.c from Magnus Damm.
>>
>> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
>> ---
>> This patch relies on
>> ARM: hw_breakpoint: Do not use __cpuinitdata for dbg_cpu_pm_nb
>>
>> v2: successor to [PATCH] ARM: shmobile: r8a7740: Add Suspend-To-RAM modes and CPUIdle
>>
>> - Removed the 2nd L1 flush in the asm shutdown code. See comment in code for explanation.
>> - Reworked the ifdefs: We can only use CPUIdle if SUSPEND is set as well.
>> - Removed cpu_do_idle() in r8a7740_enter_suspend: We don't want to sleep twice...
>>
>> If you want to test this use this workaround in the st1232 driver in
>> drivers/input/touchscreen/st1232.c:
>>
>>         error = request_threaded_irq(client->irq, NULL, st1232_ts_irq_handler,
>> -                                    IRQF_ONESHOT, client->name, ts);
>> +                                    IRQF_NO_SUSPEND | IRQF_ONESHOT, client->name, ts);
>>
>> You need it as the current irqpin driver doesn't handle wakeup devices properly yet.
>>
>> Tested with the 4 possiblities        L2 on - Suspend on/off - CPUIdle on/off
>> and with the additional test  L2 off - Suspend on - CPUIdle on
>
> Hi Bastian,
>
> are the pre-requisites listed above likely to be in v3.11-rc1 or rc2?

I probably was a bit incorrect by saying "relies on". Right now the
complete suspend system is broken on ARM UP systems. So I think we can
use this patch without coordination regarding the other patch. It's
just when you want to test this here, then you need the above
mentioned patch to fix the system first before you can do anything.

Cheers,

 Bastian
Lorenzo Pieralisi April 12, 2013, 2:26 p.m. UTC | #3
On Thu, Apr 11, 2013 at 03:07:43PM +0100, Bastian Hecht wrote:

[...]

> diff --git a/arch/arm/mach-shmobile/sleep-r8a7740.S b/arch/arm/mach-shmobile/sleep-r8a7740.S
> new file mode 100644
> index 0000000..762f978
> --- /dev/null
> +++ b/arch/arm/mach-shmobile/sleep-r8a7740.S
> @@ -0,0 +1,72 @@
> +/*
> + * Low level sleep code for the SoC r8a7740
> + *
> + * Copyright (C) 2013 Bastian Hecht
> + *
> + * This file is subject to the terms and conditions of the GNU General Public
> + * License.  See the file "COPYING" in the main directory of this archive
> + * for more details.
> + */
> +
> +#include <linux/linkage.h>
> +#include <linux/init.h>
> +#include <asm/memory.h>
> +
> +#ifdef CONFIG_SUSPEND
> +
> +/* r8a7740_shutdown expects L2 to be flushed */
> +	.text
> +ENTRY(r8a7740_shutdown)
> +	stmfd   sp!, {r4-r12, lr}
> +
> +	/* make sure the stack stays intact */
> +	bl      v7_flush_dcache_all
> +

Why do not you move the cache flush above after clearing the C bit ?
You must not push on the stack _after_ clearing the C bit and before
cleaning the cache, that's the only requirement (well basically you
should not write any data and do not rely on any dirty data sitting
in the cache, since with the C bit cleared as I already said A9 L1
D-cache is not searched).

AS long as, with C bit cleared, you do NOT push on the stack before
cleaning the cache, your cache flushing routine will clean data to DRAM,
so you are fine. v7_flush_dcache_all does not use the stack.

	mrc	p15, 0, r0, c1, c0, 0
	bic	r0, r0, #(1 << 2)		@ Disable the C bit
	mcr	p15, 0, r0, c1, c0, 0
	isb
	bl      v7_flush_dcache_all

This code snippet will do and it is compliant with the SMP procedure,
I do not want people to copy'n'paste code that does not work on SMP,
I know this code runs on UP so the sequence above is safe, but why not make
the sequence identical for both UP/SMP ?

> +	/*
> +	 * Clear the SCTLR.C bit to prevent further data cache
> +	 * allocation. Clearing SCTLR.C would make all the data accesses
> +	 * strongly ordered and would not hit the cache.
> +	 */
> +	mrc	p15, 0, r0, c1, c0, 0
> +	bic	r0, r0, #(1 << 2)		@ Disable the C bit
> +	mcr	p15, 0, r0, c1, c0, 0
> +	isb
> +
> +	/*
> +	 * We don't issue another v7_flush_dcache_all here as seen in many
> +	 * other places as we have a UP core and the L1 could not soak up
> +	 * data from other L1 caches in the meantime.
> +	 */
> +
> +	bl	cpu_v7_do_idle
> +
> +	/* in rare cases when WFI fails we end up here and restore things */

I am still baffled by this "wfi failures" and how hardware can manage
this situation _properly_, if you can explain to me how this works
that would be grand. This code is also called by CPUidle right ? So,
IRQs can still cause wfi completion, there has to be a handshake between
the core and power controller to prevent the CPU from being shutdown
while executing code (power down command sent, standbywfi asserted and
then released following wfi completion).

Anyway, I do not like the code sequence (and I know that eg OMAP4 executes
the same code) above because again, it works on UP, but people might be
tempted to copy'n'paste it to their SMP implementation (with the addition
of SMP bit restore in ACTLR).

OMAP4 sequence is missing TLB flush since the core runs for a time window with
incoherent TLBs (out of SMP). It works because other cores can't update
their page tables while the core exiting wfi is running out of coherency
(ie coupled idle C-states, either all CPUs down or no CPU down), but
again, that code should not be copied verbatim since it fails to flush
TLBs, AFAIK.

Again, you are running a UP kernel so this code does not suffer from
the same problem, but if this code is copied and used on SMP platforms then
we do have problem and that has to be prevented.

Why do not you turn MMU off (cpu_reset) and jump to the reset vector if
wfi fails (code in arch/arm/kernel/process.c __soft_restart(), minus the
cache flushing ?

	phys_reset_t phys_reset;

	/* Take out a flat memory mapping. */
	setup_mm_for_reboot();

	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
	phys_reset((unsigned long)addr);

	BUG();

where addr == virt_to_phys(cpu_resume);

from what I gather from the code below.

There is just a tiny performance hit owing to a couple of fetches from
DRAM (MMU off), then cpu_resume restores the C bit and the MMU (and flushes
the TLBs) so the virtual address space and caches are back on. By the time wfi
fails, caches are invalid anyway so in both cases they have to be refilled.

On top of that, this code is not really SH specific, it is plain v7 code
that can be standardised.

For now it is just a, hopefully useful, rant.

Lorenzo

> +	mrc	p15, 0, r0, c1, c0, 0
> +	orr	r0, r0, #(1 << 2)		@ Enable the C bit
> +	mcr	p15, 0, r0, c1, c0, 0
> +	isb
> +
> +	ldmfd   sp!, {r4-r12, pc}
> +ENDPROC(r8a7740)
> +
> +	.text
> +ENTRY(v7_cpu_resume)
> +	bl	v7_invalidate_l1
> +	b	cpu_resume
> +ENDPROC(v7_cpu_resume)
> +
> +/*
> + * The entry point of a warm reboot, used by wakeup scenarios
> + *
> + * The CPU jumps in this case to (0xfffff000 & SBAR), so we need
> + * to align this function properly.
> + * We use a long jump into the text segment and use the physical
> + * address as the MMU is still turned off.
> + */
> +	.align	12
> +	.text
> +ENTRY(r8a7740_resume)
> +	ldr	pc, 1f
> +1:	.long	v7_cpu_resume - PAGE_OFFSET + PLAT_PHYS_OFFSET
> +ENDPROC(r8a7740_resume_core_standby)
> +#endif
> --
Simon Horman April 15, 2013, 4:01 a.m. UTC | #4
On Fri, Apr 12, 2013 at 02:54:46PM +0200, Bastian Hecht wrote:
> Hi Simon,
> 
> 2013/4/12 Simon Horman <horms@verge.net.au>:
> > On Thu, Apr 11, 2013 at 04:07:43PM +0200, Bastian Hecht wrote:
> >> We add 2 Suspend to RAM modes:
> >> - A3SM PLL0 on/off:     Power domain A3SM that contains the ARM core
> >>                         and the 2nd level cache with either PLL0 on
> >>                         or off
> >>
> >> As the suspend to memory mechanism we use A3SM PLL off. A3SM PLL on
> >> is included here too, so CPUIdle can use both power down modes (not
> >> included in this patch).
> >>
> >> The setup of the SYSC regarding the external IRQs is taken from
> >> pm-sh7372.c from Magnus Damm.
> >>
> >> Signed-off-by: Bastian Hecht <hechtb+renesas@gmail.com>
> >> ---
> >> This patch relies on
> >> ARM: hw_breakpoint: Do not use __cpuinitdata for dbg_cpu_pm_nb
> >>
> >> v2: successor to [PATCH] ARM: shmobile: r8a7740: Add Suspend-To-RAM modes and CPUIdle
> >>
> >> - Removed the 2nd L1 flush in the asm shutdown code. See comment in code for explanation.
> >> - Reworked the ifdefs: We can only use CPUIdle if SUSPEND is set as well.
> >> - Removed cpu_do_idle() in r8a7740_enter_suspend: We don't want to sleep twice...
> >>
> >> If you want to test this use this workaround in the st1232 driver in
> >> drivers/input/touchscreen/st1232.c:
> >>
> >>         error = request_threaded_irq(client->irq, NULL, st1232_ts_irq_handler,
> >> -                                    IRQF_ONESHOT, client->name, ts);
> >> +                                    IRQF_NO_SUSPEND | IRQF_ONESHOT, client->name, ts);
> >>
> >> You need it as the current irqpin driver doesn't handle wakeup devices properly yet.
> >>
> >> Tested with the 4 possiblities        L2 on - Suspend on/off - CPUIdle on/off
> >> and with the additional test  L2 off - Suspend on - CPUIdle on
> >
> > Hi Bastian,
> >
> > are the pre-requisites listed above likely to be in v3.11-rc1 or rc2?
> 
> I probably was a bit incorrect by saying "relies on". Right now the
> complete suspend system is broken on ARM UP systems. So I think we can
> use this patch without coordination regarding the other patch. It's
> just when you want to test this here, then you need the above
> mentioned patch to fix the system first before you can do anything.

Thanks that seems reasonable.

However, I will hold of queuing-up this series as there seems
to be some discussion relating to patch 1/2. Please let me know
when that is resolved via discussion or a fresh series.
Bastian Hecht April 15, 2013, 11:33 a.m. UTC | #5
Hello Lorenzo,

thanks for the review.

2013/4/12 Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>:
> On Thu, Apr 11, 2013 at 03:07:43PM +0100, Bastian Hecht wrote:
>
> [...]
>
>> diff --git a/arch/arm/mach-shmobile/sleep-r8a7740.S b/arch/arm/mach-shmobile/sleep-r8a7740.S
>> new file mode 100644
>> index 0000000..762f978
>> --- /dev/null
>> +++ b/arch/arm/mach-shmobile/sleep-r8a7740.S
>> @@ -0,0 +1,72 @@
>> +/*
>> + * Low level sleep code for the SoC r8a7740
>> + *
>> + * Copyright (C) 2013 Bastian Hecht
>> + *
>> + * This file is subject to the terms and conditions of the GNU General Public
>> + * License.  See the file "COPYING" in the main directory of this archive
>> + * for more details.
>> + */
>> +
>> +#include <linux/linkage.h>
>> +#include <linux/init.h>
>> +#include <asm/memory.h>
>> +
>> +#ifdef CONFIG_SUSPEND
>> +
>> +/* r8a7740_shutdown expects L2 to be flushed */
>> +     .text
>> +ENTRY(r8a7740_shutdown)
>> +     stmfd   sp!, {r4-r12, lr}
>> +
>> +     /* make sure the stack stays intact */
>> +     bl      v7_flush_dcache_all
>> +
>
> Why do not you move the cache flush above after clearing the C bit ?
> You must not push on the stack _after_ clearing the C bit and before
> cleaning the cache, that's the only requirement (well basically you
> should not write any data and do not rely on any dirty data sitting
> in the cache, since with the C bit cleared as I already said A9 L1
> D-cache is not searched).
>
> AS long as, with C bit cleared, you do NOT push on the stack before
> cleaning the cache, your cache flushing routine will clean data to DRAM,
> so you are fine. v7_flush_dcache_all does not use the stack.
>
>         mrc     p15, 0, r0, c1, c0, 0
>         bic     r0, r0, #(1 << 2)               @ Disable the C bit
>         mcr     p15, 0, r0, c1, c0, 0
>         isb
>         bl      v7_flush_dcache_all
>
> This code snippet will do and it is compliant with the SMP procedure,
> I do not want people to copy'n'paste code that does not work on SMP,
> I know this code runs on UP so the sequence above is safe, but why not make
> the sequence identical for both UP/SMP ?

Now this looks super straight forward. I wonder how I came up with
inverted order (even if works in the UP case). Probably when you look
at others code and they put a v7_flush_dcache_all before the disable,
one (meaning me...) starts to bend the underlying ideas about the
caches until it fits to the code you see.

Anyway: Of course I'll take the code sequence then that works for the
UP and SMP cases.

>> +     /*
>> +      * Clear the SCTLR.C bit to prevent further data cache
>> +      * allocation. Clearing SCTLR.C would make all the data accesses
>> +      * strongly ordered and would not hit the cache.
>> +      */
>> +     mrc     p15, 0, r0, c1, c0, 0
>> +     bic     r0, r0, #(1 << 2)               @ Disable the C bit
>> +     mcr     p15, 0, r0, c1, c0, 0
>> +     isb
>> +
>> +     /*
>> +      * We don't issue another v7_flush_dcache_all here as seen in many
>> +      * other places as we have a UP core and the L1 could not soak up
>> +      * data from other L1 caches in the meantime.
>> +      */
>> +
>> +     bl      cpu_v7_do_idle
>> +
>> +     /* in rare cases when WFI fails we end up here and restore things */
>
> I am still baffled by this "wfi failures" and how hardware can manage
> this situation _properly_, if you can explain to me how this works
> that would be grand.

I unfortunately don't know about the hardware design of this SoC.

> This code is also called by CPUidle right ?

I haven't experienced this when using suspend-to-ram but only for
CPUIdle. Of course this might be just a matter of probability though.

> So,
> IRQs can still cause wfi completion, there has to be a handshake between
> the core and power controller to prevent the CPU from being shutdown
> while executing code (power down command sent, standbywfi asserted and
> then released following wfi completion).
>
> Anyway, I do not like the code sequence (and I know that eg OMAP4 executes
> the same code) above because again, it works on UP, but people might be
> tempted to copy'n'paste it to their SMP implementation (with the addition
> of SMP bit restore in ACTLR).
>
> OMAP4 sequence is missing TLB flush since the core runs for a time window with
> incoherent TLBs (out of SMP). It works because other cores can't update
> their page tables while the core exiting wfi is running out of coherency
> (ie coupled idle C-states, either all CPUs down or no CPU down), but
> again, that code should not be copied verbatim since it fails to flush
> TLBs, AFAIK.
>
> Again, you are running a UP kernel so this code does not suffer from
> the same problem, but if this code is copied and used on SMP platforms then
> we do have problem and that has to be prevented.
>
> Why do not you turn MMU off (cpu_reset) and jump to the reset vector if
> wfi fails (code in arch/arm/kernel/process.c __soft_restart(), minus the
> cache flushing ?
>
>         phys_reset_t phys_reset;
>
>         /* Take out a flat memory mapping. */
>         setup_mm_for_reboot();
>
>         phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
>         phys_reset((unsigned long)addr);
>
>         BUG();
>
> where addr == virt_to_phys(cpu_resume);
>
> from what I gather from the code below.
>
> There is just a tiny performance hit owing to a couple of fetches from
> DRAM (MMU off), then cpu_resume restores the C bit and the MMU (and flushes
> the TLBs) so the virtual address space and caches are back on. By the time wfi
> fails, caches are invalid anyway so in both cases they have to be refilled.
>
> On top of that, this code is not really SH specific, it is plain v7 code
> that can be standardised.

Ok that sounds good. As we don't touch the hotpath I don't care much
about some extra cycles and I appreciate moving towards
standardization. I'll try that out and send a v3 with the two changes.

So thanks a lot,

 Bastian

> For now it is just a, hopefully useful, rant.
>
> Lorenzo
>
>> +     mrc     p15, 0, r0, c1, c0, 0
>> +     orr     r0, r0, #(1 << 2)               @ Enable the C bit
>> +     mcr     p15, 0, r0, c1, c0, 0
>> +     isb
>> +
>> +     ldmfd   sp!, {r4-r12, pc}
>> +ENDPROC(r8a7740)
>> +
>> +     .text
>> +ENTRY(v7_cpu_resume)
>> +     bl      v7_invalidate_l1
>> +     b       cpu_resume
>> +ENDPROC(v7_cpu_resume)
>> +
>> +/*
>> + * The entry point of a warm reboot, used by wakeup scenarios
>> + *
>> + * The CPU jumps in this case to (0xfffff000 & SBAR), so we need
>> + * to align this function properly.
>> + * We use a long jump into the text segment and use the physical
>> + * address as the MMU is still turned off.
>> + */
>> +     .align  12
>> +     .text
>> +ENTRY(r8a7740_resume)
>> +     ldr     pc, 1f
>> +1:   .long   v7_cpu_resume - PAGE_OFFSET + PLAT_PHYS_OFFSET
>> +ENDPROC(r8a7740_resume_core_standby)
>> +#endif
>> --
>
Bastian Hecht April 15, 2013, 1:29 p.m. UTC | #6
2013/4/15 Bastian Hecht <hechtb@gmail.com>:
> Hello Lorenzo,
>
> thanks for the review.
>
> 2013/4/12 Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>:
>> On Thu, Apr 11, 2013 at 03:07:43PM +0100, Bastian Hecht wrote:
>>
>> [...]
>>
>>> diff --git a/arch/arm/mach-shmobile/sleep-r8a7740.S b/arch/arm/mach-shmobile/sleep-r8a7740.S
>>> new file mode 100644
>>> index 0000000..762f978
>>> --- /dev/null
>>> +++ b/arch/arm/mach-shmobile/sleep-r8a7740.S
>>> @@ -0,0 +1,72 @@
>>> +/*
>>> + * Low level sleep code for the SoC r8a7740
>>> + *
>>> + * Copyright (C) 2013 Bastian Hecht
>>> + *
>>> + * This file is subject to the terms and conditions of the GNU General Public
>>> + * License.  See the file "COPYING" in the main directory of this archive
>>> + * for more details.
>>> + */
>>> +
>>> +#include <linux/linkage.h>
>>> +#include <linux/init.h>
>>> +#include <asm/memory.h>
>>> +
>>> +#ifdef CONFIG_SUSPEND
>>> +
>>> +/* r8a7740_shutdown expects L2 to be flushed */
>>> +     .text
>>> +ENTRY(r8a7740_shutdown)
>>> +     stmfd   sp!, {r4-r12, lr}
>>> +
>>> +     /* make sure the stack stays intact */
>>> +     bl      v7_flush_dcache_all
>>> +
>>
>> Why do not you move the cache flush above after clearing the C bit ?
>> You must not push on the stack _after_ clearing the C bit and before
>> cleaning the cache, that's the only requirement (well basically you
>> should not write any data and do not rely on any dirty data sitting
>> in the cache, since with the C bit cleared as I already said A9 L1
>> D-cache is not searched).
>>
>> AS long as, with C bit cleared, you do NOT push on the stack before
>> cleaning the cache, your cache flushing routine will clean data to DRAM,
>> so you are fine. v7_flush_dcache_all does not use the stack.
>>
>>         mrc     p15, 0, r0, c1, c0, 0
>>         bic     r0, r0, #(1 << 2)               @ Disable the C bit
>>         mcr     p15, 0, r0, c1, c0, 0
>>         isb
>>         bl      v7_flush_dcache_all
>>
>> This code snippet will do and it is compliant with the SMP procedure,
>> I do not want people to copy'n'paste code that does not work on SMP,
>> I know this code runs on UP so the sequence above is safe, but why not make
>> the sequence identical for both UP/SMP ?
>
> Now this looks super straightforward. I wonder how I came up with the
> inverted order (even if it works in the UP case). Probably when you look
> at others code and they put a v7_flush_dcache_all before the disable,
> one (meaning me...) starts to bend the underlying ideas about the
> caches until it fits to the code you see.
>
> Anyway: Of course I'll take the code sequence then that works for the
> UP and SMP cases.
>
>>> +     /*
>>> +      * Clear the SCTLR.C bit to prevent further data cache
>>> +      * allocation. Clearing SCTLR.C would make all the data accesses
>>> +      * strongly ordered and would not hit the cache.
>>> +      */
>>> +     mrc     p15, 0, r0, c1, c0, 0
>>> +     bic     r0, r0, #(1 << 2)               @ Disable the C bit
>>> +     mcr     p15, 0, r0, c1, c0, 0
>>> +     isb
>>> +
>>> +     /*
>>> +      * We don't issue another v7_flush_dcache_all here as seen in many
>>> +      * other places as we have a UP core and the L1 could not soak up
>>> +      * data from other L1 caches in the meantime.
>>> +      */
>>> +
>>> +     bl      cpu_v7_do_idle
>>> +
>>> +     /* in rare cases when WFI fails we end up here and restore things */
>>
>> I am still baffled by this "wfi failures" and how hardware can manage
>> this situation _properly_, if you can explain to me how this works
>> that would be grand.
>
> I unfortunately don't know about the hardware design of this SoC.
>
>> This code is also called by CPUidle right ?
>
> I haven't experienced this when using suspend-to-ram but only for
> CPUIdle. Of course this might be just a matter of probability though.
>
>> So,
>> IRQs can still cause wfi completion, there has to be a handshake between
>> the core and power controller to prevent the CPU from being shutdown
>> while executing code (power down command sent, standbywfi asserted and
>> then released following wfi completion).
>>
>> Anyway, I do not like the code sequence (and I know that eg OMAP4 executes
>> the same code) above because again, it works on UP, but people might be
>> tempted to copy'n'paste it to their SMP implementation (with the addition
>> of SMP bit restore in ACTLR).
>>
>> OMAP4 sequence is missing TLB flush since the core runs for a time window with
>> incoherent TLBs (out of SMP). It works because other cores can't update
>> their page tables while the core exiting wfi is running out of coherency
>> (ie coupled idle C-states, either all CPUs down or no CPU down), but
>> again, that code should not be copied verbatim since it fails to flush
>> TLBs, AFAIK.
>>
>> Again, you are running a UP kernel so this code does not suffer from
>> the same problem, but if this code is copied and used on SMP platforms then
>> we do have problem and that has to be prevented.
>>
>> Why do not you turn MMU off (cpu_reset) and jump to the reset vector if
>> wfi fails (code in arch/arm/kernel/process.c __soft_restart(), minus the
>> cache flushing ?
>>
>>         phys_reset_t phys_reset;
>>
>>         /* Take out a flat memory mapping. */
>>         setup_mm_for_reboot();
>>
>>         phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
>>         phys_reset((unsigned long)addr);
>>
>>         BUG();
>>
>> where addr == virt_to_phys(cpu_resume);
>>
>> from what I gather from the code below.
>>
>> There is just a tiny performance hit owing to a couple of fetches from
>> DRAM (MMU off), then cpu_resume restores the C bit and the MMU (and flushes
>> the TLBs) so the virtual address space and caches are back on. By the time wfi
>> fails, caches are invalid anyway so in both cases they have to be refilled.
>>
>> On top of that, this code is not really SH specific, it is plain v7 code
>> that can be standardised.
>
> Ok that sounds good. As we don't touch the hotpath I don't care much
> about some extra cycles and I appreciate moving towards
> standardization. I'll try that out and send a v3 with the two changes.

I just found out that this would work as well:

wfi_loop:
    bl    cpu_v7_do_idle
    b wfi_loop

So it looks like the SYSC (System Controller - the block that handles
power management) didn't have time to set up the powering down of the
domain that includes the ARM core. I haven't found a way to poll for
that event.

Currently the chip has successfully entered the low power state more than
200,000 times, so I feel inclined to post this variation as v3.

> So thanks a lot,
>
>  Bastian
>
>> For now it is just a, hopefully useful, rant.
>>
>> Lorenzo
>>
>>> +     mrc     p15, 0, r0, c1, c0, 0
>>> +     orr     r0, r0, #(1 << 2)               @ Enable the C bit
>>> +     mcr     p15, 0, r0, c1, c0, 0
>>> +     isb
>>> +
>>> +     ldmfd   sp!, {r4-r12, pc}
>>> +ENDPROC(r8a7740)
>>> +
>>> +     .text
>>> +ENTRY(v7_cpu_resume)
>>> +     bl      v7_invalidate_l1
>>> +     b       cpu_resume
>>> +ENDPROC(v7_cpu_resume)
>>> +
>>> +/*
>>> + * The entry point of a warm reboot, used by wakeup scenarios
>>> + *
>>> + * The CPU jumps in this case to (0xfffff000 & SBAR), so we need
>>> + * to align this function properly.
>>> + * We use a long jump into the text segment and use the physical
>>> + * address as the MMU is still turned off.
>>> + */
>>> +     .align  12
>>> +     .text
>>> +ENTRY(r8a7740_resume)
>>> +     ldr     pc, 1f
>>> +1:   .long   v7_cpu_resume - PAGE_OFFSET + PLAT_PHYS_OFFSET
>>> +ENDPROC(r8a7740_resume_core_standby)
>>> +#endif
>>> --
>>
diff mbox

Patch

diff --git a/arch/arm/mach-shmobile/Makefile b/arch/arm/mach-shmobile/Makefile
index 068f1da..0568894 100644
--- a/arch/arm/mach-shmobile/Makefile
+++ b/arch/arm/mach-shmobile/Makefile
@@ -30,7 +30,7 @@  obj-$(CONFIG_SUSPEND)		+= suspend.o
 obj-$(CONFIG_CPU_IDLE)		+= cpuidle.o
 obj-$(CONFIG_ARCH_SHMOBILE)	+= pm-rmobile.o
 obj-$(CONFIG_ARCH_SH7372)	+= pm-sh7372.o sleep-sh7372.o
-obj-$(CONFIG_ARCH_R8A7740)	+= pm-r8a7740.o
+obj-$(CONFIG_ARCH_R8A7740)	+= pm-r8a7740.o sleep-r8a7740.o
 obj-$(CONFIG_ARCH_R8A7779)	+= pm-r8a7779.o
 obj-$(CONFIG_ARCH_SH73A0)	+= pm-sh73a0.o
 
diff --git a/arch/arm/mach-shmobile/include/mach/r8a7740.h b/arch/arm/mach-shmobile/include/mach/r8a7740.h
index abdc4d4..05551ee 100644
--- a/arch/arm/mach-shmobile/include/mach/r8a7740.h
+++ b/arch/arm/mach-shmobile/include/mach/r8a7740.h
@@ -540,6 +540,9 @@  extern void r8a7740_add_standard_devices(void);
 extern void r8a7740_clock_init(u8 md_ck);
 extern void r8a7740_pinmux_init(void);
 extern void r8a7740_pm_init(void);
+extern void r8a7740_resume(void);
+extern void r8a7740_shutdown(void);
+extern void r8a7740_enter_a3sm_common(int);
 
 #ifdef CONFIG_PM
 extern void __init r8a7740_init_pm_domains(void);
diff --git a/arch/arm/mach-shmobile/pm-r8a7740.c b/arch/arm/mach-shmobile/pm-r8a7740.c
index 40b87aa..adadac4 100644
--- a/arch/arm/mach-shmobile/pm-r8a7740.c
+++ b/arch/arm/mach-shmobile/pm-r8a7740.c
@@ -8,10 +8,52 @@ 
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
+#include <linux/bitrev.h>
 #include <linux/console.h>
+#include <linux/module.h>
 #include <linux/suspend.h>
+#include <linux/io.h>
+#include <asm/suspend.h>
+#include <asm/cacheflush.h>
+#include <asm/hardware/cache-l2x0.h>
 #include <mach/pm-rmobile.h>
 #include <mach/common.h>
+#include <mach/r8a7740.h>
+
+/* CPGA */
+#define PLLC01STPCR 	IOMEM(0xe61500c8)
+#define SYSTBCR		IOMEM(0xe6150024)
+
+/* SYSC */
+#define STBCHR		IOMEM(0xe6180000)
+#define STBCHRB		IOMEM(0xe6180040)
+#define SPDCR		IOMEM(0xe6180008)
+#define SBAR		IOMEM(0xe6180020)
+#define SRSTFR		IOMEM(0xe61800B4)
+#define WUPSMSK		IOMEM(0xe618002c)
+#define WUPSMSK2	IOMEM(0xe6180048)
+#define WUPSFAC		IOMEM(0xe6180098)
+#define IRQCR 		IOMEM(0xe618022c)
+#define IRQCR2 		IOMEM(0xe6180238)
+#define IRQCR3		IOMEM(0xe6180244)
+#define IRQCR4		IOMEM(0xe6180248)
+
+/* SRSTFR flags */
+#define RAMST		(1 << 19)
+#define RCLNKA		(1 << 7)
+#define RCPRES		(1 << 5)
+#define RCWD1		(1 << 4)
+#define RPF		(1 << 0)
+
+/* INTC */
+#define ICR1A		IOMEM(0xe6900000)
+#define ICR2A		IOMEM(0xe6900004)
+#define ICR3A		IOMEM(0xe6900008)
+#define ICR4A		IOMEM(0xe690000c)
+#define INTMSK00A	IOMEM(0xe6900040)
+#define INTMSK10A	IOMEM(0xe6900044)
+#define INTMSK20A	IOMEM(0xe6900048)
+#define INTMSK30A	IOMEM(0xe690004c)
 
 #ifdef CONFIG_PM
 static int r8a7740_pd_a4s_suspend(void)
@@ -58,13 +100,132 @@  void __init r8a7740_init_pm_domains(void)
 	rmobile_init_domains(r8a7740_pm_domains, ARRAY_SIZE(r8a7740_pm_domains));
 	pm_genpd_add_subdomain_names("A4S", "A3SP");
 }
-
 #endif /* CONFIG_PM */
 
 #ifdef CONFIG_SUSPEND
+static void r8a7740_set_reset_vector(unsigned long address)
+{
+	__raw_writel(address, SBAR);
+}
+
+static void r8a7740_icr_to_irqcr(unsigned long icr, u16 *irqcr1p, u16 *irqcr2p)
+{
+	u16 tmp, irqcr1, irqcr2;
+	int k;
+
+	irqcr1 = 0;
+	irqcr2 = 0;
+
+	/* convert INTCA ICR register layout to SYSC IRQCR+IRQCR2 */
+	for (k = 0; k <= 7; k++) {
+		tmp = (icr >> ((7 - k) * 4)) & 0xf;
+		irqcr1 |= (tmp & 0x03) << (k * 2);
+		irqcr2 |= (tmp >> 2) << (k * 2);
+	}
+
+	*irqcr1p = irqcr1;
+	*irqcr2p = irqcr2;
+}
+
+static void r8a7740_setup_sysc(unsigned long msk, unsigned long msk2)
+{
+	u16 irqcrx_low, irqcrx_high, irqcry_low, irqcry_high;
+	unsigned long tmp;
+
+	/* read IRQ0A -> IRQ15A mask */
+	tmp = bitrev8(__raw_readb(INTMSK00A));
+	tmp |= bitrev8(__raw_readb(INTMSK10A)) << 8;
+
+	/* setup WUPSMSK from clocks and external IRQ mask */
+	msk = (~msk & 0xc030000f) | (tmp << 4);
+	__raw_writel(msk, WUPSMSK);
+
+	/* propagate level/edge trigger for external IRQ 0->15 */
+	r8a7740_icr_to_irqcr(__raw_readl(ICR1A), &irqcrx_low, &irqcry_low);
+	r8a7740_icr_to_irqcr(__raw_readl(ICR2A), &irqcrx_high, &irqcry_high);
+	__raw_writel((irqcrx_high << 16) | irqcrx_low, IRQCR);
+	__raw_writel((irqcry_high << 16) | irqcry_low, IRQCR2);
+
+	/* read IRQ16A -> IRQ31A mask */
+	tmp = bitrev8(__raw_readb(INTMSK20A));
+	tmp |= bitrev8(__raw_readb(INTMSK30A)) << 8;
+
+	/* setup WUPSMSK2 from clocks and external IRQ mask */
+	msk2 = (~msk2 & 0x00030000) | tmp;
+	__raw_writel(msk2, WUPSMSK2);
+
+	/* propagate level/edge trigger for external IRQ 16->31 */
+	r8a7740_icr_to_irqcr(__raw_readl(ICR3A), &irqcrx_low, &irqcry_low);
+	r8a7740_icr_to_irqcr(__raw_readl(ICR4A), &irqcrx_high, &irqcry_high);
+	__raw_writel((irqcrx_high << 16) | irqcrx_low, IRQCR3);
+	__raw_writel((irqcry_high << 16) | irqcry_low, IRQCR4);
+}
+
+static void r8a7740_prepare_wakeup(void)
+{
+	/* clear all flags that lead to a cold boot */
+	__raw_writel(~(RAMST | RCLNKA | RCPRES | RCWD1 | RPF), SRSTFR);
+	/* indicate warm boot */
+	__raw_writel(0x80000000, STBCHRB);
+	/* clear other flags checked by internal ROM boot loader */
+	__raw_writel(0x00000000, STBCHR);
+}
+
+static int r8a7740_do_suspend(unsigned long unused)
+{
+	/*
+	 * cpu_suspend() guarantees that all data made it to the L2.
+	 * Flush it out now and disable the cache controller.
+	 */
+	outer_flush_all();
+	outer_disable();
+
+	r8a7740_shutdown();
+
+	/* in case WFI fails to enter the low power state, restore things */
+	outer_resume();
+
+	return 0;
+}
+
+void r8a7740_enter_a3sm_common(int pllc0_on)
+{
+	u32 reg32;
+
+	if (pllc0_on)
+		__raw_writel(0, PLLC01STPCR);
+	else
+		__raw_writel(1 << 28, PLLC01STPCR);
+
+	r8a7740_set_reset_vector(__pa(r8a7740_resume));
+	r8a7740_prepare_wakeup();
+	r8a7740_setup_sysc(1 << 0, 0);
+
+	/* Activate delayed shutdown of A3SM */
+	reg32 = __raw_readl(SPDCR);
+	reg32 |= (1 << 31) | (1 << 12);
+	__raw_writel(reg32, SPDCR);
+
+	/* We activate CPU Core Standby as well here */
+	reg32 = __raw_readl(SYSTBCR);
+	reg32 |= (1 << 4);
+	__raw_writel(reg32, SYSTBCR);
+
+	/* Clear Wakeup Factors and do suspend */
+	reg32 = __raw_readl(WUPSFAC);
+	cpu_suspend(0, r8a7740_do_suspend);
+	outer_resume();
+	reg32 = __raw_readl(WUPSFAC);
+
+	/* Clear CPU Core Standby flag for other WFI instructions */
+	reg32 = __raw_readl(SYSTBCR) & ~(1 << 4);
+	__raw_writel(reg32, SYSTBCR);
+
+}
+
 static int r8a7740_enter_suspend(suspend_state_t suspend_state)
 {
-	cpu_do_idle();
+	r8a7740_enter_a3sm_common(0);
 	return 0;
 }
 
@@ -74,7 +235,7 @@  static void r8a7740_suspend_init(void)
 }
 #else
 static void r8a7740_suspend_init(void) {}
-#endif
+#endif /* CONFIG_SUSPEND */
 
 void __init r8a7740_pm_init(void)
 {
diff --git a/arch/arm/mach-shmobile/sleep-r8a7740.S b/arch/arm/mach-shmobile/sleep-r8a7740.S
new file mode 100644
index 0000000..762f978
--- /dev/null
+++ b/arch/arm/mach-shmobile/sleep-r8a7740.S
@@ -0,0 +1,72 @@ 
+/*
+ * Low level sleep code for the SoC r8a7740
+ *
+ * Copyright (C) 2013 Bastian Hecht
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/linkage.h>
+#include <linux/init.h>
+#include <asm/memory.h>
+
+#ifdef CONFIG_SUSPEND
+
+/* r8a7740_shutdown expects L2 to be flushed */
+	.text
+ENTRY(r8a7740_shutdown)
+	stmfd   sp!, {r4-r12, lr}
+
+	/* make sure the stack stays intact */
+	bl      v7_flush_dcache_all
+
+	/*
+	 * Clear the SCTLR.C bit to prevent further data cache
+	 * allocation. Clearing SCTLR.C would make all the data accesses
+	 * strongly ordered and would not hit the cache.
+	 */
+	mrc	p15, 0, r0, c1, c0, 0
+	bic	r0, r0, #(1 << 2)		@ Disable the C bit
+	mcr	p15, 0, r0, c1, c0, 0
+	isb
+
+	/*
+	 * We don't issue another v7_flush_dcache_all here as seen in many
+	 * other places as we have a UP core and the L1 could not soak up
+	 * data from other L1 caches in the meantime.
+	 */
+
+	bl	cpu_v7_do_idle
+
+	/* in rare cases when WFI fails we end up here and restore things */
+	mrc	p15, 0, r0, c1, c0, 0
+	orr	r0, r0, #(1 << 2)		@ Enable the C bit
+	mcr	p15, 0, r0, c1, c0, 0
+	isb
+
+	ldmfd   sp!, {r4-r12, pc}
+ENDPROC(r8a7740_shutdown)
+
+	.text
+ENTRY(v7_cpu_resume)
+	bl	v7_invalidate_l1
+	b	cpu_resume
+ENDPROC(v7_cpu_resume)
+
+/*
+ * The entry point of a warm reboot, used by wakeup scenarios
+ *
+ * The CPU jumps in this case to (0xfffff000 & SBAR), so we need
+ * to align this function properly.
+ * We use a long jump into the text segment and use the physical
+ * address as the MMU is still turned off.
+ */
+	.align	12
+	.text
+ENTRY(r8a7740_resume)
+	ldr	pc, 1f
+1:	.long	v7_cpu_resume - PAGE_OFFSET + PLAT_PHYS_OFFSET
+ENDPROC(r8a7740_resume)
+#endif