diff mbox

[v5,07/15] arm64: Change cpu_resume() to enable mmu early then access sleep_sp by va

Message ID 1455637767-31561-8-git-send-email-james.morse@arm.com (mailing list archive)
State New, archived
Headers show

Commit Message

James Morse Feb. 16, 2016, 3:49 p.m. UTC
By enabling the MMU early in cpu_resume(), the sleep_save_sp and stack can
be accessed by VA, which avoids the need to convert addresses and clean to
PoC on the suspend path.

MMU setup is shared with the boot path, meaning the swapper_pg_dir is
restored directly: ttbr1_el1 is no longer saved/restored.

struct sleep_save_sp is removed, replacing it with a single array of
pointers.

cpu_do_{suspend,resume} could be further reduced to not restore: cpacr_el1,
mdscr_el1, tcr_el1, vbar_el1 and sctlr_el1, all of which are set by
__cpu_setup(). However these values all contain res0 bits that may be used
to enable future features.

Signed-off-by: James Morse <james.morse@arm.com>
---
 arch/arm64/include/asm/suspend.h |  9 +++----
 arch/arm64/kernel/asm-offsets.c  |  3 ---
 arch/arm64/kernel/head.S         |  2 +-
 arch/arm64/kernel/setup.c        |  1 -
 arch/arm64/kernel/sleep.S        | 54 ++++++++++++++--------------------------
 arch/arm64/kernel/suspend.c      | 39 ++++++-----------------------
 arch/arm64/mm/proc.S             | 41 +++++++++++-------------------
 7 files changed, 45 insertions(+), 104 deletions(-)

Comments

Lorenzo Pieralisi Feb. 18, 2016, 6:26 p.m. UTC | #1
On Tue, Feb 16, 2016 at 03:49:19PM +0000, James Morse wrote:
> By enabling the MMU early in cpu_resume(), the sleep_save_sp and stack can
> be accessed by VA, which avoids the need to convert-addresses and clean to
> PoC on the suspend path.
> 
> MMU setup is shared with the boot path, meaning the swapper_pg_dir is
> restored directly: ttbr1_el1 is no longer saved/restored.
> 
> struct sleep_save_sp is removed, replacing it with a single array of
> pointers.
> 
> cpu_do_{suspend,resume} could be further reduced to not restore: cpacr_el1,
> mdscr_el1, tcr_el1, vbar_el1 and sctlr_el1, all of which are set by
> __cpu_setup(). However these values all contain res0 bits that may be used
> to enable future features.
> 
> Signed-off-by: James Morse <james.morse@arm.com>
> ---

[...]

> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
> index dca81612fe90..0e2b36f1fb44 100644
> --- a/arch/arm64/kernel/sleep.S
> +++ b/arch/arm64/kernel/sleep.S
> @@ -73,8 +73,8 @@ ENTRY(__cpu_suspend_enter)
>  	str	x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP]
>  
>  	/* find the mpidr_hash */
> -	ldr	x1, =sleep_save_sp
> -	ldr	x1, [x1, #SLEEP_SAVE_SP_VIRT]
> +	ldr	x1, =sleep_save_stash
> +	ldr	x1, [x1]
>  	mrs	x7, mpidr_el1
>  	ldr	x9, =mpidr_hash
>  	ldr	x10, [x9, #MPIDR_HASH_MASK]
> @@ -87,40 +87,26 @@ ENTRY(__cpu_suspend_enter)
>  	compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
>  	add	x1, x1, x8, lsl #3
>  
> +	str	x0, [x1]
> +	add	x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS

Mmm...this instruction does not really belong in this patch,
it should be part of patch 6, correct ? What I mean is, the new
struct to stash system regs (struct sleep_stack_data) was added in
patch 6, if the offset #SLEEP_STACK_DATA_SYSTEM_REGS (which is 0) had
to be added it had to be added in patch 6 too, it does not belong
in this patch, am I right ?

>  	push	x29, lr
> -	bl	__cpu_suspend_save
> +	bl	cpu_do_suspend
>  	pop	x29, lr
>  	mov	x0, #1
>  	ret
>  ENDPROC(__cpu_suspend_enter)
>  	.ltorg
>  
> -/*
> - * x0 must contain the sctlr value retrieved from restored context
> - */
> -	.pushsection	".idmap.text", "ax"
> -ENTRY(cpu_resume_mmu)
> -	ldr	x3, =cpu_resume_after_mmu
> -	msr	sctlr_el1, x0		// restore sctlr_el1
> -	isb
> -	/*
> -	 * Invalidate the local I-cache so that any instructions fetched
> -	 * speculatively from the PoC are discarded, since they may have
> -	 * been dynamically patched at the PoU.
> -	 */
> -	ic	iallu
> -	dsb	nsh
> -	isb
> -	br	x3			// global jump to virtual address
> -ENDPROC(cpu_resume_mmu)
> -	.popsection
> -cpu_resume_after_mmu:
> -	mov	x0, #0			// return zero on success
> -	ret
> -ENDPROC(cpu_resume_after_mmu)
> -
>  ENTRY(cpu_resume)
>  	bl	el2_setup		// if in EL2 drop to EL1 cleanly
> +	/* enable the MMU early - so we can access sleep_save_stash by va */
> +	adr_l	lr, __enable_mmu	/* __cpu_setup will return here */
> +	ldr	x27, =_cpu_resume	/* __enable_mmu will branch here */
> +	adrp	x25, idmap_pg_dir
> +	adrp	x26, swapper_pg_dir
> +	b	__cpu_setup
> +
> +ENTRY(_cpu_resume)
>  	mrs	x1, mpidr_el1
>  	adrp	x8, mpidr_hash
>  	add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
> @@ -130,29 +116,27 @@ ENTRY(cpu_resume)
>  	ldp	w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)]
>  	compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2
>          /* x7 contains hash index, let's use it to grab context pointer */
> -	ldr_l	x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS
> +	ldr_l	x0, sleep_save_stash
>  	ldr	x0, [x0, x7, lsl #3]
>  	add	x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS
>  	add	x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
>  	/* load sp from context */
>  	ldr	x2, [x0, #CPU_CTX_SP]
> -	/* load physical address of identity map page table in x1 */
> -	adrp	x1, idmap_pg_dir
>  	mov	sp, x2
>  	/* save thread_info */
>  	and	x2, x2, #~(THREAD_SIZE - 1)
>  	msr	sp_el0, x2
>  	/*
> -	 * cpu_do_resume expects x0 to contain context physical address
> -	 * pointer and x1 to contain physical address of 1:1 page tables
> +	 * cpu_do_resume expects x0 to contain context address pointer
>  	 */
> -	bl	cpu_do_resume		// PC relative jump, MMU off
> -	/* Can't access these by physical address once the MMU is on */
> +	bl	cpu_do_resume
> +
>  	ldp	x19, x20, [x29, #16]
>  	ldp	x21, x22, [x29, #32]
>  	ldp	x23, x24, [x29, #48]
>  	ldp	x25, x26, [x29, #64]
>  	ldp	x27, x28, [x29, #80]
>  	ldp	x29, lr, [x29]
> -	b	cpu_resume_mmu		// Resume MMU, never returns
> +	mov	x0, #0
> +	ret
>  ENDPROC(cpu_resume)
> diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
> index 6fe46100685a..2ec2f94d1690 100644
> --- a/arch/arm64/kernel/suspend.c
> +++ b/arch/arm64/kernel/suspend.c
> @@ -10,30 +10,12 @@
>  #include <asm/suspend.h>
>  #include <asm/tlbflush.h>
>  
> -
>  /*
> - * This is called by __cpu_suspend_enter() to save the state, and do whatever
> - * flushing is required to ensure that when the CPU goes to sleep we have
> - * the necessary data available when the caches are not searched.
> - *
> - * ptr: sleep_stack_data containing cpu state virtual address.
> - * save_ptr: address of the location where the context physical address
> - *           must be saved
> + * This is allocate by cpu_suspend_init(), and used in cpuidle to store a

Nit: s/allocate/allocated

"and used to store", it is not just used in cpuidle, also S2R and now
we have hibernate too.

[...]

> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index db832c42ae30..a2ef1256a329 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -24,6 +24,7 @@
>  #include <asm/asm-offsets.h>
>  #include <asm/hwcap.h>
>  #include <asm/pgtable.h>
> +#include <asm/pgtable-hwdef.h>
>  
>  #ifdef CONFIG_ARM64_64K_PAGES
>  #define TCR_TG_FLAGS	TCR_TG0_64K | TCR_TG1_64K
> @@ -61,62 +62,50 @@ ENTRY(cpu_do_suspend)
>  	mrs	x2, tpidr_el0
>  	mrs	x3, tpidrro_el0
>  	mrs	x4, contextidr_el1
> -	mrs	x5, mair_el1
>  	mrs	x6, cpacr_el1
> -	mrs	x7, ttbr1_el1
>  	mrs	x8, tcr_el1
>  	mrs	x9, vbar_el1
>  	mrs	x10, mdscr_el1
>  	mrs	x11, oslsr_el1
>  	mrs	x12, sctlr_el1
>  	stp	x2, x3, [x0]
> -	stp	x4, x5, [x0, #16]
> -	stp	x6, x7, [x0, #32]
> -	stp	x8, x9, [x0, #48]
> -	stp	x10, x11, [x0, #64]
> -	str	x12, [x0, #80]
> +	stp	x4, xzr, [x0, #16]
> +	stp	x6, x8, [x0, #32]
> +	stp	x9, x10, [x0, #48]
> +	stp	x11, x12, [x0, #64]

Nit: You may want to re-enumerate the registers.

With the last updates it seems fine by me, so:

Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>

>  	ret
>  ENDPROC(cpu_do_suspend)
>  
>  /**
>   * cpu_do_resume - restore CPU register context
>   *
> - * x0: Physical address of context pointer
> - * x1: ttbr0_el1 to be restored
> - *
> - * Returns:
> - *	sctlr_el1 value in x0
> + * x0: Address of context pointer
>   */
>  ENTRY(cpu_do_resume)
> -	/*
> -	 * Invalidate local tlb entries before turning on MMU
> -	 */
> -	tlbi	vmalle1
>  	ldp	x2, x3, [x0]
>  	ldp	x4, x5, [x0, #16]
> -	ldp	x6, x7, [x0, #32]
> -	ldp	x8, x9, [x0, #48]
> -	ldp	x10, x11, [x0, #64]
> -	ldr	x12, [x0, #80]
> +	ldp	x6, x8, [x0, #32]
> +	ldp	x9, x10, [x0, #48]
> +	ldp	x11, x12, [x0, #64]
>  	msr	tpidr_el0, x2
>  	msr	tpidrro_el0, x3
>  	msr	contextidr_el1, x4
> -	msr	mair_el1, x5
>  	msr	cpacr_el1, x6
> -	msr	ttbr0_el1, x1
> -	msr	ttbr1_el1, x7
> -	tcr_set_idmap_t0sz x8, x7
> +
> +	/* Don't change t0sz here, mask those bits when restoring */
> +	mrs	x5, tcr_el1
> +	bfi	x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
> +
>  	msr	tcr_el1, x8
>  	msr	vbar_el1, x9
>  	msr	mdscr_el1, x10
> +	msr	sctlr_el1, x12
>  	/*
>  	 * Restore oslsr_el1 by writing oslar_el1
>  	 */
>  	ubfx	x11, x11, #1, #1
>  	msr	oslar_el1, x11
>  	reset_pmuserenr_el0 x0			// Disable PMU access from EL0
> -	mov	x0, x12
> -	dsb	nsh		// Make sure local tlb invalidation completed
>  	isb
>  	ret
>  ENDPROC(cpu_do_resume)
> -- 
> 2.6.2
>
James Morse Feb. 19, 2016, 4:20 p.m. UTC | #2
Hi Lorenzo,

On 18/02/16 18:26, Lorenzo Pieralisi wrote:
> On Tue, Feb 16, 2016 at 03:49:19PM +0000, James Morse wrote:
>> By enabling the MMU early in cpu_resume(), the sleep_save_sp and stack can
>> be accessed by VA, which avoids the need to convert-addresses and clean to
>> PoC on the suspend path.
>>
>> MMU setup is shared with the boot path, meaning the swapper_pg_dir is
>> restored directly: ttbr1_el1 is no longer saved/restored.
>>
>> struct sleep_save_sp is removed, replacing it with a single array of
>> pointers.
>>
>> cpu_do_{suspend,resume} could be further reduced to not restore: cpacr_el1,
>> mdscr_el1, tcr_el1, vbar_el1 and sctlr_el1, all of which are set by
>> __cpu_setup(). However these values all contain res0 bits that may be used
>> to enable future features.
>>
>> Signed-off-by: James Morse <james.morse@arm.com>
>> ---
> 
> [...]
> 
>> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
>> index dca81612fe90..0e2b36f1fb44 100644
>> --- a/arch/arm64/kernel/sleep.S
>> +++ b/arch/arm64/kernel/sleep.S
>> @@ -73,8 +73,8 @@ ENTRY(__cpu_suspend_enter)
>>  	str	x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP]
>>  
>>  	/* find the mpidr_hash */
>> -	ldr	x1, =sleep_save_sp
>> -	ldr	x1, [x1, #SLEEP_SAVE_SP_VIRT]
>> +	ldr	x1, =sleep_save_stash
>> +	ldr	x1, [x1]
>>  	mrs	x7, mpidr_el1
>>  	ldr	x9, =mpidr_hash
>>  	ldr	x10, [x9, #MPIDR_HASH_MASK]
>> @@ -87,40 +87,26 @@ ENTRY(__cpu_suspend_enter)
>>  	compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
>>  	add	x1, x1, x8, lsl #3
>>  
>> +	str	x0, [x1]
>> +	add	x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
> 
> Mmm...this instruction does not really belong in this patch,
> it should be part of patch 6, correct ? What I mean is, the new
> struct to stash system regs (struct sleep_stack_data) was added in
> patch 6, if the offset #SLEEP_STACK_DATA_SYSTEM_REGS (which is 0) had
> to be added it had to be added in patch 6 too, it does not belong
> in this patch, am I right ?

In the previous patch __cpu_suspend_save() was changed to take a struct
sleep_stack_data, this then passes system_regs to cpu_do_suspend(). The 'add	x0,
x0, #SLEEP_STACK_DATA_SYSTEM_REGS' is being done in C, (and hopefully optimised
out by the compiler).

In this patch, we removed __cpu_suspend_save() and call cpu_do_suspend()
directly, so need to account for system_regs's position in struct
sleep_stack_data ourselves.

I'm paranoid, but if you prefer I can remove the 'add 0', and dump a comment
next to the struct definition making it someone else's problem to fix it if they
ever change the layout of the struct...


> 
>>  	push	x29, lr
>> -	bl	__cpu_suspend_save
>> +	bl	cpu_do_suspend
>>  	pop	x29, lr
>>  	mov	x0, #1
>>  	ret
>>  ENDPROC(__cpu_suspend_enter)
>>  	.ltorg

[...2x Nits fixed...]

> With the last updates it seems fine by me, so:
> 
> Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>


Thanks!


James
Lorenzo Pieralisi Feb. 19, 2016, 4:43 p.m. UTC | #3
On Fri, Feb 19, 2016 at 04:20:50PM +0000, James Morse wrote:

[...]

> >> diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
> >> index dca81612fe90..0e2b36f1fb44 100644
> >> --- a/arch/arm64/kernel/sleep.S
> >> +++ b/arch/arm64/kernel/sleep.S
> >> @@ -73,8 +73,8 @@ ENTRY(__cpu_suspend_enter)
> >>  	str	x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP]
> >>  
> >>  	/* find the mpidr_hash */
> >> -	ldr	x1, =sleep_save_sp
> >> -	ldr	x1, [x1, #SLEEP_SAVE_SP_VIRT]
> >> +	ldr	x1, =sleep_save_stash
> >> +	ldr	x1, [x1]
> >>  	mrs	x7, mpidr_el1
> >>  	ldr	x9, =mpidr_hash
> >>  	ldr	x10, [x9, #MPIDR_HASH_MASK]
> >> @@ -87,40 +87,26 @@ ENTRY(__cpu_suspend_enter)
> >>  	compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
> >>  	add	x1, x1, x8, lsl #3
> >>  
> >> +	str	x0, [x1]
> >> +	add	x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
> > 
> > Mmm...this instruction does not really belong in this patch,
> > it should be part of patch 6, correct ? What I mean is, the new
> > struct to stash system regs (struct sleep_stack_data) was added in
> > patch 6, if the offset #SLEEP_STACK_DATA_SYSTEM_REGS (which is 0) had
> > to be added it had to be added in patch 6 too, it does not belong
> > in this patch, am I right ?
> 
> In the previous patch __cpu_suspend_save() was changed to take a
> struct sleep_stack_data, this then passes system_regs to
> cpu_do_suspend(). The 'add	x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS'
> is being done in C, (and hopefully optimised out by the compiler).
> 
> In this patch, we removed __cpu_suspend_save() and call cpu_do_suspend()
> directly, so need to account for system_regs's position in struct
> sleep_stack_data ourselves.

Yes, I missed that during the review, that explains, thanks.

> I'm paranoid, but if you prefer I can remove the 'add 0', and dump a
> comment next to the struct definition making it someone elses problem
> to fix it if they ever change the layout of the struct...

No, what you do is correct and should be kept as-is, I thought it
had to be added in patch 6 but I was wrong, see above, code is
correct as it is.

Thanks,
Lorenzo

> 
> 
> > 
> >>  	push	x29, lr
> >> -	bl	__cpu_suspend_save
> >> +	bl	cpu_do_suspend
> >>  	pop	x29, lr
> >>  	mov	x0, #1
> >>  	ret
> >>  ENDPROC(__cpu_suspend_enter)
> >>  	.ltorg
> 
> [...2x Nits fixed...]
> 
> > With the last updates it seems fine by me, so:
> > 
> > Reviewed-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
> 
> 
> Thanks!
> 
> 
> James
> 
>
diff mbox

Patch

diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 365d8cdd0073..29d3c71433e1 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -1,7 +1,7 @@ 
 #ifndef __ASM_SUSPEND_H
 #define __ASM_SUSPEND_H
 
-#define NR_CTX_REGS 11
+#define NR_CTX_REGS 10
 #define NR_CALLEE_SAVED_REGS 12
 
 /*
@@ -17,11 +17,6 @@  struct cpu_suspend_ctx {
 	u64 sp;
 } __aligned(16);
 
-struct sleep_save_sp {
-	phys_addr_t *save_ptr_stash;
-	phys_addr_t save_ptr_stash_phys;
-};
-
 /*
  * Memory to save the cpu state is allocated on the stack by
  * __cpu_suspend_enter()'s caller, and populated by __cpu_suspend_enter().
@@ -39,6 +34,8 @@  struct sleep_stack_data {
 	unsigned long		callee_saved_regs[NR_CALLEE_SAVED_REGS];
 };
 
+extern unsigned long *sleep_save_stash;
+
 extern int cpu_suspend(unsigned long arg, int (*fn)(unsigned long));
 extern void cpu_resume(void);
 int __cpu_suspend_enter(struct sleep_stack_data *state);
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 7d7994774e82..d6119c57f28a 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -120,9 +120,6 @@  int main(void)
   DEFINE(CPU_CTX_SP,		offsetof(struct cpu_suspend_ctx, sp));
   DEFINE(MPIDR_HASH_MASK,	offsetof(struct mpidr_hash, mask));
   DEFINE(MPIDR_HASH_SHIFTS,	offsetof(struct mpidr_hash, shift_aff));
-  DEFINE(SLEEP_SAVE_SP_SZ,	sizeof(struct sleep_save_sp));
-  DEFINE(SLEEP_SAVE_SP_PHYS,	offsetof(struct sleep_save_sp, save_ptr_stash_phys));
-  DEFINE(SLEEP_SAVE_SP_VIRT,	offsetof(struct sleep_save_sp, save_ptr_stash));
   DEFINE(SLEEP_STACK_DATA_SYSTEM_REGS,	offsetof(struct sleep_stack_data, system_regs));
   DEFINE(SLEEP_STACK_DATA_CALLEE_REGS,	offsetof(struct sleep_stack_data, callee_saved_regs));
 #endif
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 05b98289093e..6fad2f814ecf 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -636,7 +636,7 @@  ENDPROC(__secondary_switched)
  * If it isn't, park the CPU
  */
 	.section	".idmap.text", "ax"
-__enable_mmu:
+ENTRY(__enable_mmu)
 	mrs	x1, ID_AA64MMFR0_EL1
 	ubfx	x2, x1, #ID_AA64MMFR0_TGRAN_SHIFT, 4
 	cmp	x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index cfed56f0ad26..ad290601f4da 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -175,7 +175,6 @@  static void __init smp_build_mpidr_hash(void)
 	 */
 	if (mpidr_hash_size() > 4 * num_possible_cpus())
 		pr_warn("Large number of MPIDR hash buckets detected\n");
-	__flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash));
 }
 
 static void __init setup_machine_fdt(phys_addr_t dt_phys)
diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S
index dca81612fe90..0e2b36f1fb44 100644
--- a/arch/arm64/kernel/sleep.S
+++ b/arch/arm64/kernel/sleep.S
@@ -73,8 +73,8 @@  ENTRY(__cpu_suspend_enter)
 	str	x2, [x0, #SLEEP_STACK_DATA_SYSTEM_REGS + CPU_CTX_SP]
 
 	/* find the mpidr_hash */
-	ldr	x1, =sleep_save_sp
-	ldr	x1, [x1, #SLEEP_SAVE_SP_VIRT]
+	ldr	x1, =sleep_save_stash
+	ldr	x1, [x1]
 	mrs	x7, mpidr_el1
 	ldr	x9, =mpidr_hash
 	ldr	x10, [x9, #MPIDR_HASH_MASK]
@@ -87,40 +87,26 @@  ENTRY(__cpu_suspend_enter)
 	compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10
 	add	x1, x1, x8, lsl #3
 
+	str	x0, [x1]
+	add	x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
 	push	x29, lr
-	bl	__cpu_suspend_save
+	bl	cpu_do_suspend
 	pop	x29, lr
 	mov	x0, #1
 	ret
 ENDPROC(__cpu_suspend_enter)
 	.ltorg
 
-/*
- * x0 must contain the sctlr value retrieved from restored context
- */
-	.pushsection	".idmap.text", "ax"
-ENTRY(cpu_resume_mmu)
-	ldr	x3, =cpu_resume_after_mmu
-	msr	sctlr_el1, x0		// restore sctlr_el1
-	isb
-	/*
-	 * Invalidate the local I-cache so that any instructions fetched
-	 * speculatively from the PoC are discarded, since they may have
-	 * been dynamically patched at the PoU.
-	 */
-	ic	iallu
-	dsb	nsh
-	isb
-	br	x3			// global jump to virtual address
-ENDPROC(cpu_resume_mmu)
-	.popsection
-cpu_resume_after_mmu:
-	mov	x0, #0			// return zero on success
-	ret
-ENDPROC(cpu_resume_after_mmu)
-
 ENTRY(cpu_resume)
 	bl	el2_setup		// if in EL2 drop to EL1 cleanly
+	/* enable the MMU early - so we can access sleep_save_stash by va */
+	adr_l	lr, __enable_mmu	/* __cpu_setup will return here */
+	ldr	x27, =_cpu_resume	/* __enable_mmu will branch here */
+	adrp	x25, idmap_pg_dir
+	adrp	x26, swapper_pg_dir
+	b	__cpu_setup
+
+ENTRY(_cpu_resume)
 	mrs	x1, mpidr_el1
 	adrp	x8, mpidr_hash
 	add x8, x8, #:lo12:mpidr_hash // x8 = struct mpidr_hash phys address
@@ -130,29 +116,27 @@  ENTRY(cpu_resume)
 	ldp	w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)]
 	compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2
         /* x7 contains hash index, let's use it to grab context pointer */
-	ldr_l	x0, sleep_save_sp + SLEEP_SAVE_SP_PHYS
+	ldr_l	x0, sleep_save_stash
 	ldr	x0, [x0, x7, lsl #3]
 	add	x29, x0, #SLEEP_STACK_DATA_CALLEE_REGS
 	add	x0, x0, #SLEEP_STACK_DATA_SYSTEM_REGS
 	/* load sp from context */
 	ldr	x2, [x0, #CPU_CTX_SP]
-	/* load physical address of identity map page table in x1 */
-	adrp	x1, idmap_pg_dir
 	mov	sp, x2
 	/* save thread_info */
 	and	x2, x2, #~(THREAD_SIZE - 1)
 	msr	sp_el0, x2
 	/*
-	 * cpu_do_resume expects x0 to contain context physical address
-	 * pointer and x1 to contain physical address of 1:1 page tables
+	 * cpu_do_resume expects x0 to contain context address pointer
 	 */
-	bl	cpu_do_resume		// PC relative jump, MMU off
-	/* Can't access these by physical address once the MMU is on */
+	bl	cpu_do_resume
+
 	ldp	x19, x20, [x29, #16]
 	ldp	x21, x22, [x29, #32]
 	ldp	x23, x24, [x29, #48]
 	ldp	x25, x26, [x29, #64]
 	ldp	x27, x28, [x29, #80]
 	ldp	x29, lr, [x29]
-	b	cpu_resume_mmu		// Resume MMU, never returns
+	mov	x0, #0
+	ret
 ENDPROC(cpu_resume)
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 6fe46100685a..2ec2f94d1690 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -10,30 +10,12 @@ 
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
 
-
 /*
- * This is called by __cpu_suspend_enter() to save the state, and do whatever
- * flushing is required to ensure that when the CPU goes to sleep we have
- * the necessary data available when the caches are not searched.
- *
- * ptr: sleep_stack_data containing cpu state virtual address.
- * save_ptr: address of the location where the context physical address
- *           must be saved
+ * This is allocate by cpu_suspend_init(), and used in cpuidle to store a
+ * pointer to the 'struct sleep_stack_data' the contains a particular CPUs
+ * state.
  */
-void notrace __cpu_suspend_save(struct sleep_stack_data *ptr,
-				phys_addr_t *save_ptr)
-{
-	*save_ptr = virt_to_phys(ptr);
-
-	cpu_do_suspend(&ptr->system_regs);
-	/*
-	 * Only flush the context that must be retrieved with the MMU
-	 * off. VA primitives ensure the flush is applied to all
-	 * cache levels so context is pushed to DRAM.
-	 */
-	__flush_dcache_area(ptr, sizeof(*ptr));
-	__flush_dcache_area(save_ptr, sizeof(*save_ptr));
-}
+unsigned long *sleep_save_stash;
 
 /*
  * This hook is provided so that cpu_suspend code can restore HW
@@ -137,22 +119,15 @@  int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
 	return ret;
 }
 
-struct sleep_save_sp sleep_save_sp;
-
 static int __init cpu_suspend_init(void)
 {
-	void *ctx_ptr;
-
 	/* ctx_ptr is an array of physical addresses */
-	ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL);
+	sleep_save_stash = kcalloc(mpidr_hash_size(), sizeof(*sleep_save_stash),
+				   GFP_KERNEL);
 
-	if (WARN_ON(!ctx_ptr))
+	if (WARN_ON(!sleep_save_stash))
 		return -ENOMEM;
 
-	sleep_save_sp.save_ptr_stash = ctx_ptr;
-	sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr);
-	__flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp));
-
 	return 0;
 }
 early_initcall(cpu_suspend_init);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index db832c42ae30..a2ef1256a329 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -24,6 +24,7 @@ 
 #include <asm/asm-offsets.h>
 #include <asm/hwcap.h>
 #include <asm/pgtable.h>
+#include <asm/pgtable-hwdef.h>
 
 #ifdef CONFIG_ARM64_64K_PAGES
 #define TCR_TG_FLAGS	TCR_TG0_64K | TCR_TG1_64K
@@ -61,62 +62,50 @@  ENTRY(cpu_do_suspend)
 	mrs	x2, tpidr_el0
 	mrs	x3, tpidrro_el0
 	mrs	x4, contextidr_el1
-	mrs	x5, mair_el1
 	mrs	x6, cpacr_el1
-	mrs	x7, ttbr1_el1
 	mrs	x8, tcr_el1
 	mrs	x9, vbar_el1
 	mrs	x10, mdscr_el1
 	mrs	x11, oslsr_el1
 	mrs	x12, sctlr_el1
 	stp	x2, x3, [x0]
-	stp	x4, x5, [x0, #16]
-	stp	x6, x7, [x0, #32]
-	stp	x8, x9, [x0, #48]
-	stp	x10, x11, [x0, #64]
-	str	x12, [x0, #80]
+	stp	x4, xzr, [x0, #16]
+	stp	x6, x8, [x0, #32]
+	stp	x9, x10, [x0, #48]
+	stp	x11, x12, [x0, #64]
 	ret
 ENDPROC(cpu_do_suspend)
 
 /**
  * cpu_do_resume - restore CPU register context
  *
- * x0: Physical address of context pointer
- * x1: ttbr0_el1 to be restored
- *
- * Returns:
- *	sctlr_el1 value in x0
+ * x0: Address of context pointer
  */
 ENTRY(cpu_do_resume)
-	/*
-	 * Invalidate local tlb entries before turning on MMU
-	 */
-	tlbi	vmalle1
 	ldp	x2, x3, [x0]
 	ldp	x4, x5, [x0, #16]
-	ldp	x6, x7, [x0, #32]
-	ldp	x8, x9, [x0, #48]
-	ldp	x10, x11, [x0, #64]
-	ldr	x12, [x0, #80]
+	ldp	x6, x8, [x0, #32]
+	ldp	x9, x10, [x0, #48]
+	ldp	x11, x12, [x0, #64]
 	msr	tpidr_el0, x2
 	msr	tpidrro_el0, x3
 	msr	contextidr_el1, x4
-	msr	mair_el1, x5
 	msr	cpacr_el1, x6
-	msr	ttbr0_el1, x1
-	msr	ttbr1_el1, x7
-	tcr_set_idmap_t0sz x8, x7
+
+	/* Don't change t0sz here, mask those bits when restoring */
+	mrs	x5, tcr_el1
+	bfi	x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+
 	msr	tcr_el1, x8
 	msr	vbar_el1, x9
 	msr	mdscr_el1, x10
+	msr	sctlr_el1, x12
 	/*
 	 * Restore oslsr_el1 by writing oslar_el1
 	 */
 	ubfx	x11, x11, #1, #1
 	msr	oslar_el1, x11
 	reset_pmuserenr_el0 x0			// Disable PMU access from EL0
-	mov	x0, x12
-	dsb	nsh		// Make sure local tlb invalidation completed
 	isb
 	ret
 ENDPROC(cpu_do_resume)