diff mbox

[7/7] ARM: redo TTBR setup code for LPAE

Message ID E1YpwbQ-0000wm-Jx@rmk-PC.arm.linux.org.uk (mailing list archive)
State New, archived
Headers show

Commit Message

Russell King May 6, 2015, 10:30 a.m. UTC
Re-engineer the LPAE TTBR setup code.  Rather than passing some shifted
address in order to fit in a CPU register, pass either a full physical
address (in the case of r4, r5 for TTBR0) or a PFN (for TTBR1).

This removes the ARCH_PGD_SHIFT hack, and the last dangerous user of
cpu_set_ttbr() in the secondary CPU startup code path (which was there
to re-set TTBR1 to the appropriate high physical address space on
Keystone2.)

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/memory.h    | 16 ---------------
 arch/arm/include/asm/proc-fns.h  |  7 -------
 arch/arm/include/asm/smp.h       |  2 +-
 arch/arm/kernel/head-nommu.S     |  2 +-
 arch/arm/kernel/head.S           | 42 +++++++++++++++++++++++++++++-----------
 arch/arm/kernel/smp.c            | 10 ++++++----
 arch/arm/mach-keystone/platsmp.c | 13 -------------
 arch/arm/mm/proc-v7-2level.S     |  6 +++---
 arch/arm/mm/proc-v7-3level.S     | 14 +++++---------
 arch/arm/mm/proc-v7.S            | 26 ++++++++++++-------------
 10 files changed, 60 insertions(+), 78 deletions(-)

Comments

Gregory CLEMENT Aug. 5, 2015, 3:26 p.m. UTC | #1
Hi Russell,

On 06/05/2015 12:30, Russell King wrote:
> Re-engineer the LPAE TTBR setup code.  Rather than passing some shifted
> address in order to fit in a CPU register, pass either a full physical
> address (in the case of r4, r5 for TTBR0) or a PFN (for TTBR1).
> 
> This removes the ARCH_PGD_SHIFT hack, and the last dangerous user of
> cpu_set_ttbr() in the secondary CPU startup code path (which was there
> to re-set TTBR1 to the appropriate high physical address space on
> Keystone2.)

It seems that since the merge of this commit the secondary CPUs on
Armada XP failed to come online in Big Endian (my configuration is
mvebu_v7_defconfig+CONFIG_CPU_BIG_ENDIAN=y). Once I found that this
commit introduced the regression, I also tested with LPAE, but I
had the same result: only the first CPU booted.

I don't know if there is something wrong with this patch or if it
reveals something which was wrong in the Armada XP port.

At a point I suspected something around the change in the
get_arch_pgd function but I didn't find anything useful.

Any guidance would be greatly appreciated.

Thanks,

Gregory



> 
> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
> ---
>  arch/arm/include/asm/memory.h    | 16 ---------------
>  arch/arm/include/asm/proc-fns.h  |  7 -------
>  arch/arm/include/asm/smp.h       |  2 +-
>  arch/arm/kernel/head-nommu.S     |  2 +-
>  arch/arm/kernel/head.S           | 42 +++++++++++++++++++++++++++++-----------
>  arch/arm/kernel/smp.c            | 10 ++++++----
>  arch/arm/mach-keystone/platsmp.c | 13 -------------
>  arch/arm/mm/proc-v7-2level.S     |  6 +++---
>  arch/arm/mm/proc-v7-3level.S     | 14 +++++---------
>  arch/arm/mm/proc-v7.S            | 26 ++++++++++++-------------
>  10 files changed, 60 insertions(+), 78 deletions(-)
> 
> diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
> index 184def0e1652..3a72d69b3255 100644
> --- a/arch/arm/include/asm/memory.h
> +++ b/arch/arm/include/asm/memory.h
> @@ -18,8 +18,6 @@
>  #include <linux/types.h>
>  #include <linux/sizes.h>
>  
> -#include <asm/cache.h>
> -
>  #ifdef CONFIG_NEED_MACH_MEMORY_H
>  #include <mach/memory.h>
>  #endif
> @@ -133,20 +131,6 @@
>  #define phys_to_page(phys)	(pfn_to_page(__phys_to_pfn(phys)))
>  
>  /*
> - * Minimum guaranted alignment in pgd_alloc().  The page table pointers passed
> - * around in head.S and proc-*.S are shifted by this amount, in order to
> - * leave spare high bits for systems with physical address extension.  This
> - * does not fully accomodate the 40-bit addressing capability of ARM LPAE, but
> - * gives us about 38-bits or so.
> - */
> -#ifdef CONFIG_ARM_LPAE
> -#define ARCH_PGD_SHIFT		L1_CACHE_SHIFT
> -#else
> -#define ARCH_PGD_SHIFT		0
> -#endif
> -#define ARCH_PGD_MASK		((1 << ARCH_PGD_SHIFT) - 1)
> -
> -/*
>   * PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical
>   * memory.  This is used for XIP and NoMMU kernels, and on platforms that don't
>   * have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use
> diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
> index 5324c1112f3a..8877ad5ffe10 100644
> --- a/arch/arm/include/asm/proc-fns.h
> +++ b/arch/arm/include/asm/proc-fns.h
> @@ -125,13 +125,6 @@ extern void cpu_resume(void);
>  		ttbr;						\
>  	})
>  
> -#define cpu_set_ttbr(nr, val)					\
> -	do {							\
> -		u64 ttbr = val;					\
> -		__asm__("mcrr	p15, " #nr ", %Q0, %R0, c2"	\
> -			: : "r" (ttbr));			\
> -	} while (0)
> -
>  #define cpu_get_pgd()	\
>  	({						\
>  		u64 pg = cpu_get_ttbr(0);		\
> diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
> index 18f5a554134f..487aa08f31ee 100644
> --- a/arch/arm/include/asm/smp.h
> +++ b/arch/arm/include/asm/smp.h
> @@ -61,7 +61,7 @@ asmlinkage void secondary_start_kernel(void);
>  struct secondary_data {
>  	union {
>  		unsigned long mpu_rgn_szr;
> -		unsigned long pgdir;
> +		u64 pgdir;
>  	};
>  	unsigned long swapper_pg_dir;
>  	void *stack;
> diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
> index aebfbf79a1a3..84da14b7cd04 100644
> --- a/arch/arm/kernel/head-nommu.S
> +++ b/arch/arm/kernel/head-nommu.S
> @@ -123,7 +123,7 @@ ENTRY(secondary_startup)
>  ENDPROC(secondary_startup)
>  
>  ENTRY(__secondary_switched)
> -	ldr	sp, [r7, #8]			@ set up the stack pointer
> +	ldr	sp, [r7, #12]			@ set up the stack pointer
>  	mov	fp, #0
>  	b	secondary_start_kernel
>  ENDPROC(__secondary_switched)
> diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
> index 3637973a9708..7304b4c44b52 100644
> --- a/arch/arm/kernel/head.S
> +++ b/arch/arm/kernel/head.S
> @@ -131,13 +131,30 @@ ENTRY(stext)
>  	 * The following calls CPU specific code in a position independent
>  	 * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
>  	 * xxx_proc_info structure selected by __lookup_processor_type
> -	 * above.  On return, the CPU will be ready for the MMU to be
> -	 * turned on, and r0 will hold the CPU control register value.
> +	 * above.
> +	 *
> +	 * The processor init function will be called with:
> +	 *  r1 - machine type
> +	 *  r2 - boot data (atags/dt) pointer
> +	 *  r4 - translation table base (low word)
> +	 *  r5 - translation table base (high word, if LPAE)
> +	 *  r8 - translation table base 1 (pfn if LPAE)
> +	 *  r9 - cpuid
> +	 *  r13 - virtual address for __enable_mmu -> __turn_mmu_on
> +	 *
> +	 * On return, the CPU will be ready for the MMU to be turned on,
> +	 * r0 will hold the CPU control register value, r1, r2, r4, and
> +	 * r9 will be preserved.  r5 will also be preserved if LPAE.
>  	 */
>  	ldr	r13, =__mmap_switched		@ address to jump to after
>  						@ mmu has been enabled
>  	adr	lr, BSYM(1f)			@ return (PIC) address
> +#ifdef CONFIG_ARM_LPAE
> +	mov	r5, #0				@ high TTBR0
> +	mov	r8, r4, lsr #12			@ TTBR1 is swapper_pg_dir pfn
> +#else
>  	mov	r8, r4				@ set TTBR1 to swapper_pg_dir
> +#endif
>  	ldr	r12, [r10, #PROCINFO_INITFUNC]
>  	add	r12, r12, r10
>  	ret	r12
> @@ -158,7 +175,7 @@ ENDPROC(stext)
>   *
>   * Returns:
>   *  r0, r3, r5-r7 corrupted
> - *  r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
> + *  r4 = physical page table address
>   */
>  __create_page_tables:
>  	pgtbl	r4, r8				@ page table address
> @@ -333,7 +350,6 @@ __create_page_tables:
>  #endif
>  #ifdef CONFIG_ARM_LPAE
>  	sub	r4, r4, #0x1000		@ point to the PGD table
> -	mov	r4, r4, lsr #ARCH_PGD_SHIFT
>  #endif
>  	ret	lr
>  ENDPROC(__create_page_tables)
> @@ -381,9 +397,9 @@ ENTRY(secondary_startup)
>  	adr	r4, __secondary_data
>  	ldmia	r4, {r5, r7, r12}		@ address to jump to after
>  	sub	lr, r4, r5			@ mmu has been enabled
> -	ldr	r4, [r7, lr]			@ get secondary_data.pgdir
> -	add	r7, r7, #4
> -	ldr	r8, [r7, lr]			@ get secondary_data.swapper_pg_dir
> +	add	r3, r7, lr
> +	ldrd	r4, [r3, #0]			@ get secondary_data.pgdir
> +	ldr	r8, [r3, #8]			@ get secondary_data.swapper_pg_dir
>  	adr	lr, BSYM(__enable_mmu)		@ return address
>  	mov	r13, r12			@ __secondary_switched address
>  	ldr	r12, [r10, #PROCINFO_INITFUNC]
> @@ -397,7 +413,7 @@ ENDPROC(secondary_startup_arm)
>  	 * r6  = &secondary_data
>  	 */
>  ENTRY(__secondary_switched)
> -	ldr	sp, [r7, #4]			@ get secondary_data.stack
> +	ldr	sp, [r7, #12]			@ get secondary_data.stack
>  	mov	fp, #0
>  	b	secondary_start_kernel
>  ENDPROC(__secondary_switched)
> @@ -416,12 +432,14 @@ __secondary_data:
>  /*
>   * Setup common bits before finally enabling the MMU.  Essentially
>   * this is just loading the page table pointer and domain access
> - * registers.
> + * registers.  All these registers need to be preserved by the
> + * processor setup function (or set in the case of r0)
>   *
>   *  r0  = cp#15 control register
>   *  r1  = machine ID
>   *  r2  = atags or dtb pointer
> - *  r4  = page table (see ARCH_PGD_SHIFT in asm/memory.h)
> + *  r4  = TTBR pointer (low word)
> + *  r5  = TTBR pointer (high word if LPAE)
>   *  r9  = processor ID
>   *  r13 = *virtual* address to jump to upon completion
>   */
> @@ -440,7 +458,9 @@ __enable_mmu:
>  #ifdef CONFIG_CPU_ICACHE_DISABLE
>  	bic	r0, r0, #CR_I
>  #endif
> -#ifndef CONFIG_ARM_LPAE
> +#ifdef CONFIG_ARM_LPAE
> +	mcrr	p15, 0, r4, r5, c2		@ load TTBR0
> +#else
>  	mov	r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
>  		      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
>  		      domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
> diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
> index cca5b8758185..90dfbedfbfb8 100644
> --- a/arch/arm/kernel/smp.c
> +++ b/arch/arm/kernel/smp.c
> @@ -86,9 +86,11 @@ void __init smp_set_ops(struct smp_operations *ops)
>  
>  static unsigned long get_arch_pgd(pgd_t *pgd)
>  {
> -	phys_addr_t pgdir = virt_to_idmap(pgd);
> -	BUG_ON(pgdir & ARCH_PGD_MASK);
> -	return pgdir >> ARCH_PGD_SHIFT;
> +#ifdef CONFIG_ARM_LPAE
> +	return __phys_to_pfn(virt_to_phys(pgd));
> +#else
> +	return virt_to_phys(pgd);
> +#endif
>  }
>  
>  int __cpu_up(unsigned int cpu, struct task_struct *idle)
> @@ -108,7 +110,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
>  #endif
>  
>  #ifdef CONFIG_MMU
> -	secondary_data.pgdir = get_arch_pgd(idmap_pgd);
> +	secondary_data.pgdir = virt_to_phys(idmap_pgd);
>  	secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir);
>  #endif
>  	sync_cache_w(&secondary_data);
> diff --git a/arch/arm/mach-keystone/platsmp.c b/arch/arm/mach-keystone/platsmp.c
> index 5f46a7cf907b..4bbb18463bfd 100644
> --- a/arch/arm/mach-keystone/platsmp.c
> +++ b/arch/arm/mach-keystone/platsmp.c
> @@ -39,19 +39,6 @@ static int keystone_smp_boot_secondary(unsigned int cpu,
>  	return error;
>  }
>  
> -#ifdef CONFIG_ARM_LPAE
> -static void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
> -{
> -	pgd_t *pgd0 = pgd_offset_k(0);
> -	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
> -	local_flush_tlb_all();
> -}
> -#else
> -static inline void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
> -{}
> -#endif
> -
>  struct smp_operations keystone_smp_ops __initdata = {
>  	.smp_boot_secondary	= keystone_smp_boot_secondary,
> -	.smp_secondary_init     = keystone_smp_secondary_initmem,
>  };
> diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
> index 10405b8d31af..fa385140715f 100644
> --- a/arch/arm/mm/proc-v7-2level.S
> +++ b/arch/arm/mm/proc-v7-2level.S
> @@ -148,10 +148,10 @@ ENDPROC(cpu_v7_set_pte_ext)
>  	 * Macro for setting up the TTBRx and TTBCR registers.
>  	 * - \ttb0 and \ttb1 updated with the corresponding flags.
>  	 */
> -	.macro	v7_ttb_setup, zero, ttbr0, ttbr1, tmp
> +	.macro	v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
>  	mcr	p15, 0, \zero, c2, c0, 2	@ TTB control register
> -	ALT_SMP(orr	\ttbr0, \ttbr0, #TTB_FLAGS_SMP)
> -	ALT_UP(orr	\ttbr0, \ttbr0, #TTB_FLAGS_UP)
> +	ALT_SMP(orr	\ttbr0l, \ttbr0l, #TTB_FLAGS_SMP)
> +	ALT_UP(orr	\ttbr0l, \ttbr0l, #TTB_FLAGS_UP)
>  	ALT_SMP(orr	\ttbr1, \ttbr1, #TTB_FLAGS_SMP)
>  	ALT_UP(orr	\ttbr1, \ttbr1, #TTB_FLAGS_UP)
>  	mcr	p15, 0, \ttbr1, c2, c0, 1	@ load TTB1
> diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
> index d3daed0ae0ad..5e5720e8bc5f 100644
> --- a/arch/arm/mm/proc-v7-3level.S
> +++ b/arch/arm/mm/proc-v7-3level.S
> @@ -126,11 +126,10 @@ ENDPROC(cpu_v7_set_pte_ext)
>  	 * Macro for setting up the TTBRx and TTBCR registers.
>  	 * - \ttbr1 updated.
>  	 */
> -	.macro	v7_ttb_setup, zero, ttbr0, ttbr1, tmp
> +	.macro	v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
>  	ldr	\tmp, =swapper_pg_dir		@ swapper_pg_dir virtual address
> -	mov	\tmp, \tmp, lsr #ARCH_PGD_SHIFT
> -	cmp	\ttbr1, \tmp			@ PHYS_OFFSET > PAGE_OFFSET?
> -	mrc	p15, 0, \tmp, c2, c0, 2		@ TTB control register
> +	cmp	\ttbr1, \tmp, lsr #12		@ PHYS_OFFSET > PAGE_OFFSET?
> +	mrc	p15, 0, \tmp, c2, c0, 2		@ TTB control egister
>  	orr	\tmp, \tmp, #TTB_EAE
>  	ALT_SMP(orr	\tmp, \tmp, #TTB_FLAGS_SMP)
>  	ALT_UP(orr	\tmp, \tmp, #TTB_FLAGS_UP)
> @@ -143,13 +142,10 @@ ENDPROC(cpu_v7_set_pte_ext)
>  	 */
>  	orrls	\tmp, \tmp, #TTBR1_SIZE				@ TTBCR.T1SZ
>  	mcr	p15, 0, \tmp, c2, c0, 2				@ TTBCR
> -	mov	\tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT)	@ upper bits
> -	mov	\ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT		@ lower bits
> +	mov	\tmp, \ttbr1, lsr #20
> +	mov	\ttbr1, \ttbr1, lsl #12
>  	addls	\ttbr1, \ttbr1, #TTBR1_OFFSET
>  	mcrr	p15, 1, \ttbr1, \tmp, c2			@ load TTBR1
> -	mov	\tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT)	@ upper bits
> -	mov	\ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT		@ lower bits
> -	mcrr	p15, 0, \ttbr0, \tmp, c2			@ load TTBR0
>  	.endm
>  
>  	/*
> diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
> index 3d1054f11a8a..873230912894 100644
> --- a/arch/arm/mm/proc-v7.S
> +++ b/arch/arm/mm/proc-v7.S
> @@ -343,9 +343,9 @@ __v7_setup:
>  	and	r10, r0, #0xff000000		@ ARM?
>  	teq	r10, #0x41000000
>  	bne	3f
> -	and	r5, r0, #0x00f00000		@ variant
> +	and	r3, r0, #0x00f00000		@ variant
>  	and	r6, r0, #0x0000000f		@ revision
> -	orr	r6, r6, r5, lsr #20-4		@ combine variant and revision
> +	orr	r6, r6, r3, lsr #20-4		@ combine variant and revision
>  	ubfx	r0, r0, #4, #12			@ primary part number
>  
>  	/* Cortex-A8 Errata */
> @@ -354,7 +354,7 @@ __v7_setup:
>  	bne	2f
>  #if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)
>  
> -	teq	r5, #0x00100000			@ only present in r1p*
> +	teq	r3, #0x00100000			@ only present in r1p*
>  	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
>  	orreq	r10, r10, #(1 << 6)		@ set IBE to 1
>  	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
> @@ -395,7 +395,7 @@ __v7_setup:
>  	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
>  #endif
>  #ifdef CONFIG_ARM_ERRATA_743622
> -	teq	r5, #0x00200000			@ only present in r2p*
> +	teq	r3, #0x00200000			@ only present in r2p*
>  	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
>  	orreq	r10, r10, #1 << 6		@ set bit #6
>  	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
> @@ -425,10 +425,10 @@ __v7_setup:
>  	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
>  #ifdef CONFIG_MMU
>  	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs
> -	v7_ttb_setup r10, r4, r8, r5		@ TTBCR, TTBRx setup
> -	ldr	r5, =PRRR			@ PRRR
> +	v7_ttb_setup r10, r4, r5, r8, r3	@ TTBCR, TTBRx setup
> +	ldr	r3, =PRRR			@ PRRR
>  	ldr	r6, =NMRR			@ NMRR
> -	mcr	p15, 0, r5, c10, c2, 0		@ write PRRR
> +	mcr	p15, 0, r3, c10, c2, 0		@ write PRRR
>  	mcr	p15, 0, r6, c10, c2, 1		@ write NMRR
>  #endif
>  	dsb					@ Complete invalidations
> @@ -437,22 +437,22 @@ __v7_setup:
>  	and	r0, r0, #(0xf << 12)		@ ThumbEE enabled field
>  	teq	r0, #(1 << 12)			@ check if ThumbEE is present
>  	bne	1f
> -	mov	r5, #0
> -	mcr	p14, 6, r5, c1, c0, 0		@ Initialize TEEHBR to 0
> +	mov	r3, #0
> +	mcr	p14, 6, r3, c1, c0, 0		@ Initialize TEEHBR to 0
>  	mrc	p14, 6, r0, c0, c0, 0		@ load TEECR
>  	orr	r0, r0, #1			@ set the 1st bit in order to
>  	mcr	p14, 6, r0, c0, c0, 0		@ stop userspace TEEHBR access
>  1:
>  #endif
> -	adr	r5, v7_crval
> -	ldmia	r5, {r5, r6}
> +	adr	r3, v7_crval
> +	ldmia	r3, {r3, r6}
>   ARM_BE8(orr	r6, r6, #1 << 25)		@ big-endian page tables
>  #ifdef CONFIG_SWP_EMULATE
> -	orr     r5, r5, #(1 << 10)              @ set SW bit in "clear"
> +	orr     r3, r3, #(1 << 10)              @ set SW bit in "clear"
>  	bic     r6, r6, #(1 << 10)              @ clear it in "mmuset"
>  #endif
>     	mrc	p15, 0, r0, c1, c0, 0		@ read control register
> -	bic	r0, r0, r5			@ clear bits them
> +	bic	r0, r0, r3			@ clear bits them
>  	orr	r0, r0, r6			@ set them
>   THUMB(	orr	r0, r0, #1 << 30	)	@ Thumb exceptions
>  	ret	lr				@ return to head.S:__ret
>
Russell King - ARM Linux Aug. 5, 2015, 4:01 p.m. UTC | #2
On Wed, Aug 05, 2015 at 05:26:25PM +0200, Gregory CLEMENT wrote:
> It seems that since the merge of this commit the secondary CPUs on
> Armada XP failed to come online in Big Endian (my configuration is
> mvebu_v7_defconfig+CONFIG_CPU_BIG_ENDIAN=y).

That's probably because I don't test big endian at all, never have done,
and I suspect never will as I've never had systems using BE.  So, I regard
BE as something of a troublesome untestable burden on kernel development.
(And I've always regarded BE to be an abomination.)  In short, I loathe
big endian, which really doesn't bode well for me not constantly cocking
it up...

My guess is that it all comes down to much of the new code expecting the
value in r4/r5 to be the least significant 32bits in r4 and the most
significant 32bits in r5.  However, in the secondary code, we load this
using ldrd, which on BE probably reverses that.

> Once I found that this commit introduced the regression I also tested
> but with LPAE but I had the same result: only the first CPU boot.

Not surprising.  I guess BE would need the two registers to mcrr reversing
if they were a 64-bit value loaded with ldrd.

The simple answer may just be to swap r4/r5 after the ldrd.  Trying to do
something different is likely to get into very troublesome problems,
ensuring that r5 doesn't get overwritten (eg, the code which loads the
domain access register would have to switch to a different register -
maybe r4? - but then that needs to be dependent on LE or BE...) and we're
really not going to end up with lots of conditional register usage just
because of annoying BE differences scattered throughout the code.
Gregory CLEMENT Aug. 6, 2015, 11:14 a.m. UTC | #3
On 05/08/2015 18:01, Russell King - ARM Linux wrote:
> On Wed, Aug 05, 2015 at 05:26:25PM +0200, Gregory CLEMENT wrote:
>> It seems that since the merge of this commit the secondary CPUs on
>> Armada XP failed to come online in Big Endian (my configuration is
>> mvebu_v7_defconfig+CONFIG_CPU_BIG_ENDIAN=y).
> 
> That's probably because I don't test big endian at all, never have done,
> and I suspect never will as I've never had systems using BE.  So, I regard
> BE as something of a troublesome untestable burden on kernel development.
> (And I've always regarded BE to be an abomination.)  In short, I loath
> big endian, which really doesn't bode well for me not constantly cocking
> it up...
> 
> My guess is that it all comes down to much of the new code expecting the
> value in r4/r5 to be the least significant 32bits in r4 and the most
> significant 32bits in r5.  However, in the secondary code, we load this
> using ldrd, which on BE probably reverses that.

Thanks for the explanation.

> 
>> Once I found that this commit introduced the regression I also tested
>> but with LPAE but I had the same result: only the first CPU boot.
> 
> Not surprising.  I guess BE would need the two registers to mcrr reversing
> if they were a 64-bit value loaded with ldrd.
> 
> The simple answer may just be to swap r4/r5 after the ldrd.  

I tried this and it fixed the issue.
I will send a patch shortly and if you agree I will submit it to your
patch system.

Thanks,

Gregory
diff mbox

Patch

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 184def0e1652..3a72d69b3255 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -18,8 +18,6 @@ 
 #include <linux/types.h>
 #include <linux/sizes.h>
 
-#include <asm/cache.h>
-
 #ifdef CONFIG_NEED_MACH_MEMORY_H
 #include <mach/memory.h>
 #endif
@@ -133,20 +131,6 @@ 
 #define phys_to_page(phys)	(pfn_to_page(__phys_to_pfn(phys)))
 
 /*
- * Minimum guaranted alignment in pgd_alloc().  The page table pointers passed
- * around in head.S and proc-*.S are shifted by this amount, in order to
- * leave spare high bits for systems with physical address extension.  This
- * does not fully accomodate the 40-bit addressing capability of ARM LPAE, but
- * gives us about 38-bits or so.
- */
-#ifdef CONFIG_ARM_LPAE
-#define ARCH_PGD_SHIFT		L1_CACHE_SHIFT
-#else
-#define ARCH_PGD_SHIFT		0
-#endif
-#define ARCH_PGD_MASK		((1 << ARCH_PGD_SHIFT) - 1)
-
-/*
  * PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical
  * memory.  This is used for XIP and NoMMU kernels, and on platforms that don't
  * have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index 5324c1112f3a..8877ad5ffe10 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -125,13 +125,6 @@  extern void cpu_resume(void);
 		ttbr;						\
 	})
 
-#define cpu_set_ttbr(nr, val)					\
-	do {							\
-		u64 ttbr = val;					\
-		__asm__("mcrr	p15, " #nr ", %Q0, %R0, c2"	\
-			: : "r" (ttbr));			\
-	} while (0)
-
 #define cpu_get_pgd()	\
 	({						\
 		u64 pg = cpu_get_ttbr(0);		\
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 18f5a554134f..487aa08f31ee 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -61,7 +61,7 @@  asmlinkage void secondary_start_kernel(void);
 struct secondary_data {
 	union {
 		unsigned long mpu_rgn_szr;
-		unsigned long pgdir;
+		u64 pgdir;
 	};
 	unsigned long swapper_pg_dir;
 	void *stack;
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index aebfbf79a1a3..84da14b7cd04 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -123,7 +123,7 @@  ENTRY(secondary_startup)
 ENDPROC(secondary_startup)
 
 ENTRY(__secondary_switched)
-	ldr	sp, [r7, #8]			@ set up the stack pointer
+	ldr	sp, [r7, #12]			@ set up the stack pointer
 	mov	fp, #0
 	b	secondary_start_kernel
 ENDPROC(__secondary_switched)
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 3637973a9708..7304b4c44b52 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -131,13 +131,30 @@  ENTRY(stext)
 	 * The following calls CPU specific code in a position independent
 	 * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
 	 * xxx_proc_info structure selected by __lookup_processor_type
-	 * above.  On return, the CPU will be ready for the MMU to be
-	 * turned on, and r0 will hold the CPU control register value.
+	 * above.
+	 *
+	 * The processor init function will be called with:
+	 *  r1 - machine type
+	 *  r2 - boot data (atags/dt) pointer
+	 *  r4 - translation table base (low word)
+	 *  r5 - translation table base (high word, if LPAE)
+	 *  r8 - translation table base 1 (pfn if LPAE)
+	 *  r9 - cpuid
+	 *  r13 - virtual address for __enable_mmu -> __turn_mmu_on
+	 *
+	 * On return, the CPU will be ready for the MMU to be turned on,
+	 * r0 will hold the CPU control register value, r1, r2, r4, and
+	 * r9 will be preserved.  r5 will also be preserved if LPAE.
 	 */
 	ldr	r13, =__mmap_switched		@ address to jump to after
 						@ mmu has been enabled
 	adr	lr, BSYM(1f)			@ return (PIC) address
+#ifdef CONFIG_ARM_LPAE
+	mov	r5, #0				@ high TTBR0
+	mov	r8, r4, lsr #12			@ TTBR1 is swapper_pg_dir pfn
+#else
 	mov	r8, r4				@ set TTBR1 to swapper_pg_dir
+#endif
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
 	add	r12, r12, r10
 	ret	r12
@@ -158,7 +175,7 @@  ENDPROC(stext)
  *
  * Returns:
  *  r0, r3, r5-r7 corrupted
- *  r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
+ *  r4 = physical page table address
  */
 __create_page_tables:
 	pgtbl	r4, r8				@ page table address
@@ -333,7 +350,6 @@  __create_page_tables:
 #endif
 #ifdef CONFIG_ARM_LPAE
 	sub	r4, r4, #0x1000		@ point to the PGD table
-	mov	r4, r4, lsr #ARCH_PGD_SHIFT
 #endif
 	ret	lr
 ENDPROC(__create_page_tables)
@@ -381,9 +397,9 @@  ENTRY(secondary_startup)
 	adr	r4, __secondary_data
 	ldmia	r4, {r5, r7, r12}		@ address to jump to after
 	sub	lr, r4, r5			@ mmu has been enabled
-	ldr	r4, [r7, lr]			@ get secondary_data.pgdir
-	add	r7, r7, #4
-	ldr	r8, [r7, lr]			@ get secondary_data.swapper_pg_dir
+	add	r3, r7, lr
+	ldrd	r4, [r3, #0]			@ get secondary_data.pgdir
+	ldr	r8, [r3, #8]			@ get secondary_data.swapper_pg_dir
 	adr	lr, BSYM(__enable_mmu)		@ return address
 	mov	r13, r12			@ __secondary_switched address
 	ldr	r12, [r10, #PROCINFO_INITFUNC]
@@ -397,7 +413,7 @@  ENDPROC(secondary_startup_arm)
 	 * r6  = &secondary_data
 	 */
 ENTRY(__secondary_switched)
-	ldr	sp, [r7, #4]			@ get secondary_data.stack
+	ldr	sp, [r7, #12]			@ get secondary_data.stack
 	mov	fp, #0
 	b	secondary_start_kernel
 ENDPROC(__secondary_switched)
@@ -416,12 +432,14 @@  __secondary_data:
 /*
  * Setup common bits before finally enabling the MMU.  Essentially
  * this is just loading the page table pointer and domain access
- * registers.
+ * registers.  All these registers need to be preserved by the
+ * processor setup function (or set in the case of r0)
  *
  *  r0  = cp#15 control register
  *  r1  = machine ID
  *  r2  = atags or dtb pointer
- *  r4  = page table (see ARCH_PGD_SHIFT in asm/memory.h)
+ *  r4  = TTBR pointer (low word)
+ *  r5  = TTBR pointer (high word if LPAE)
  *  r9  = processor ID
  *  r13 = *virtual* address to jump to upon completion
  */
@@ -440,7 +458,9 @@  __enable_mmu:
 #ifdef CONFIG_CPU_ICACHE_DISABLE
 	bic	r0, r0, #CR_I
 #endif
-#ifndef CONFIG_ARM_LPAE
+#ifdef CONFIG_ARM_LPAE
+	mcrr	p15, 0, r4, r5, c2		@ load TTBR0
+#else
 	mov	r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
 		      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
 		      domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index cca5b8758185..90dfbedfbfb8 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -86,9 +86,11 @@  void __init smp_set_ops(struct smp_operations *ops)
 
 static unsigned long get_arch_pgd(pgd_t *pgd)
 {
-	phys_addr_t pgdir = virt_to_idmap(pgd);
-	BUG_ON(pgdir & ARCH_PGD_MASK);
-	return pgdir >> ARCH_PGD_SHIFT;
+#ifdef CONFIG_ARM_LPAE
+	return __phys_to_pfn(virt_to_phys(pgd));
+#else
+	return virt_to_phys(pgd);
+#endif
 }
 
 int __cpu_up(unsigned int cpu, struct task_struct *idle)
@@ -108,7 +110,7 @@  int __cpu_up(unsigned int cpu, struct task_struct *idle)
 #endif
 
 #ifdef CONFIG_MMU
-	secondary_data.pgdir = get_arch_pgd(idmap_pgd);
+	secondary_data.pgdir = virt_to_phys(idmap_pgd);
 	secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir);
 #endif
 	sync_cache_w(&secondary_data);
diff --git a/arch/arm/mach-keystone/platsmp.c b/arch/arm/mach-keystone/platsmp.c
index 5f46a7cf907b..4bbb18463bfd 100644
--- a/arch/arm/mach-keystone/platsmp.c
+++ b/arch/arm/mach-keystone/platsmp.c
@@ -39,19 +39,6 @@  static int keystone_smp_boot_secondary(unsigned int cpu,
 	return error;
 }
 
-#ifdef CONFIG_ARM_LPAE
-static void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
-{
-	pgd_t *pgd0 = pgd_offset_k(0);
-	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
-	local_flush_tlb_all();
-}
-#else
-static inline void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
-{}
-#endif
-
 struct smp_operations keystone_smp_ops __initdata = {
 	.smp_boot_secondary	= keystone_smp_boot_secondary,
-	.smp_secondary_init     = keystone_smp_secondary_initmem,
 };
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 10405b8d31af..fa385140715f 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -148,10 +148,10 @@  ENDPROC(cpu_v7_set_pte_ext)
 	 * Macro for setting up the TTBRx and TTBCR registers.
 	 * - \ttb0 and \ttb1 updated with the corresponding flags.
 	 */
-	.macro	v7_ttb_setup, zero, ttbr0, ttbr1, tmp
+	.macro	v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
 	mcr	p15, 0, \zero, c2, c0, 2	@ TTB control register
-	ALT_SMP(orr	\ttbr0, \ttbr0, #TTB_FLAGS_SMP)
-	ALT_UP(orr	\ttbr0, \ttbr0, #TTB_FLAGS_UP)
+	ALT_SMP(orr	\ttbr0l, \ttbr0l, #TTB_FLAGS_SMP)
+	ALT_UP(orr	\ttbr0l, \ttbr0l, #TTB_FLAGS_UP)
 	ALT_SMP(orr	\ttbr1, \ttbr1, #TTB_FLAGS_SMP)
 	ALT_UP(orr	\ttbr1, \ttbr1, #TTB_FLAGS_UP)
 	mcr	p15, 0, \ttbr1, c2, c0, 1	@ load TTB1
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index d3daed0ae0ad..5e5720e8bc5f 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -126,11 +126,10 @@  ENDPROC(cpu_v7_set_pte_ext)
 	 * Macro for setting up the TTBRx and TTBCR registers.
 	 * - \ttbr1 updated.
 	 */
-	.macro	v7_ttb_setup, zero, ttbr0, ttbr1, tmp
+	.macro	v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
 	ldr	\tmp, =swapper_pg_dir		@ swapper_pg_dir virtual address
-	mov	\tmp, \tmp, lsr #ARCH_PGD_SHIFT
-	cmp	\ttbr1, \tmp			@ PHYS_OFFSET > PAGE_OFFSET?
-	mrc	p15, 0, \tmp, c2, c0, 2		@ TTB control register
+	cmp	\ttbr1, \tmp, lsr #12		@ PHYS_OFFSET > PAGE_OFFSET?
+	mrc	p15, 0, \tmp, c2, c0, 2		@ TTB control egister
 	orr	\tmp, \tmp, #TTB_EAE
 	ALT_SMP(orr	\tmp, \tmp, #TTB_FLAGS_SMP)
 	ALT_UP(orr	\tmp, \tmp, #TTB_FLAGS_UP)
@@ -143,13 +142,10 @@  ENDPROC(cpu_v7_set_pte_ext)
 	 */
 	orrls	\tmp, \tmp, #TTBR1_SIZE				@ TTBCR.T1SZ
 	mcr	p15, 0, \tmp, c2, c0, 2				@ TTBCR
-	mov	\tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT)	@ upper bits
-	mov	\ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT		@ lower bits
+	mov	\tmp, \ttbr1, lsr #20
+	mov	\ttbr1, \ttbr1, lsl #12
 	addls	\ttbr1, \ttbr1, #TTBR1_OFFSET
 	mcrr	p15, 1, \ttbr1, \tmp, c2			@ load TTBR1
-	mov	\tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT)	@ upper bits
-	mov	\ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT		@ lower bits
-	mcrr	p15, 0, \ttbr0, \tmp, c2			@ load TTBR0
 	.endm
 
 	/*
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 3d1054f11a8a..873230912894 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -343,9 +343,9 @@  __v7_setup:
 	and	r10, r0, #0xff000000		@ ARM?
 	teq	r10, #0x41000000
 	bne	3f
-	and	r5, r0, #0x00f00000		@ variant
+	and	r3, r0, #0x00f00000		@ variant
 	and	r6, r0, #0x0000000f		@ revision
-	orr	r6, r6, r5, lsr #20-4		@ combine variant and revision
+	orr	r6, r6, r3, lsr #20-4		@ combine variant and revision
 	ubfx	r0, r0, #4, #12			@ primary part number
 
 	/* Cortex-A8 Errata */
@@ -354,7 +354,7 @@  __v7_setup:
 	bne	2f
 #if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)
 
-	teq	r5, #0x00100000			@ only present in r1p*
+	teq	r3, #0x00100000			@ only present in r1p*
 	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
 	orreq	r10, r10, #(1 << 6)		@ set IBE to 1
 	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
@@ -395,7 +395,7 @@  __v7_setup:
 	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
 #endif
 #ifdef CONFIG_ARM_ERRATA_743622
-	teq	r5, #0x00200000			@ only present in r2p*
+	teq	r3, #0x00200000			@ only present in r2p*
 	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
 	orreq	r10, r10, #1 << 6		@ set bit #6
 	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
@@ -425,10 +425,10 @@  __v7_setup:
 	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
 #ifdef CONFIG_MMU
 	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs
-	v7_ttb_setup r10, r4, r8, r5		@ TTBCR, TTBRx setup
-	ldr	r5, =PRRR			@ PRRR
+	v7_ttb_setup r10, r4, r5, r8, r3	@ TTBCR, TTBRx setup
+	ldr	r3, =PRRR			@ PRRR
 	ldr	r6, =NMRR			@ NMRR
-	mcr	p15, 0, r5, c10, c2, 0		@ write PRRR
+	mcr	p15, 0, r3, c10, c2, 0		@ write PRRR
 	mcr	p15, 0, r6, c10, c2, 1		@ write NMRR
 #endif
 	dsb					@ Complete invalidations
@@ -437,22 +437,22 @@  __v7_setup:
 	and	r0, r0, #(0xf << 12)		@ ThumbEE enabled field
 	teq	r0, #(1 << 12)			@ check if ThumbEE is present
 	bne	1f
-	mov	r5, #0
-	mcr	p14, 6, r5, c1, c0, 0		@ Initialize TEEHBR to 0
+	mov	r3, #0
+	mcr	p14, 6, r3, c1, c0, 0		@ Initialize TEEHBR to 0
 	mrc	p14, 6, r0, c0, c0, 0		@ load TEECR
 	orr	r0, r0, #1			@ set the 1st bit in order to
 	mcr	p14, 6, r0, c0, c0, 0		@ stop userspace TEEHBR access
 1:
 #endif
-	adr	r5, v7_crval
-	ldmia	r5, {r5, r6}
+	adr	r3, v7_crval
+	ldmia	r3, {r3, r6}
  ARM_BE8(orr	r6, r6, #1 << 25)		@ big-endian page tables
 #ifdef CONFIG_SWP_EMULATE
-	orr     r5, r5, #(1 << 10)              @ set SW bit in "clear"
+	orr     r3, r3, #(1 << 10)              @ set SW bit in "clear"
 	bic     r6, r6, #(1 << 10)              @ clear it in "mmuset"
 #endif
    	mrc	p15, 0, r0, c1, c0, 0		@ read control register
-	bic	r0, r0, r5			@ clear bits them
+	bic	r0, r0, r3			@ clear bits them
 	orr	r0, r0, r6			@ set them
  THUMB(	orr	r0, r0, #1 << 30	)	@ Thumb exceptions
 	ret	lr				@ return to head.S:__ret