Message ID | E1YpwbQ-0000wm-Jx@rmk-PC.arm.linux.org.uk
---|---
State | New, archived
Hi Russell,

On 06/05/2015 12:30, Russell King wrote:
> Re-engineer the LPAE TTBR setup code. Rather than passing some shifted
> address in order to fit in a CPU register, pass either a full physical
> address (in the case of r4, r5 for TTBR0) or a PFN (for TTBR1).
>
> This removes the ARCH_PGD_SHIFT hack, and the last dangerous user of
> cpu_set_ttbr() in the secondary CPU startup code path (which was there
> to re-set TTBR1 to the appropriate high physical address space on
> Keystone2).

It seems that since the merge of this commit the secondary CPUs on
Armada XP failed to come online in Big Endian (my configuration is
mvebu_v7_defconfig+CONFIG_CPU_BIG_ENDIAN=y).

Once I found that this commit introduced the regression, I also tested
with LPAE, but I had the same result: only the first CPU boots.

I don't know if there is something wrong with this patch or if it
reveals something which was wrong in the Armada XP port. At one point I
suspected something around the change in the get_arch_pgd() function,
but I didn't find anything useful.

Any guidance would be greatly appreciated.

Thanks,

Gregory

> [... remainder of the quoted patch trimmed; the full patch appears below ...]
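For context, the get_arch_pgd() change Gregory refers to reduces to the
following before/after, taken from the patch below; the comments are added
annotation, not part of the kernel source:

    /* Before: returned the pgd physical address shifted right by
     * ARCH_PGD_SHIFT so that it fit a 32-bit register even on LPAE. */
    static unsigned long get_arch_pgd(pgd_t *pgd)
    {
            phys_addr_t pgdir = virt_to_idmap(pgd);
            BUG_ON(pgdir & ARCH_PGD_MASK);
            return pgdir >> ARCH_PGD_SHIFT;
    }

    /* After: returns a plain PFN on LPAE (the assembly rebuilds the
     * 64-bit TTBR1 value from it), or the physical address otherwise. */
    static unsigned long get_arch_pgd(pgd_t *pgd)
    {
    #ifdef CONFIG_ARM_LPAE
            return __phys_to_pfn(virt_to_phys(pgd));
    #else
            return virt_to_phys(pgd);
    #endif
    }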
On Wed, Aug 05, 2015 at 05:26:25PM +0200, Gregory CLEMENT wrote:
> It seems that since the merge of this commit the secondary CPUs on
> Armada XP failed to come online in Big Endian (my configuration is
> mvebu_v7_defconfig+CONFIG_CPU_BIG_ENDIAN=y).

That's probably because I don't test big endian at all, never have
done, and I suspect never will, as I've never had systems using BE. So
I regard BE as something of a troublesome, untestable burden on kernel
development. (And I've always regarded BE as an abomination.) In short,
I loathe big endian, which really doesn't bode well for me not
constantly cocking it up...

My guess is that it all comes down to much of the new code expecting
the value in r4/r5 to be the least significant 32 bits in r4 and the
most significant 32 bits in r5. However, in the secondary code, we load
this using ldrd, which on BE probably reverses that.

> Once I found that this commit introduced the regression, I also tested
> with LPAE, but I had the same result: only the first CPU boots.

Not surprising. I guess BE would need the two registers passed to mcrr
to be reversed if they were a 64-bit value loaded with ldrd.

The simple answer may just be to swap r4/r5 after the ldrd. Trying to
do something different is likely to run into very troublesome problems:
ensuring that r5 doesn't get overwritten (eg, the code which loads the
domain access register would have to switch to a different register -
maybe r4? - but then that needs to be dependent on LE or BE...), and
we're really not going to end up with lots of conditional register
usage scattered throughout the code just because of annoying BE
differences.
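To make that guess concrete: a u64 pgdir is stored with its words in
opposite order in the two endiannesses, while ldrd and mcrr have fixed
register/offset pairings. Using a hypothetical 40-bit LPAE address:

    u64 pgdir = 0x00000001002f4000;  /* hypothetical example value */

    LE data: [r3, #0] = 0x002f4000 (low word),  [r3, #4] = 0x00000001 (high word)
    BE data: [r3, #0] = 0x00000001 (high word), [r3, #4] = 0x002f4000 (low word)

ldrd r4, [r3] always loads r4 from [r3, #0] and r5 from [r3, #4], while
mcrr p15, 0, r4, r5, c2 always takes the low word from r4 and the high
word from r5 - so on BE the value written to TTBR0 ends up word-swapped.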
On 05/08/2015 18:01, Russell King - ARM Linux wrote:
> On Wed, Aug 05, 2015 at 05:26:25PM +0200, Gregory CLEMENT wrote:
>> It seems that since the merge of this commit the secondary CPUs on
>> Armada XP failed to come online in Big Endian (my configuration is
>> mvebu_v7_defconfig+CONFIG_CPU_BIG_ENDIAN=y).
>
> My guess is that it all comes down to much of the new code expecting
> the value in r4/r5 to be the least significant 32 bits in r4 and the
> most significant 32 bits in r5. However, in the secondary code, we load
> this using ldrd, which on BE probably reverses that.

Thanks for the explanation.

> The simple answer may just be to swap r4/r5 after the ldrd.

I tried this and it fixed the issue. I will send a patch shortly, and
if you agree I will submit it to your patch system.

Thanks,

Gregory
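For reference, the swap could be done in arch/arm/kernel/head.S right
after the ldrd, along these lines. This is only a sketch of Russell's
suggestion, using the kernel's existing ARM_BE8() macro (which emits its
argument only on big-endian BE8 builds); the actual patch Gregory sent
may differ:

    	add	r3, r7, lr
    	ldrd	r4, [r3, #0]		@ get secondary_data.pgdir
    ARM_BE8(eor	r4, r4, r5)		@ BE8 stored the high word first, so
    ARM_BE8(eor	r5, r4, r5)		@ ldrd left high in r4, low in r5:
    ARM_BE8(eor	r4, r4, r5)		@ swap them with three eors, no temp
    	ldr	r8, [r3, #8]		@ get secondary_data.swapper_pg_dir

The three-eor exchange swaps two registers without needing a scratch
register, which matters here since most low registers are already spoken
for on this path.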
The patch under discussion:

Re-engineer the LPAE TTBR setup code. Rather than passing some shifted
address in order to fit in a CPU register, pass either a full physical
address (in the case of r4, r5 for TTBR0) or a PFN (for TTBR1).

This removes the ARCH_PGD_SHIFT hack, and the last dangerous user of
cpu_set_ttbr() in the secondary CPU startup code path (which was there
to re-set TTBR1 to the appropriate high physical address space on
Keystone2).

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 arch/arm/include/asm/memory.h    | 16 ---------------
 arch/arm/include/asm/proc-fns.h  |  7 -------
 arch/arm/include/asm/smp.h       |  2 +-
 arch/arm/kernel/head-nommu.S     |  2 +-
 arch/arm/kernel/head.S           | 42 +++++++++++++++++++++++++++-----------
 arch/arm/kernel/smp.c            | 10 ++++++----
 arch/arm/mach-keystone/platsmp.c | 13 -------------
 arch/arm/mm/proc-v7-2level.S     |  6 +++---
 arch/arm/mm/proc-v7-3level.S     | 14 +++++---------
 arch/arm/mm/proc-v7.S            | 26 +++++++++++++-------------
 10 files changed, 60 insertions(+), 78 deletions(-)

diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h
index 184def0e1652..3a72d69b3255 100644
--- a/arch/arm/include/asm/memory.h
+++ b/arch/arm/include/asm/memory.h
@@ -18,8 +18,6 @@
 #include <linux/types.h>
 #include <linux/sizes.h>
 
-#include <asm/cache.h>
-
 #ifdef CONFIG_NEED_MACH_MEMORY_H
 #include <mach/memory.h>
 #endif
@@ -133,20 +131,6 @@
 #define phys_to_page(phys)	(pfn_to_page(__phys_to_pfn(phys)))
 
 /*
- * Minimum guaranted alignment in pgd_alloc().  The page table pointers passed
- * around in head.S and proc-*.S are shifted by this amount, in order to
- * leave spare high bits for systems with physical address extension.  This
- * does not fully accomodate the 40-bit addressing capability of ARM LPAE, but
- * gives us about 38-bits or so.
- */
-#ifdef CONFIG_ARM_LPAE
-#define ARCH_PGD_SHIFT		L1_CACHE_SHIFT
-#else
-#define ARCH_PGD_SHIFT		0
-#endif
-#define ARCH_PGD_MASK		((1 << ARCH_PGD_SHIFT) - 1)
-
-/*
  * PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical
  * memory.  This is used for XIP and NoMMU kernels, and on platforms that don't
  * have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use
diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h
index 5324c1112f3a..8877ad5ffe10 100644
--- a/arch/arm/include/asm/proc-fns.h
+++ b/arch/arm/include/asm/proc-fns.h
@@ -125,13 +125,6 @@ extern void cpu_resume(void);
 		ttbr;					\
 	})
 
-#define cpu_set_ttbr(nr, val)					\
-	do {							\
-		u64 ttbr = val;					\
-		__asm__("mcrr	p15, " #nr ", %Q0, %R0, c2"	\
-			: : "r" (ttbr));			\
-	} while (0)
-
 #define cpu_get_pgd()	\
 	({						\
 		u64 pg = cpu_get_ttbr(0);	\
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 18f5a554134f..487aa08f31ee 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -61,7 +61,7 @@ asmlinkage void secondary_start_kernel(void);
 struct secondary_data {
 	union {
 		unsigned long mpu_rgn_szr;
-		unsigned long pgdir;
+		u64 pgdir;
 	};
 	unsigned long swapper_pg_dir;
 	void *stack;
diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S
index aebfbf79a1a3..84da14b7cd04 100644
--- a/arch/arm/kernel/head-nommu.S
+++ b/arch/arm/kernel/head-nommu.S
@@ -123,7 +123,7 @@ ENTRY(secondary_startup)
 ENDPROC(secondary_startup)
 
 ENTRY(__secondary_switched)
-	ldr	sp, [r7, #8]			@ set up the stack pointer
+	ldr	sp, [r7, #12]			@ set up the stack pointer
 	mov	fp, #0
 	b	secondary_start_kernel
 ENDPROC(__secondary_switched)
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 3637973a9708..7304b4c44b52 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -131,13 +131,30 @@ ENTRY(stext)
	 * The following calls CPU specific code in a position independent
	 * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
	 * xxx_proc_info structure selected by __lookup_processor_type
-	 * above.  On return, the CPU will be ready for the MMU to be
-	 * turned on, and r0 will hold the CPU control register value.
+	 * above.
+	 *
+	 * The processor init function will be called with:
+	 *  r1 - machine type
+	 *  r2 - boot data (atags/dt) pointer
+	 *  r4 - translation table base (low word)
+	 *  r5 - translation table base (high word, if LPAE)
+	 *  r8 - translation table base 1 (pfn if LPAE)
+	 *  r9 - cpuid
+	 *  r13 - virtual address for __enable_mmu -> __turn_mmu_on
+	 *
+	 * On return, the CPU will be ready for the MMU to be turned on,
+	 * r0 will hold the CPU control register value, r1, r2, r4, and
+	 * r9 will be preserved.  r5 will also be preserved if LPAE.
	 */
	ldr	r13, =__mmap_switched		@ address to jump to after
						@ mmu has been enabled
	adr	lr, BSYM(1f)			@ return (PIC) address
+#ifdef CONFIG_ARM_LPAE
+	mov	r5, #0				@ high TTBR0
+	mov	r8, r4, lsr #12			@ TTBR1 is swapper_pg_dir pfn
+#else
	mov	r8, r4				@ set TTBR1 to swapper_pg_dir
+#endif
	ldr	r12, [r10, #PROCINFO_INITFUNC]
	add	r12, r12, r10
	ret	r12
@@ -158,7 +175,7 @@ ENDPROC(stext)
 *
 * Returns:
 *  r0, r3, r5-r7 corrupted
- *  r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h)
+ *  r4 = physical page table address
 */
__create_page_tables:
	pgtbl	r4, r8				@ page table address
@@ -333,7 +350,6 @@ __create_page_tables:
 #endif
 #ifdef CONFIG_ARM_LPAE
	sub	r4, r4, #0x1000			@ point to the PGD table
-	mov	r4, r4, lsr #ARCH_PGD_SHIFT
 #endif
	ret	lr
ENDPROC(__create_page_tables)
@@ -381,9 +397,9 @@ ENTRY(secondary_startup)
	adr	r4, __secondary_data
	ldmia	r4, {r5, r7, r12}		@ address to jump to after
	sub	lr, r4, r5			@ mmu has been enabled
-	ldr	r4, [r7, lr]			@ get secondary_data.pgdir
-	add	r7, r7, #4
-	ldr	r8, [r7, lr]			@ get secondary_data.swapper_pg_dir
+	add	r3, r7, lr
+	ldrd	r4, [r3, #0]			@ get secondary_data.pgdir
+	ldr	r8, [r3, #8]			@ get secondary_data.swapper_pg_dir
	adr	lr, BSYM(__enable_mmu)		@ return address
	mov	r13, r12			@ __secondary_switched address
	ldr	r12, [r10, #PROCINFO_INITFUNC]
@@ -397,7 +413,7 @@ ENDPROC(secondary_startup_arm)
	 * r6  = &secondary_data
	 */
ENTRY(__secondary_switched)
-	ldr	sp, [r7, #4]			@ get secondary_data.stack
+	ldr	sp, [r7, #12]			@ get secondary_data.stack
	mov	fp, #0
	b	secondary_start_kernel
ENDPROC(__secondary_switched)
@@ -416,12 +432,14 @@ __secondary_data:
/*
 * Setup common bits before finally enabling the MMU.  Essentially
 * this is just loading the page table pointer and domain access
- * registers.
+ * registers.  All these registers need to be preserved by the
+ * processor setup function (or set in the case of r0)
 *
 *  r0  = cp#15 control register
 *  r1  = machine ID
 *  r2  = atags or dtb pointer
- *  r4  = page table (see ARCH_PGD_SHIFT in asm/memory.h)
+ *  r4  = TTBR pointer (low word)
+ *  r5  = TTBR pointer (high word if LPAE)
 *  r9  = processor ID
 *  r13 = *virtual* address to jump to upon completion
 */
@@ -440,7 +458,9 @@ __enable_mmu:
#ifdef CONFIG_CPU_ICACHE_DISABLE
	bic	r0, r0, #CR_I
#endif
-#ifndef CONFIG_ARM_LPAE
+#ifdef CONFIG_ARM_LPAE
+	mcrr	p15, 0, r4, r5, c2		@ load TTBR0
+#else
	mov	r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \
		      domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \
		      domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index cca5b8758185..90dfbedfbfb8 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -86,9 +86,11 @@ void __init smp_set_ops(struct smp_operations *ops)
 
 static unsigned long get_arch_pgd(pgd_t *pgd)
 {
-	phys_addr_t pgdir = virt_to_idmap(pgd);
-	BUG_ON(pgdir & ARCH_PGD_MASK);
-	return pgdir >> ARCH_PGD_SHIFT;
+#ifdef CONFIG_ARM_LPAE
+	return __phys_to_pfn(virt_to_phys(pgd));
+#else
+	return virt_to_phys(pgd);
+#endif
 }
 
 int __cpu_up(unsigned int cpu, struct task_struct *idle)
@@ -108,7 +110,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
 #endif
 
 #ifdef CONFIG_MMU
-	secondary_data.pgdir = get_arch_pgd(idmap_pgd);
+	secondary_data.pgdir = virt_to_phys(idmap_pgd);
 	secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir);
 #endif
 	sync_cache_w(&secondary_data);
diff --git a/arch/arm/mach-keystone/platsmp.c b/arch/arm/mach-keystone/platsmp.c
index 5f46a7cf907b..4bbb18463bfd 100644
--- a/arch/arm/mach-keystone/platsmp.c
+++ b/arch/arm/mach-keystone/platsmp.c
@@ -39,19 +39,6 @@ static int keystone_smp_boot_secondary(unsigned int cpu,
 	return error;
 }
 
-#ifdef CONFIG_ARM_LPAE
-static void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
-{
-	pgd_t *pgd0 = pgd_offset_k(0);
-	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
-	local_flush_tlb_all();
-}
-#else
-static inline void __cpuinit keystone_smp_secondary_initmem(unsigned int cpu)
-{}
-#endif
-
 struct smp_operations keystone_smp_ops __initdata = {
 	.smp_boot_secondary	= keystone_smp_boot_secondary,
-	.smp_secondary_init	= keystone_smp_secondary_initmem,
 };
diff --git a/arch/arm/mm/proc-v7-2level.S b/arch/arm/mm/proc-v7-2level.S
index 10405b8d31af..fa385140715f 100644
--- a/arch/arm/mm/proc-v7-2level.S
+++ b/arch/arm/mm/proc-v7-2level.S
@@ -148,10 +148,10 @@ ENDPROC(cpu_v7_set_pte_ext)
 * Macro for setting up the TTBRx and TTBCR registers.
 * - \ttb0 and \ttb1 updated with the corresponding flags.
 */
-	.macro	v7_ttb_setup, zero, ttbr0, ttbr1, tmp
+	.macro	v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
	mcr	p15, 0, \zero, c2, c0, 2	@ TTB control register
-	ALT_SMP(orr	\ttbr0, \ttbr0, #TTB_FLAGS_SMP)
-	ALT_UP(orr	\ttbr0, \ttbr0, #TTB_FLAGS_UP)
+	ALT_SMP(orr	\ttbr0l, \ttbr0l, #TTB_FLAGS_SMP)
+	ALT_UP(orr	\ttbr0l, \ttbr0l, #TTB_FLAGS_UP)
	ALT_SMP(orr	\ttbr1, \ttbr1, #TTB_FLAGS_SMP)
	ALT_UP(orr	\ttbr1, \ttbr1, #TTB_FLAGS_UP)
	mcr	p15, 0, \ttbr1, c2, c0, 1	@ load TTB1
diff --git a/arch/arm/mm/proc-v7-3level.S b/arch/arm/mm/proc-v7-3level.S
index d3daed0ae0ad..5e5720e8bc5f 100644
--- a/arch/arm/mm/proc-v7-3level.S
+++ b/arch/arm/mm/proc-v7-3level.S
@@ -126,11 +126,10 @@ ENDPROC(cpu_v7_set_pte_ext)
 * Macro for setting up the TTBRx and TTBCR registers.
 * - \ttbr1 updated.
 */
-	.macro	v7_ttb_setup, zero, ttbr0, ttbr1, tmp
+	.macro	v7_ttb_setup, zero, ttbr0l, ttbr0h, ttbr1, tmp
	ldr	\tmp, =swapper_pg_dir		@ swapper_pg_dir virtual address
-	mov	\tmp, \tmp, lsr #ARCH_PGD_SHIFT
-	cmp	\ttbr1, \tmp			@ PHYS_OFFSET > PAGE_OFFSET?
-	mrc	p15, 0, \tmp, c2, c0, 2		@ TTB control register
+	cmp	\ttbr1, \tmp, lsr #12		@ PHYS_OFFSET > PAGE_OFFSET?
+	mrc	p15, 0, \tmp, c2, c0, 2		@ TTB control register
	orr	\tmp, \tmp, #TTB_EAE
	ALT_SMP(orr	\tmp, \tmp, #TTB_FLAGS_SMP)
	ALT_UP(orr	\tmp, \tmp, #TTB_FLAGS_UP)
@@ -143,13 +142,10 @@ ENDPROC(cpu_v7_set_pte_ext)
	 */
	orrls	\tmp, \tmp, #TTBR1_SIZE		@ TTBCR.T1SZ
	mcr	p15, 0, \tmp, c2, c0, 2		@ TTBCR
-	mov	\tmp, \ttbr1, lsr #(32 - ARCH_PGD_SHIFT)	@ upper bits
-	mov	\ttbr1, \ttbr1, lsl #ARCH_PGD_SHIFT		@ lower bits
+	mov	\tmp, \ttbr1, lsr #20
+	mov	\ttbr1, \ttbr1, lsl #12
	addls	\ttbr1, \ttbr1, #TTBR1_OFFSET
	mcrr	p15, 1, \ttbr1, \tmp, c2	@ load TTBR1
-	mov	\tmp, \ttbr0, lsr #(32 - ARCH_PGD_SHIFT)	@ upper bits
-	mov	\ttbr0, \ttbr0, lsl #ARCH_PGD_SHIFT		@ lower bits
-	mcrr	p15, 0, \ttbr0, \tmp, c2	@ load TTBR0
	.endm
 
 /*
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index 3d1054f11a8a..873230912894 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -343,9 +343,9 @@ __v7_setup:
	and	r10, r0, #0xff000000		@ ARM?
	teq	r10, #0x41000000
	bne	3f
-	and	r5, r0, #0x00f00000		@ variant
+	and	r3, r0, #0x00f00000		@ variant
	and	r6, r0, #0x0000000f		@ revision
-	orr	r6, r6, r5, lsr #20-4		@ combine variant and revision
+	orr	r6, r6, r3, lsr #20-4		@ combine variant and revision
	ubfx	r0, r0, #4, #12			@ primary part number
 
	/* Cortex-A8 Errata */
@@ -354,7 +354,7 @@ __v7_setup:
	bne	2f
#if defined(CONFIG_ARM_ERRATA_430973) && !defined(CONFIG_ARCH_MULTIPLATFORM)
 
-	teq	r5, #0x00100000			@ only present in r1p*
+	teq	r3, #0x00100000			@ only present in r1p*
	mrceq	p15, 0, r10, c1, c0, 1		@ read aux control register
	orreq	r10, r10, #(1 << 6)		@ set IBE to 1
	mcreq	p15, 0, r10, c1, c0, 1		@ write aux control register
@@ -395,7 +395,7 @@ __v7_setup:
	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
#endif
#ifdef CONFIG_ARM_ERRATA_743622
-	teq	r5, #0x00200000			@ only present in r2p*
+	teq	r3, #0x00200000			@ only present in r2p*
	mrceq	p15, 0, r10, c15, c0, 1		@ read diagnostic register
	orreq	r10, r10, #1 << 6		@ set bit #6
	mcreq	p15, 0, r10, c15, c0, 1		@ write diagnostic register
@@ -425,10 +425,10 @@ __v7_setup:
	mcr	p15, 0, r10, c7, c5, 0		@ I+BTB cache invalidate
#ifdef CONFIG_MMU
	mcr	p15, 0, r10, c8, c7, 0		@ invalidate I + D TLBs
-	v7_ttb_setup r10, r4, r8, r5		@ TTBCR, TTBRx setup
-	ldr	r5, =PRRR			@ PRRR
+	v7_ttb_setup r10, r4, r5, r8, r3	@ TTBCR, TTBRx setup
+	ldr	r3, =PRRR			@ PRRR
	ldr	r6, =NMRR			@ NMRR
-	mcr	p15, 0, r5, c10, c2, 0		@ write PRRR
+	mcr	p15, 0, r3, c10, c2, 0		@ write PRRR
	mcr	p15, 0, r6, c10, c2, 1		@ write NMRR
#endif
	dsb					@ Complete invalidations
@@ -437,22 +437,22 @@ __v7_setup:
	and	r0, r0, #(0xf << 12)		@ ThumbEE enabled field
	teq	r0, #(1 << 12)			@ check if ThumbEE is present
	bne	1f
-	mov	r5, #0
-	mcr	p14, 6, r5, c1, c0, 0		@ Initialize TEEHBR to 0
+	mov	r3, #0
+	mcr	p14, 6, r3, c1, c0, 0		@ Initialize TEEHBR to 0
	mrc	p14, 6, r0, c0, c0, 0		@ load TEECR
	orr	r0, r0, #1			@ set the 1st bit in order to
	mcr	p14, 6, r0, c0, c0, 0		@ stop userspace TEEHBR access
1:
#endif
-	adr	r5, v7_crval
-	ldmia	r5, {r5, r6}
+	adr	r3, v7_crval
+	ldmia	r3, {r3, r6}
ARM_BE8(orr	r6, r6, #1 << 25)		@ big-endian page tables
#ifdef CONFIG_SWP_EMULATE
-	orr	r5, r5, #(1 << 10)		@ set SW bit in "clear"
+	orr	r3, r3, #(1 << 10)		@ set SW bit in "clear"
	bic	r6, r6, #(1 << 10)		@ clear it in "mmuset"
#endif
	mrc	p15, 0, r0, c1, c0, 0		@ read control register
-	bic	r0, r0, r5			@ clear bits them
+	bic	r0, r0, r3			@ clear bits them
	orr	r0, r0, r6			@ set them
THUMB(	orr	r0, r0, #1 << 30	)	@ Thumb exceptions
	ret	lr				@ return to head.S:__ret
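A note on the magic offsets in the head.S and head-nommu.S hunks above:
with pgdir widened to u64, struct secondary_data lays out as follows on
32-bit ARM. The offset comments are added annotation, assuming the u64
gives the union 8-byte size and alignment per the ARM EABI:

    struct secondary_data {
            union {
                    unsigned long mpu_rgn_szr;  /* offset 0 */
                    u64 pgdir;                  /* offset 0, 8 bytes:
                                                   read with ldrd into r4/r5 */
            };
            unsigned long swapper_pg_dir;       /* offset 8:  ldr r8, [r3, #8] */
            void *stack;                        /* offset 12: ldr sp, [r7, #12] */
    };

This is why the stack load moved from [r7, #4] (MMU) and [r7, #8]
(noMMU) to [r7, #12] in both files.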