
[v3,5/6] ARM: mm: Recreate kernel mappings in early_paging_init()

Message ID 1380835081-12129-6-git-send-email-santosh.shilimkar@ti.com (mailing list archive)
State New, archived

Commit Message

Santosh Shilimkar Oct. 3, 2013, 9:17 p.m. UTC
This patch adds a step to the init sequence to recreate the kernel
code/data page table mappings prior to full paging initialization.
This is necessary on LPAE systems whose physical memory resides
outside the 4G limit.  On these systems, this implementation
provides a machine descriptor hook that allows PHYS_OFFSET to be
overridden in a machine-specific fashion.

Cc: Nicolas Pitre <nico@linaro.org>
Cc: Russell King <linux@arm.linux.org.uk>

Signed-off-by: R Sricharan <r.sricharan@ti.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
---
 arch/arm/include/asm/mach/arch.h |    1 +
 arch/arm/kernel/setup.c          |    3 ++
 arch/arm/mm/mmu.c                |   82 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+)
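
As an illustration of how the new hook is meant to be used, a machine
might switch to its high physical address space along these lines (a
sketch only, not part of this patch; the MACH_* names are made up, and
the pv variable names assume the earlier patches in this series):

static void __init mach_init_meminfo(void)
{
	/*
	 * Update the phys-to-virt patching variables;
	 * early_paging_init() then re-runs the patch stub and
	 * rebuilds the kernel mappings at the new PHYS_OFFSET.
	 */
	__pv_phys_offset = MACH_HIGH_PHYS_START;
	__pv_offset = MACH_HIGH_PHYS_START - PAGE_OFFSET;
}

MACHINE_START(MYMACH, "Example LPAE machine")
	/* ... */
	.init_meminfo	= mach_init_meminfo,
MACHINE_END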

Comments

Nicolas Pitre Oct. 4, 2013, 12:23 a.m. UTC | #1
On Thu, 3 Oct 2013, Santosh Shilimkar wrote:

> This patch adds a step to the init sequence to recreate the kernel
> code/data page table mappings prior to full paging initialization.
> This is necessary on LPAE systems whose physical memory resides
> outside the 4G limit.  On these systems, this implementation
> provides a machine descriptor hook that allows PHYS_OFFSET to be
> overridden in a machine-specific fashion.
> 
> Cc: Nicolas Pitre <nico@linaro.org>
> Cc: Russell King <linux@arm.linux.org.uk>
> 
> Signed-off-by: R Sricharan <r.sricharan@ti.com>
> Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>

Acked-by: Nicolas Pitre <nico@linaro.org>

Will Deacon Oct. 4, 2013, 3:59 p.m. UTC | #2
On Thu, Oct 03, 2013 at 10:17:59PM +0100, Santosh Shilimkar wrote:
> This patch adds a step to the init sequence to recreate the kernel
> code/data page table mappings prior to full paging initialization.
> This is necessary on LPAE systems whose physical memory resides
> outside the 4G limit.  On these systems, this implementation
> provides a machine descriptor hook that allows PHYS_OFFSET to be
> overridden in a machine-specific fashion.

[...]

> diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
> index b1d17ee..47c7497 100644
> --- a/arch/arm/mm/mmu.c
> +++ b/arch/arm/mm/mmu.c
> @@ -28,6 +28,7 @@
>  #include <asm/highmem.h>
>  #include <asm/system_info.h>
>  #include <asm/traps.h>
> +#include <asm/procinfo.h>
>  
>  #include <asm/mach/arch.h>
>  #include <asm/mach/map.h>
> @@ -1315,6 +1316,87 @@ static void __init map_lowmem(void)
>  	}
>  }
>  
> +#ifdef CONFIG_ARM_LPAE
> +extern void fixup_pv_table(const void *, unsigned long);
> +extern const void *__pv_table_begin, *__pv_table_end;
> +
> +/*
> + * early_paging_init() recreates boot time page table setup, allowing machines
> + * to switch over to a high (>4G) address space on LPAE systems
> + */
> +void __init early_paging_init(const struct machine_desc *mdesc,
> +			      struct proc_info_list *procinfo)
> +{
> +	pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags;
> +	unsigned long map_start, map_end;
> +	pgd_t *pgd0, *pgdk;
> +	pud_t *pud0, *pudk;
> +	pmd_t *pmd0, *pmdk;
> +	phys_addr_t phys;
> +	int i;
> +
> +	/* remap kernel code and data */
> +	map_start = init_mm.start_code;
> +	map_end   = init_mm.brk;
> +
> +	/* get a handle on things... */
> +	pgd0 = pgd_offset_k(0);
> +	pud0 = pud_offset(pgd0, 0);
> +	pmd0 = pmd_offset(pud0, 0);
> +
> +	pgdk = pgd_offset_k(map_start);
> +	pudk = pud_offset(pgdk, map_start);
> +	pmdk = pmd_offset(pudk, map_start);
> +
> +	phys = PHYS_OFFSET;
> +
> +	if (mdesc->init_meminfo) {
> +		mdesc->init_meminfo();
> +		/* Run the patch stub to update the constants */
> +		fixup_pv_table(&__pv_table_begin,
> +			(&__pv_table_end - &__pv_table_begin) << 2);
> +
> +		/*
> +		 * Cache cleaning operations for self-modifying code
> +		 * We should clean the entries by MVA but running a
> +		 * for loop over every pv_table entry pointer would
> +		 * just complicate the code.
> +		 */
> +		flush_cache_louis();
> +		dsb();
> +		isb();

You don't need either of these barriers.

> +	}
> +
> +	/* remap level 1 table */
> +	for (i = 0; i < PTRS_PER_PGD; i++) {
> +		*pud0++ = __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER);
> +		pmd0 += PTRS_PER_PMD;
> +	}
> +
> +	/* remap pmds for kernel mapping */
> +	phys = __pa(map_start) & PMD_MASK;
> +	do {
> +		*pmdk++ = __pmd(phys | pmdprot);
> +		phys += PMD_SIZE;
> +	} while (phys < map_end);
> +
> +	flush_cache_all();

Why are you being so heavyweight with your cacheflushing? If you're just
interested in flushing the new page tables, then use the proper accessors to
build them. The only case I think you need to flush the world is for VIVT,
which you won't have with LPAE (you could have a BUG_ON here).
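
Something along these lines, say (an untested sketch, assuming
flush_pmd_entry() is usable this early):

	/* clean each new entry by MVA as it is written */
	do {
		*pmdk = __pmd(phys | pmdprot);
		flush_pmd_entry(pmdk++);
		phys += PMD_SIZE;
	} while (phys < map_end);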

> +	cpu_set_ttbr(0, __pa(pgd0));
> +	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);

Can you not use cpu_switch_mm with the init_mm for this?
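
i.e. something like (again, only a sketch):

	cpu_switch_mm(pgd0, &init_mm);
	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
	local_flush_tlb_all();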

Will
Santosh Shilimkar Oct. 4, 2013, 4:12 p.m. UTC | #3
On Friday 04 October 2013 11:59 AM, Will Deacon wrote:
> On Thu, Oct 03, 2013 at 10:17:59PM +0100, Santosh Shilimkar wrote:
>> This patch adds a step to the init sequence to recreate the kernel
>> code/data page table mappings prior to full paging initialization.
>> This is necessary on LPAE systems whose physical memory resides
>> outside the 4G limit.  On these systems, this implementation
>> provides a machine descriptor hook that allows PHYS_OFFSET to be
>> overridden in a machine-specific fashion.
> 
> [...]
> 
>> diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
>> index b1d17ee..47c7497 100644
>> --- a/arch/arm/mm/mmu.c
>> +++ b/arch/arm/mm/mmu.c
>> @@ -28,6 +28,7 @@
>>  #include <asm/highmem.h>
>>  #include <asm/system_info.h>
>>  #include <asm/traps.h>
>> +#include <asm/procinfo.h>
>>  
>>  #include <asm/mach/arch.h>
>>  #include <asm/mach/map.h>
>> @@ -1315,6 +1316,87 @@ static void __init map_lowmem(void)
>>  	}
>>  }
>>  
>> +#ifdef CONFIG_ARM_LPAE
>> +extern void fixup_pv_table(const void *, unsigned long);
>> +extern const void *__pv_table_begin, *__pv_table_end;
>> +
>> +/*
>> + * early_paging_init() recreates boot time page table setup, allowing machines
>> + * to switch over to a high (>4G) address space on LPAE systems
>> + */
>> +void __init early_paging_init(const struct machine_desc *mdesc,
>> +			      struct proc_info_list *procinfo)
>> +{
>> +	pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags;
>> +	unsigned long map_start, map_end;
>> +	pgd_t *pgd0, *pgdk;
>> +	pud_t *pud0, *pudk;
>> +	pmd_t *pmd0, *pmdk;
>> +	phys_addr_t phys;
>> +	int i;
>> +
>> +	/* remap kernel code and data */
>> +	map_start = init_mm.start_code;
>> +	map_end   = init_mm.brk;
>> +
>> +	/* get a handle on things... */
>> +	pgd0 = pgd_offset_k(0);
>> +	pud0 = pud_offset(pgd0, 0);
>> +	pmd0 = pmd_offset(pud0, 0);
>> +
>> +	pgdk = pgd_offset_k(map_start);
>> +	pudk = pud_offset(pgdk, map_start);
>> +	pmdk = pmd_offset(pudk, map_start);
>> +
>> +	phys = PHYS_OFFSET;
>> +
>> +	if (mdesc->init_meminfo) {
>> +		mdesc->init_meminfo();
>> +		/* Run the patch stub to update the constants */
>> +		fixup_pv_table(&__pv_table_begin,
>> +			(&__pv_table_end - &__pv_table_begin) << 2);
>> +
>> +		/*
>> +		 * Cache cleaning operations for self-modifying code
>> +		 * We should clean the entries by MVA but running a
>> +		 * for loop over every pv_table entry pointer would
>> +		 * just complicate the code.
>> +		 */
>> +		flush_cache_louis();
>> +		dsb();
>> +		isb();
> 
> You don't need either of these barriers.
> 
Agreed. Just to be clear, it's because they are already present
in flush_cache_louis(), right?

>> +	}
>> +
>> +	/* remap level 1 table */
>> +	for (i = 0; i < PTRS_PER_PGD; i++) {
>> +		*pud0++ = __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER);
>> +		pmd0 += PTRS_PER_PMD;
>> +	}
>> +
>> +	/* remap pmds for kernel mapping */
>> +	phys = __pa(map_start) & PMD_MASK;
>> +	do {
>> +		*pmdk++ = __pmd(phys | pmdprot);
>> +		phys += PMD_SIZE;
>> +	} while (phys < map_end);
>> +
>> +	flush_cache_all();
> 
> Why are you being so heavyweight with your cacheflushing? If you're just
> interested in flushing the new page tables, then use the proper accessors to
> build them. The only case I think you need to flush the world is for VIVT,
> which you won't have with LPAE (you could have a BUG_ON here).
> 
It was mainly to avoid all the looping MVA-based stuff, but you have a
valid point.  I shall look at it and see what can be done.
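
Maybe cleaning just what we rebuilt, the way pgd_alloc() does, would be
enough (untested sketch; pmdk_first would be a new local recording the
initial value of pmdk):

	clean_dcache_area(pgd_offset_k(0), PTRS_PER_PGD * sizeof(pgd_t));
	clean_dcache_area(pmdk_first, (pmdk - pmdk_first) * sizeof(pmd_t));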

>> +	cpu_set_ttbr(0, __pa(pgd0));
>> +	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
> 
> Can you not use cpu_switch_mm with the init_mm for this?
> 
Probably yes. Will have a look at it.

Regards,
Santosh

Patch

diff --git a/arch/arm/include/asm/mach/arch.h b/arch/arm/include/asm/mach/arch.h
index 402a2bc..17a3fa2 100644
--- a/arch/arm/include/asm/mach/arch.h
+++ b/arch/arm/include/asm/mach/arch.h
@@ -49,6 +49,7 @@ struct machine_desc {
 	bool			(*smp_init)(void);
 	void			(*fixup)(struct tag *, char **,
 					 struct meminfo *);
+	void			(*init_meminfo)(void);
 	void			(*reserve)(void);/* reserve mem blocks	*/
 	void			(*map_io)(void);/* IO mapping function	*/
 	void			(*init_early)(void);
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 0e1e2b3..b9a6dac 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -73,6 +73,7 @@ __setup("fpe=", fpe_setup);
 #endif
 
 extern void paging_init(const struct machine_desc *desc);
+extern void early_paging_init(const struct machine_desc *, struct proc_info_list *);
 extern void sanity_check_meminfo(void);
 extern enum reboot_mode reboot_mode;
 extern void setup_dma_zone(const struct machine_desc *desc);
@@ -878,6 +879,8 @@ void __init setup_arch(char **cmdline_p)
 	parse_early_param();
 
 	sort(&meminfo.bank, meminfo.nr_banks, sizeof(meminfo.bank[0]), meminfo_cmp, NULL);
+
+	early_paging_init(mdesc, lookup_processor_type(read_cpuid_id()));
 	sanity_check_meminfo();
 	arm_memblock_init(&meminfo, mdesc);
 
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index b1d17ee..47c7497 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -28,6 +28,7 @@ 
 #include <asm/highmem.h>
 #include <asm/system_info.h>
 #include <asm/traps.h>
+#include <asm/procinfo.h>
 
 #include <asm/mach/arch.h>
 #include <asm/mach/map.h>
@@ -1315,6 +1316,87 @@ static void __init map_lowmem(void)
 	}
 }
 
+#ifdef CONFIG_ARM_LPAE
+extern void fixup_pv_table(const void *, unsigned long);
+extern const void *__pv_table_begin, *__pv_table_end;
+
+/*
+ * early_paging_init() recreates boot time page table setup, allowing machines
+ * to switch over to a high (>4G) address space on LPAE systems
+ */
+void __init early_paging_init(const struct machine_desc *mdesc,
+			      struct proc_info_list *procinfo)
+{
+	pmdval_t pmdprot = procinfo->__cpu_mm_mmu_flags;
+	unsigned long map_start, map_end;
+	pgd_t *pgd0, *pgdk;
+	pud_t *pud0, *pudk;
+	pmd_t *pmd0, *pmdk;
+	phys_addr_t phys;
+	int i;
+
+	/* remap kernel code and data */
+	map_start = init_mm.start_code;
+	map_end   = init_mm.brk;
+
+	/* get a handle on things... */
+	pgd0 = pgd_offset_k(0);
+	pud0 = pud_offset(pgd0, 0);
+	pmd0 = pmd_offset(pud0, 0);
+
+	pgdk = pgd_offset_k(map_start);
+	pudk = pud_offset(pgdk, map_start);
+	pmdk = pmd_offset(pudk, map_start);
+
+	phys = PHYS_OFFSET;
+
+	if (mdesc->init_meminfo) {
+		mdesc->init_meminfo();
+		/* Run the patch stub to update the constants */
+		fixup_pv_table(&__pv_table_begin,
+			(&__pv_table_end - &__pv_table_begin) << 2);
+
+		/*
+		 * Cache cleaning operations for self-modifying code
+		 * We should clean the entries by MVA but running a
+		 * for loop over every pv_table entry pointer would
+		 * just complicate the code.
+		 */
+		flush_cache_louis();
+		dsb();
+		isb();
+	}
+
+	/* remap level 1 table */
+	for (i = 0; i < PTRS_PER_PGD; i++) {
+		*pud0++ = __pud(__pa(pmd0) | PMD_TYPE_TABLE | L_PGD_SWAPPER);
+		pmd0 += PTRS_PER_PMD;
+	}
+
+	/* remap pmds for kernel mapping */
+	phys = __pa(map_start) & PMD_MASK;
+	do {
+		*pmdk++ = __pmd(phys | pmdprot);
+		phys += PMD_SIZE;
+	} while (phys < map_end);
+
+	flush_cache_all();
+	cpu_set_ttbr(0, __pa(pgd0));
+	cpu_set_ttbr(1, __pa(pgd0) + TTBR1_OFFSET);
+	local_flush_tlb_all();
+}
+
+#else
+
+void __init early_paging_init(const struct machine_desc *mdesc,
+			      struct proc_info_list *procinfo)
+{
+	if (mdesc->init_meminfo)
+		mdesc->init_meminfo();
+}
+
+#endif
+
 /*
  * paging_init() sets up the page tables, initialises the zone memory
  * maps, and sets up the zero page, bad page and bad page tables.