
arm64/mm: Fix idmap on [16K|36VA|48PA]

Message ID 1627879359-30303-1-git-send-email-anshuman.khandual@arm.com (mailing list archive)
State New, archived
Series arm64/mm: Fix idmap on [16K|36VA|48PA]

Commit Message

Anshuman Khandual Aug. 2, 2021, 4:42 a.m. UTC
When creating the idmap, the kernel may add one extra level to map idmap memory
outside the VA range. But for [16K|36VA|48PA], we need two extra levels to reach
48 bits. If the bootloader places the kernel in memory above (1 << 46), the
kernel will fail to enable the MMU. Although we are not aware of a platform
where this happens, it is worth accommodating such scenarios and preventing a
possible kernel crash.

Let's fix the problem on the above configuration by creating two additional
idmap page table levels when 'idmap_text_end' is outside the VA range. This
reduces 'idmap_t0sz' to cover the entire PA range, which prevents table
misconfiguration (fault) when a given 'idmap_t0sz' value requires a single
additional page table level whereas two have been built.

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Fixes: 215399392fe4 ("arm64: 36 bit VA")
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
---
This applies on v5.14-rc4

RFC: https://lore.kernel.org/lkml/1627019894-14819-1-git-send-email-anshuman.khandual@arm.com/

 arch/arm64/kernel/head.S | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

Comments

Catalin Marinas Aug. 3, 2021, 10:34 a.m. UTC | #1
On Mon, Aug 02, 2021 at 10:12:39AM +0530, Anshuman Khandual wrote:
> When creating the idmap, the kernel may add one extra level to map idmap memory
> outside the VA range. But for [16K|36VA|48PA], we need two extra levels to reach
> 48 bits. If the bootloader places the kernel in memory above (1 << 46), the

Did you mean (1 << 36)?

> kernel will fail to enable the MMU. Although we are not aware of a platform
> where this happens, it is worth accommodating such scenarios and preventing a
> possible kernel crash.
> 
> Let's fix the problem on the above configuration by creating two additional
> idmap page table levels when 'idmap_text_end' is outside the VA range. This
> reduces 'idmap_t0sz' to cover the entire PA range, which prevents table
> misconfiguration (fault) when a given 'idmap_t0sz' value requires a single
> additional page table level whereas two have been built.
[...]
> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
> index c5c994a..da33bbc 100644
> --- a/arch/arm64/kernel/head.S
> +++ b/arch/arm64/kernel/head.S
> @@ -329,7 +329,9 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
>  
>  #if (VA_BITS < 48)
>  #define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
> +#define EXTRA_SHIFT_1	(EXTRA_SHIFT + PAGE_SHIFT - 3)
>  #define EXTRA_PTRS	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
> +#define EXTRA_PTRS_1	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT_1))
>  
>  	/*
>  	 * If VA_BITS < 48, we have to configure an additional table level.
> @@ -342,8 +344,30 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
>  #error "Mismatch between VA_BITS and page size/number of translation levels"
>  #endif
>  
> +/*
> + * In this particular CONFIG_ARM64_16K_PAGES config, there might be a
> + * scenario where 'idmap_text_end' ends up high enough in the PA range
> + * requiring two additional idmap page table levels. Reduce idmap_t0sz
> + * to cover the entire PA range. This prevents table misconfiguration
> + * when a given idmap_t0sz value requires just a single additional level
> + * whereas two levels have been built.
> + */
> +#if defined(CONFIG_ARM64_VA_BITS_36) && defined(CONFIG_ARM64_PA_BITS_48)
> +	mov	x4, EXTRA_PTRS_1
> +	create_table_entry x0, x3, EXTRA_SHIFT_1, x4, x5, x6
> +
> +	mov	x4, PTRS_PER_PTE
> +	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
> +
> +	mov	x5, #64 - PHYS_MASK_SHIFT
> +	adr_l	x6, idmap_t0sz
> +	str	x5, [x6]
> +	dmb	sy
> +	dc	ivac, x6
> +#else
>  	mov	x4, EXTRA_PTRS
>  	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
> +#endif
>  #else
>  	/*
>  	 * If VA_BITS == 48, we don't have to configure an additional

There's a prior idmap_t0sz setting based on __idmap_text_end. Isn't that
sufficient? We don't care about covering the whole PA space, just the
__idmap_text_end.
Anshuman Khandual Aug. 3, 2021, 11:27 a.m. UTC | #2
On 8/3/21 4:04 PM, Catalin Marinas wrote:
> On Mon, Aug 02, 2021 at 10:12:39AM +0530, Anshuman Khandual wrote:
>> When creating the idmap, the kernel may add one extra level to map idmap memory
>> outside the VA range. But for [16K|36VA|48PA], we need two extra levels to reach
>> 48 bits. If the bootloader places the kernel in memory above (1 << 46), the
> 
> Did you mean (1 << 36)?

No, it is actually (1 << 47). If __idmap_text_end is beyond (1 << 47), a single
additional page table level in the idmap would not be sufficient to map it;
rather, two more levels would be required. A single additional page table level
covers (PAGE_SHIFT - 3 = 14 - 3 = 11) bits with 16K pages.

First  additional page table level covers	VA(36) --> (47)
Second additional page table level covers	VA(47) --> (48)
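
To make the arithmetic concrete, here is a minimal user-space sketch (plain C,
not kernel code; constants hard-coded for the [16K|36VA|48PA] case) computing
how many extra idmap levels a given __idmap_text_end placement needs:

	#include <stdio.h>

	/* 16K pages: each table level resolves PAGE_SHIFT - 3 = 11 bits */
	#define PAGE_SHIFT	14
	#define BITS_PER_LEVEL	(PAGE_SHIFT - 3)
	#define VA_BITS		36

	/* Extra idmap levels needed to reach a given physical address */
	static int extra_idmap_levels(unsigned long long pa)
	{
		int bits = VA_BITS;
		int levels = 0;

		while (pa >= (1ULL << bits)) {
			bits += BITS_PER_LEVEL;	/* one more level: +11 bits */
			levels++;
		}
		return levels;
	}

	int main(void)
	{
		printf("%d\n", extra_idmap_levels(1ULL << 40));	/* 1: covered by 36 -> 47 */
		printf("%d\n", extra_idmap_levels(1ULL << 47));	/* 2: needs 47 -> 48 */
		return 0;
	}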

> 
>> kernel will fail to enable the MMU. Although we are not aware of a platform
>> where this happens, it is worth accommodating such scenarios and preventing a
>> possible kernel crash.
>>
>> Let's fix the problem on the above configuration by creating two additional
>> idmap page table levels when 'idmap_text_end' is outside the VA range. This
>> reduces 'idmap_t0sz' to cover the entire PA range, which prevents table
>> misconfiguration (fault) when a given 'idmap_t0sz' value requires a single
>> additional page table level whereas two have been built.
> [...]
>> diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
>> index c5c994a..da33bbc 100644
>> --- a/arch/arm64/kernel/head.S
>> +++ b/arch/arm64/kernel/head.S
>> @@ -329,7 +329,9 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
>>  
>>  #if (VA_BITS < 48)
>>  #define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
>> +#define EXTRA_SHIFT_1	(EXTRA_SHIFT + PAGE_SHIFT - 3)
>>  #define EXTRA_PTRS	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
>> +#define EXTRA_PTRS_1	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT_1))
>>  
>>  	/*
>>  	 * If VA_BITS < 48, we have to configure an additional table level.
>> @@ -342,8 +344,30 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
>>  #error "Mismatch between VA_BITS and page size/number of translation levels"
>>  #endif
>>  
>> +/*
>> + * In this particular CONFIG_ARM64_16K_PAGES config, there might be a
>> + * scenario where 'idmap_text_end' ends up high enough in the PA range
>> + * requiring two additional idmap page table levels. Reduce idmap_t0sz
>> + * to cover the entire PA range. This prevents table misconfiguration
>> + * when a given idmap_t0sz value requires just a single additional level
>> + * whereas two levels have been built.
>> + */
>> +#if defined(CONFIG_ARM64_VA_BITS_36) && defined(CONFIG_ARM64_PA_BITS_48)
>> +	mov	x4, EXTRA_PTRS_1
>> +	create_table_entry x0, x3, EXTRA_SHIFT_1, x4, x5, x6
>> +
>> +	mov	x4, PTRS_PER_PTE
>> +	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
>> +
>> +	mov	x5, #64 - PHYS_MASK_SHIFT
>> +	adr_l	x6, idmap_t0sz
>> +	str	x5, [x6]
>> +	dmb	sy
>> +	dc	ivac, x6
>> +#else
>>  	mov	x4, EXTRA_PTRS
>>  	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
>> +#endif
>>  #else
>>  	/*
>>  	 * If VA_BITS == 48, we don't have to configure an additional
> 
> There's a prior idmap_t0sz setting based on __idmap_text_end. Isn't that
> sufficient? We don't care about covering the whole PA space, just the
> __idmap_text_end.

Right, but it's a bit tricky here.

__idmap_text_end could be anywhere between VA_BITS (36) and PA_BITS (48),
which would require one or two additional page table levels. But this
solution creates two additional page table levels for the idmap, which
completely map up to PA_BITS regardless of __idmap_text_end's position. So
in case __idmap_text_end is between VA_BITS (36) and 47 bits, a single
additional page table level is required whereas we have created two! So
to avoid such a situation, adjust idmap_t0sz accordingly. Otherwise there
will be an MMU misconfiguration.
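
To restate this with numbers: the MMU derives the number of walk levels from
TCR_EL1.T0SZ. Here is a small sketch (plain user-space C, 16K-page constants
assumed) of how many levels a given t0sz implies:

	#include <stdio.h>

	#define PAGE_SHIFT	14
	#define BITS_PER_LEVEL	(PAGE_SHIFT - 3)

	/* Walk levels the MMU expects for a given T0SZ with a 16K granule */
	static int walk_levels(int t0sz)
	{
		int va_bits = 64 - t0sz;

		/* round up the levels needed above the final page level */
		return (va_bits - PAGE_SHIFT + BITS_PER_LEVEL - 1) / BITS_PER_LEVEL;
	}

	int main(void)
	{
		/* t0sz derived from __idmap_text_end at, say, 2^40 */
		printf("t0sz=24 -> %d levels\n", walk_levels(24));	/* 3 */
		/* t0sz reduced to cover the whole 48-bit PA range */
		printf("t0sz=16 -> %d levels\n", walk_levels(16));	/* 4 */
		return 0;
	}

Since the patch always builds the two extra levels (four in total), idmap_t0sz
has to be set to the value that also implies four levels, i.e. 64 - PHYS_MASK_SHIFT.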

This patch is intended for stable backport and hence tries to be as simple
and minimal as possible. So it creates two additional page table levels
mapping up to PA_BITS without considering __idmap_text_end's exact position.
Reducing idmap_t0sz to cover PA_BITS should not be a problem irrespective of
the ID_AA64MMFR0_EL1.PARANGE value, as __idmap_text_end would never be at a
PA that is not supported. Hence an out-of-range PA would never be put on the
bus for translation.
Catalin Marinas Aug. 3, 2021, 1:12 p.m. UTC | #3
On Tue, Aug 03, 2021 at 04:57:04PM +0530, Anshuman Khandual wrote:
> On 8/3/21 4:04 PM, Catalin Marinas wrote:
> > On Mon, Aug 02, 2021 at 10:12:39AM +0530, Anshuman Khandual wrote:
> >> +/*
> >> + * In this particular CONFIG_ARM64_16K_PAGES config, there might be a
> >> + * scenario where 'idmap_text_end' ends up high enough in the PA range
> >> + * requiring two additional idmap page table levels. Reduce idmap_t0sz
> >> + * to cover the entire PA range. This prevents table misconfiguration
> >> + * when a given idmap_t0sz value requires just a single additional level
> >> + * whereas two levels have been built.
> >> + */
> >> +#if defined(CONFIG_ARM64_VA_BITS_36) && defined(CONFIG_ARM64_PA_BITS_48)

I don't think you need the PA_BITS_48 check in here. It's either this
one or PA_BITS_52 in the future. Anyway, I think so far our assumption
is that the kernel will always be placed in the first 48 bits, so we
don't need the extra check.

> >> +	mov	x4, EXTRA_PTRS_1
> >> +	create_table_entry x0, x3, EXTRA_SHIFT_1, x4, x5, x6
> >> +
> >> +	mov	x4, PTRS_PER_PTE
> >> +	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
> >> +
> >> +	mov	x5, #64 - PHYS_MASK_SHIFT
> >> +	adr_l	x6, idmap_t0sz
> >> +	str	x5, [x6]
> >> +	dmb	sy
> >> +	dc	ivac, x6
> >> +#else
> >>  	mov	x4, EXTRA_PTRS
> >>  	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
> >> +#endif
> >>  #else
> >>  	/*
> >>  	 * If VA_BITS == 48, we don't have to configure an additional
> > 
> > There's a prior idmap_t0sz setting based on __idmap_text_end. Isn't that
> > sufficient? We don't care about covering the whole PA space, just the
> > __idmap_text_end.
> 
> Right, but it's a bit tricky here.
> 
> __idmap_text_end could be anywhere between VA_BITS (36) and PA_BITS (48),
> which would require one or two additional page table levels. But this
> solution creates two additional page table levels for the idmap, which
> completely map up to PA_BITS regardless of __idmap_text_end's position. So
> in case __idmap_text_end is between VA_BITS (36) and 47 bits, a single
> additional page table level is required whereas we have created two! So
> to avoid such a situation, adjust idmap_t0sz accordingly. Otherwise there
> will be an MMU misconfiguration.

I get it now. You need 4 levels with 16K pages for idmap as 3 levels
(one extra in head.S) are not sufficient. The normal page table uses 2
levels with 36-bit VA. Here you chose to go with 4 levels always as the
simplest option.

Do we need to adjust idmap_ptrs_per_pgd? I think even without your
patch, its value is wrong as it doesn't seem to be adjusted for the
extra level. I can't figure out whether it matters, but I think we should
remove this variable altogether and just set the x4 register to what we
need in head.S.

> This patch is intended for stable backport and hence tries to be as simple
> and minimal as possible. So it creates two additional page table levels
> mapping up to PA_BITS without considering __idmap_text_end's exact position.
> Reducing idmap_t0sz to cover PA_BITS should not be a problem irrespective of
> the ID_AA64MMFR0_EL1.PARANGE value, as __idmap_text_end would never be at a
> PA that is not supported. Hence an out-of-range PA would never be put on the
> bus for translation.

I'd rather have a clean solution (might as well be this one) than
worry about a stable backport. It's highly unlikely that we'll trip
over this problem in practice: first you'd need RAM above 47 bits, and
second you'd have to enable EXPERT and 36-bit VA.

It looks like idmap_t0sz is used by the kvm_mmu_init() code to calculate
hyp_va_bits. Does it mean that idmap_t0sz should always match PA_SIZE?
Or maybe we should just decouple the two.
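
For reference, the calculation I mean looks roughly like this (paraphrasing
kvm_mmu_init() in arch/arm64/kvm/mmu.c from memory, so treat the exact form
as an approximation):

	hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);

so lowering idmap_t0sz to cover the whole PA range also widens the VA space
that hyp sizes for itself.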
Anshuman Khandual Aug. 4, 2021, 3:46 a.m. UTC | #4
On 8/3/21 6:42 PM, Catalin Marinas wrote:
> On Tue, Aug 03, 2021 at 04:57:04PM +0530, Anshuman Khandual wrote:
>> On 8/3/21 4:04 PM, Catalin Marinas wrote:
>>> On Mon, Aug 02, 2021 at 10:12:39AM +0530, Anshuman Khandual wrote:
>>>> +/*
>>>> + * In this particular CONFIG_ARM64_16K_PAGES config, there might be a
>>>> + * scenario where 'idmap_text_end' ends up high enough in the PA range
>>>> + * requiring two additional idmap page table levels. Reduce idmap_t0sz
>>>> + * to cover the entire PA range. This prevents table misconfiguration
>>>> + * when a given idmap_t0sz value requires just a single additional level
>>>> + * whereas two levels have been built.
>>>> + */
>>>> +#if defined(CONFIG_ARM64_VA_BITS_36) && defined(CONFIG_ARM64_PA_BITS_48)
> 
> I don't think you need the PA_BITS_48 check in here. It's either this
> one or PA_BITS_52 in the future. Anyway, I think so far our assumption

Even the PA_BITS_52 range will be well within the limit of two extra idmap
page table levels, so it should be covered by this proposed code.

> is that the kernel will always be placed in the first 48 bits, so we
> don't need the extra check.

The check on ARM64_PA_BITS_48 here is a code hardening construct, because
(PAGE_SHIFT - 3) is an important factor that ends up pushing the idmap to
have two extra levels. Could we just assert ARM64_16K_PAGES instead, along
with ARM64_VA_BITS_36? I am concerned because in the future other page size
configs might have ARM64_VA_BITS_36 as well.

Something like ...

#if defined(CONFIG_ARM64_VA_BITS_36) && defined(CONFIG_ARM64_16K_PAGES)

> 
>>>> +	mov	x4, EXTRA_PTRS_1
>>>> +	create_table_entry x0, x3, EXTRA_SHIFT_1, x4, x5, x6
>>>> +
>>>> +	mov	x4, PTRS_PER_PTE
>>>> +	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
>>>> +
>>>> +	mov	x5, #64 - PHYS_MASK_SHIFT
>>>> +	adr_l	x6, idmap_t0sz
>>>> +	str	x5, [x6]
>>>> +	dmb	sy
>>>> +	dc	ivac, x6
>>>> +#else
>>>>  	mov	x4, EXTRA_PTRS
>>>>  	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
>>>> +#endif
>>>>  #else
>>>>  	/*
>>>>  	 * If VA_BITS == 48, we don't have to configure an additional
>>>
>>> There's a prior idmap_t0sz setting based on __idmap_text_end. Isn't that
>>> sufficient? We don't care about covering the whole PA space, just the
>>> __idmap_text_end.
>>
>> Right, but it's a bit tricky here.
>>
>> __idmap_text_end could be anywhere between VA_BITS (36) and PA_BITS (48),
>> which would require one or two additional page table levels. But this
>> solution creates two additional page table levels for the idmap, which
>> completely map up to PA_BITS regardless of __idmap_text_end's position. So
>> in case __idmap_text_end is between VA_BITS (36) and 47 bits, a single
>> additional page table level is required whereas we have created two! So
>> to avoid such a situation, adjust idmap_t0sz accordingly. Otherwise there
>> will be an MMU misconfiguration.
> 
> I get it now. You need 4 levels with 16K pages for idmap as 3 levels
> (one extra in head.S) are not sufficient. The normal page table uses 2
> levels with 36-bit VA. Here you chose to go with 4 levels always as the
> simplest option.
> 
> Do we need to adjust idmap_ptrs_per_pgd? I think even without your

IIUC, idmap_ptrs_per_pgd tracks the number of entries at the highest level,
i.e. the PGD, but *as per* the VA_BITS config. IIUC, it is used by map_memory(),
which creates the mapping for the standard VA_BITS based idmap without the
extra levels, which get their entries populated with create_table_entry().
Hence idmap_ptrs_per_pgd need not be updated when new levels are added. It
should be left unchanged as PTRS_PER_PGD.

> patch, its value is wrong as it doesn't seem to be adjusted for the

The only time the entries get adjusted is when the existing number of levels
doesn't change but not all PGD entries, i.e. PTRS_PER_PGD, are required. But
it seems like it is not done correctly either.

The current idmap_t0sz is decided by what is required to cover
__idmap_text_end, not PHYS_MASK_SHIFT. So the number of entries at the PGD
level should be computed as follows.

idmap_ptrs_per_pgd = 1 << ((64 - idmap_t0sz) - PGDIR_SHIFT)

but instead here is what we have.

        /*
         * If VA_BITS == 48, we don't have to configure an additional
         * translation level, but the top-level table has more entries.
         */
        mov     x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
        str_l   x4, idmap_ptrs_per_pgd, x5

A reduced idmap_t0sz here covers __idmap_text_end, not PHYS_MASK_SHIFT.
I guess this just works because the idmap page table levels required for the
reduced idmap_t0sz and for PHYS_MASK_SHIFT remain the same in all possible
cases for now. The only deviation would have been [16K|36VA|48PA], which
must have faced this issue, but then it was blocked by another problem
altogether (being solved here).
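
For concreteness, the [16K|36VA|48PA] numbers work out as below (user-space
sketch with the relevant constants hard-coded):

	#include <stdio.h>

	#define PAGE_SHIFT	14				/* 16K pages */
	#define PHYS_MASK_SHIFT	48
	#define PGDIR_SHIFT	25				/* 14 + 11: two native levels */
	#define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)	/* 36 */
	#define EXTRA_SHIFT_1	(EXTRA_SHIFT + PAGE_SHIFT - 3)	/* 47 */

	int main(void)
	{
		/* One extra level would need more entries than a 16K table holds */
		printf("EXTRA_PTRS   = %d\n", 1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT));   /* 4096 > 2048 */
		/* Two extra levels: a 2-entry top level over a full 2048-entry level */
		printf("EXTRA_PTRS_1 = %d\n", 1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT_1)); /* 2 */
		printf("idmap_t0sz   = %d\n", 64 - PHYS_MASK_SHIFT);                   /* 16 */
		return 0;
	}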

> extra level. I can't figure out whether it matters, but I think we should
> remove this variable altogether and just set the x4 register to what we
> need in head.S.

Right, idmap_ptrs_per_pgd is not required for anything other than
map_memory(), which takes it in x4. It could be dropped without any problem.

> 
>> This patch is intended for stable backport and hence tries to be as simple
>> and minimal as possible. So it creates two additional page table levels
>> mapping up to PA_BITS without considering __idmap_text_end's exact position.
>> Reducing idmap_t0sz to cover PA_BITS should not be a problem irrespective of
>> the ID_AA64MMFR0_EL1.PARANGE value, as __idmap_text_end would never be at a
>> PA that is not supported. Hence an out-of-range PA would never be put on the
>> bus for translation.
> 
> I'd rather have a clean solution (might as well be this one) than
> worry about a stable backport. It's highly unlikely that we'll trip
> over this problem in practice: first you'd need RAM above 47 bits, and
> second you'd have to enable EXPERT and 36-bit VA.

I am working on a solution which reworks the idmap extension logic based on the
difference between __idmap_text_end and PGDIR_SHIFT coverage, then creates
additional page table levels and reduces idmap_t0sz appropriately. All the
existing code, including this fix, will be dropped completely. Because
it might be difficult to extend this patch to get the entire thing in order,
the idea was to just fix [16K|36VA|48PA], which could then be backported, and
then do the rework in mainline (which need not be backported).

To summarize, you would prefer to skip the above transition and instead
move directly to the rework, which may not be backported? Please do suggest.

> 
> It looks like idmap_t0sz is used by the kvm_mmu_init() code to calculate
> hyp_va_bits. Does it mean that idmap_t0sz should always match PA_SIZE?

The default value for idmap_t0sz is TCR_T0SZ(VA_BITS_MIN) unless it gets
reduced further to map __idmap_text_end after extending the idmap levels. So
it does not always match PA_SIZE.

> Or maybe we should just decouple the two.

I don't have an adequate understanding of this area. It seems like the hyp
borrows the already discovered __idmap_text_end placement via idmap_t0sz to
figure out whether it also needs to have an extended ID map (at EL2).
Decoupling them might mean KVM should rediscover __idmap_text_end's placement?
Anshuman Khandual Aug. 4, 2021, 6:47 a.m. UTC | #5
On 8/4/21 9:16 AM, Anshuman Khandual wrote:
> I am working on a solution which reworks the idmap extension logic based on the
> difference between __idmap_text_end and PGDIR_SHIFT coverage, then creates
> additional page table levels and reduces idmap_t0sz appropriately. All the
> existing code, including this fix, will be dropped completely. Because
> it might be difficult to extend this patch to get the entire thing in order,
> the idea was to just fix [16K|36VA|48PA], which could then be backported, and
> then do the rework in mainline (which need not be backported).

FYI.

Here is the rework (draft quality, lightly tested and not documented) that
I have been working on. It tries to take care of three different situations
where __idmap_text_end would not be covered by the existing idmap levels; a
plain-C sketch of the decision logic follows the list below.

1. idmap requires single new level
2. idmap requires two new levels
3. idmap does not require more levels but idmap_ptrs_per_pgd needs adjustment

Applies after the fix here.
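
The decision logic, as a plain-C sketch (user-space only; per-config constants
are passed in as parameters, values below assumed for illustration):

	#include <stdio.h>

	/*
	 * req          : bits the idmap must cover (from __idmap_text_end)
	 * covered      : bits the native page table covers (VA_BITS)
	 * pgd_max      : bits reachable by widening the PGD alone (EXTRA_SHIFT)
	 * bits_per_lvl : bits one table level resolves (PAGE_SHIFT - 3)
	 */
	static const char *idmap_plan(int req, int covered, int pgd_max,
				      int bits_per_lvl)
	{
		if (req <= covered)
			return "existing levels suffice";
		if (req <= pgd_max)
			return "no new level, adjust idmap_ptrs_per_pgd";
		if (req <= pgd_max + bits_per_lvl)
			return "one extra level";
		return "two extra levels";
	}

	int main(void)
	{
		/* 16K/36VA: covered == pgd_max == 36 */
		printf("%s\n", idmap_plan(40, 36, 36, 11));	/* one extra level */
		printf("%s\n", idmap_plan(48, 36, 36, 11));	/* two extra levels */
		/* 64K/48VA with a hypothetical 52-bit PA: widen the PGD alone */
		printf("%s\n", idmap_plan(52, 48, 55, 13));	/* adjust idmap_ptrs_per_pgd */
		return 0;
	}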

---
 arch/arm64/include/asm/assembler.h |  9 ++++++
 arch/arm64/kernel/head.S           | 62 +++++++++++++++-----------------------
 2 files changed, 33 insertions(+), 38 deletions(-)

diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index 89faca0..d826641 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -25,6 +25,15 @@
 #include <asm/ptrace.h>
 #include <asm/thread_info.h>
 
+	.macro shift_to_nr_ptrs, reg1, reg2, reg3, tmp
+	ldr_l   \reg3, idmap_t0sz
+	add     \reg3, \reg3, \tmp
+	mov     \reg2, #64
+	sub     \reg2, \reg2, \reg3
+	mov     \reg1, #1
+	lsl     \reg1, \reg1, \reg2	// nr_ptrs = 1 << ((64 - idmap_t0sz) - shift)
+	.endm
+
 	/*
 	 * Provide a wxN alias for each wN register so what we can paste a xN
 	 * reference after a 'w' to obtain the 32-bit version.
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index da33bbc..b308787 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -327,54 +327,40 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 	dmb	sy
 	dc	ivac, x6		// Invalidate potentially stale cache line
 
-#if (VA_BITS < 48)
 #define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
 #define EXTRA_SHIFT_1	(EXTRA_SHIFT + PAGE_SHIFT - 3)
-#define EXTRA_PTRS	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
-#define EXTRA_PTRS_1	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT_1))
-
-	/*
-	 * If VA_BITS < 48, we have to configure an additional table level.
-	 * First, we have to verify our assumption that the current value of
-	 * VA_BITS was chosen such that all translation levels are fully
-	 * utilised, and that lowering T0SZ will always result in an additional
-	 * translation level to be configured.
-	 */
-#if VA_BITS != EXTRA_SHIFT
+#if (VA_BITS > EXTRA_SHIFT)
 #error "Mismatch between VA_BITS and page size/number of translation levels"
 #endif
 
-/*
- * In this particular CONFIG_ARM64_16K_PAGES config, there might be a
- * scenario where 'idmap_text_end' ends up high enough in the PA range
- * requiring two additional idmap page table levels. Reduce idmap_t0sz
- * to cover the entire PA range. This prevents table misconfiguration
- * when a given idmap_t0sz value requires just a single additional level
- * whereas two levels have been built.
- */
-#if defined(CONFIG_ARM64_VA_BITS_36) && defined(CONFIG_ARM64_PA_BITS_48)
-	mov	x4, EXTRA_PTRS_1
-	create_table_entry x0, x3, EXTRA_SHIFT_1, x4, x5, x6
+#if (VA_BITS == EXTRA_SHIFT)
+	mov	x6, #TCR_T0SZ(VA_BITS_MIN)
+	sub	x6, x6, x5
+	cmp	x6, #(PAGE_SHIFT - 3)
+	b.gt	8f
 
-	mov	x4, PTRS_PER_PTE
+	shift_to_nr_ptrs x4, x5, x6, #EXTRA_SHIFT
 	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
+	b	1f
+8:
+	shift_to_nr_ptrs x4, x5, x6, #EXTRA_SHIFT_1
+	create_table_entry x0, x3, EXTRA_SHIFT_1, x4, x5, x6
 
-	mov	x5, #64 - PHYS_MASK_SHIFT
-	adr_l	x6, idmap_t0sz
-	str	x5, [x6]
-	dmb	sy
-	dc	ivac, x6
-#else
-	mov	x4, EXTRA_PTRS
+	mov     x4, PTRS_PER_PTE
 	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
-#endif
-#else
-	/*
-	 * If VA_BITS == 48, we don't have to configure an additional
-	 * translation level, but the top-level table has more entries.
-	 */
-	mov	x4, #1 << (PHYS_MASK_SHIFT - PGDIR_SHIFT)
+#elif (VA_BITS < EXTRA_SHIFT)
+	mov	x6, #64
+	sub	x6, x6, x5
+	cmp	x6, EXTRA_SHIFT
+	b.eq	1f
+	b.gt	8f
+
+	shift_to_nr_ptrs x4, x5, x6, #PGDIR_SHIFT
 	str_l	x4, idmap_ptrs_per_pgd, x5
+	b	1f
+8:
+	shift_to_nr_ptrs x4, x5, x6, #EXTRA_SHIFT
+	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
 #endif
 1:
 	ldr_l	x4, idmap_ptrs_per_pgd

Patch

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index c5c994a..da33bbc 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -329,7 +329,9 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 
 #if (VA_BITS < 48)
 #define EXTRA_SHIFT	(PGDIR_SHIFT + PAGE_SHIFT - 3)
+#define EXTRA_SHIFT_1	(EXTRA_SHIFT + PAGE_SHIFT - 3)
 #define EXTRA_PTRS	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT))
+#define EXTRA_PTRS_1	(1 << (PHYS_MASK_SHIFT - EXTRA_SHIFT_1))
 
 	/*
 	 * If VA_BITS < 48, we have to configure an additional table level.
@@ -342,8 +344,30 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
 #error "Mismatch between VA_BITS and page size/number of translation levels"
 #endif
 
+/*
+ * In this particular CONFIG_ARM64_16K_PAGES config, there might be a
+ * scenario where 'idmap_text_end' ends up high enough in the PA range
+ * requiring two additional idmap page table levels. Reduce idmap_t0sz
+ * to cover the entire PA range. This prevents table misconfiguration
+ * when a given idmap_t0sz value requires just a single additional level
+ * whereas two levels have been built.
+ */
+#if defined(CONFIG_ARM64_VA_BITS_36) && defined(CONFIG_ARM64_PA_BITS_48)
+	mov	x4, EXTRA_PTRS_1
+	create_table_entry x0, x3, EXTRA_SHIFT_1, x4, x5, x6
+
+	mov	x4, PTRS_PER_PTE
+	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
+
+	mov	x5, #64 - PHYS_MASK_SHIFT
+	adr_l	x6, idmap_t0sz
+	str	x5, [x6]
+	dmb	sy
+	dc	ivac, x6
+#else
 	mov	x4, EXTRA_PTRS
 	create_table_entry x0, x3, EXTRA_SHIFT, x4, x5, x6
+#endif
 #else
 	/*
 	 * If VA_BITS == 48, we don't have to configure an additional