
[v3,04/21] arm64: decouple early fixmap init from linear mapping

Message ID 1452518355-4606-5-git-send-email-ard.biesheuvel@linaro.org (mailing list archive)
State New, archived

Commit Message

Ard Biesheuvel Jan. 11, 2016, 1:18 p.m. UTC
Since the early fixmap page tables are populated using pages that are
part of the static footprint of the kernel, they are covered by the
initial kernel mapping, and we can refer to them without using __va/__pa
translations, which are tied to the linear mapping.

Since the fixmap page tables are disjoint from the kernel mapping up
to the top level pgd entry, we can refer to bm_pte[] directly, and there
is no need to walk the page tables and perform __pa()/__va() translations
at each step.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
---
 arch/arm64/mm/mmu.c | 32 ++++++--------------
 1 file changed, 9 insertions(+), 23 deletions(-)
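
For reference, a simplified sketch of why __pa()/__va() are tied to the
linear mapping: on arm64 of that era (arch/arm64/include/asm/memory.h),
both translations were fixed offsets against PAGE_OFFSET, so they are
only meaningful for linear-map addresses, not for kernel-image symbols
once the two mappings are decoupled. Simplified, not verbatim:

/* Linear-map translations: a constant offset in each direction. */
#define __virt_to_phys(x)	(((phys_addr_t)(x) - PAGE_OFFSET + PHYS_OFFSET))
#define __phys_to_virt(x)	((unsigned long)((x) - PHYS_OFFSET + PAGE_OFFSET))

#define __pa(x)			__virt_to_phys((unsigned long)(x))
#define __va(x)			((void *)__phys_to_virt((phys_addr_t)(x)))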

Comments

Mark Rutland Jan. 11, 2016, 4:09 p.m. UTC | #1
On Mon, Jan 11, 2016 at 02:18:57PM +0100, Ard Biesheuvel wrote:
> Since the early fixmap page tables are populated using pages that are
> part of the static footprint of the kernel, they are covered by the
> initial kernel mapping, and we can refer to them without using __va/__pa
> translations, which are tied to the linear mapping.
> 
> Since the fixmap page tables are disjoint from the kernel mapping up
> to the top level pgd entry, we can refer to bm_pte[] directly, and there
> is no need to walk the page tables and perform __pa()/__va() translations
> at each step.
> 
> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> ---
>  arch/arm64/mm/mmu.c | 32 ++++++--------------
>  1 file changed, 9 insertions(+), 23 deletions(-)
> 
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 7711554a94f4..75b5f0dc3bdc 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -570,38 +570,24 @@ void vmemmap_free(unsigned long start, unsigned long end)
>  #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
>  
>  static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
> -#if CONFIG_PGTABLE_LEVELS > 2
>  static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
> -#endif
> -#if CONFIG_PGTABLE_LEVELS > 3
>  static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
> -#endif
>  
>  static inline pud_t * fixmap_pud(unsigned long addr)
>  {
> -	pgd_t *pgd = pgd_offset_k(addr);
> -
> -	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
> -
> -	return pud_offset(pgd, addr);
> +	return (CONFIG_PGTABLE_LEVELS > 3) ? &bm_pud[pud_index(addr)]
> +					   : (pud_t *)pgd_offset_k(addr);

If we move patch 6 earlier, we could use pud_offset_kimg here, and avoid
the cast, at the cost of passing the pgd into fixmap_pud.

Similarly for fixmap_pmd.

>  }
>  
> -static inline pmd_t * fixmap_pmd(unsigned long addr)
> +static inline pte_t * fixmap_pmd(unsigned long addr)
>  {
> -	pud_t *pud = fixmap_pud(addr);
> -
> -	BUG_ON(pud_none(*pud) || pud_bad(*pud));
> -
> -	return pmd_offset(pud, addr);
> +	return (CONFIG_PGTABLE_LEVELS > 2) ? &bm_pmd[pmd_index(addr)]
> +					   : (pmd_t *)pgd_offset_k(addr);
>  }

I assume the return type change was unintentional?

With STRICT_MM_TYPECHECKS:

arch/arm64/mm/mmu.c: In function 'fixmap_pmd':
arch/arm64/mm/mmu.c:604:9: warning: return from incompatible pointer type [-Wincompatible-pointer-types]
  return (CONFIG_PGTABLE_LEVELS > 2) ? &bm_pmd[pmd_index(addr)]
         ^
arch/arm64/mm/mmu.c: In function 'early_fixmap_init':
arch/arm64/mm/mmu.c:635:6: warning: assignment from incompatible pointer type [-Wincompatible-pointer-types]
  pmd = fixmap_pmd(addr);
      ^
arch/arm64/mm/mmu.c:645:11: warning: comparison of distinct pointer types lacks a cast
  if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
           ^
arch/arm64/mm/mmu.c:646:14: warning: comparison of distinct pointer types lacks a cast
       || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
              ^

Side note: is there any reason we can't/shouldn't make
STRICT_MM_TYPECHECKS a common config option? Or simply have it on by
default for arm64?
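
For reference, the difference is roughly the following, sketched from
arch/arm64/include/asm/pgtable-types.h of that era (simplified, not
verbatim). Without the option, all page-table entry types collapse to
the same integer typedef, which is why the pmd_t */pte_t * mix-up above
only warns when it is enabled:

#ifdef STRICT_MM_TYPECHECKS
/* Distinct wrapper structs: pte_t * and pmd_t * are different types,
 * so mixing them up is a compile-time diagnostic. */
typedef struct { pteval_t pte; } pte_t;
typedef struct { pmdval_t pmd; } pmd_t;
#define pte_val(x)	((x).pte)
#define __pte(x)	((pte_t) { (x) } )
#else
/* Plain integer typedefs: pte_t * and pmd_t * are both u64 *. */
typedef pteval_t pte_t;
typedef pmdval_t pmd_t;
#define pte_val(x)	(x)
#define __pte(x)	(x)
#endif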

Having built with and without typechecks I see that it doesn't bloat the
kernel Image size, though the binary isn't quite identical:

[mark@leverpostej:~/src/linux]% ls -al *.*checks
-rwxrwxr-x 1 mark mark   9288192 Jan 11 15:40 Image.checks
-rwxrwxr-x 1 mark mark   9288192 Jan 11 15:36 Image.nochecks
-rwxrwxr-x 1 mark mark 106782024 Jan 11 15:40 vmlinux.checks
-rwxrwxr-x 1 mark mark 106688928 Jan 11 15:35 vmlinux.nochecks

Things didn't quite line up between the two images, though I'm not sure
what the underlying difference was.

Thanks,
Mark.
Ard Biesheuvel Jan. 11, 2016, 4:15 p.m. UTC | #2
On 11 January 2016 at 17:09, Mark Rutland <mark.rutland@arm.com> wrote:
> On Mon, Jan 11, 2016 at 02:18:57PM +0100, Ard Biesheuvel wrote:
>> Since the early fixmap page tables are populated using pages that are
>> part of the static footprint of the kernel, they are covered by the
>> initial kernel mapping, and we can refer to them without using __va/__pa
>> translations, which are tied to the linear mapping.
>>
>> Since the fixmap page tables are disjoint from the kernel mapping up
>> to the top level pgd entry, we can refer to bm_pte[] directly, and there
>> is no need to walk the page tables and perform __pa()/__va() translations
>> at each step.
>>
>> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> ---
>>  arch/arm64/mm/mmu.c | 32 ++++++--------------
>>  1 file changed, 9 insertions(+), 23 deletions(-)
>>
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index 7711554a94f4..75b5f0dc3bdc 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -570,38 +570,24 @@ void vmemmap_free(unsigned long start, unsigned long end)
>>  #endif       /* CONFIG_SPARSEMEM_VMEMMAP */
>>
>>  static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
>> -#if CONFIG_PGTABLE_LEVELS > 2
>>  static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
>> -#endif
>> -#if CONFIG_PGTABLE_LEVELS > 3
>>  static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
>> -#endif
>>
>>  static inline pud_t * fixmap_pud(unsigned long addr)
>>  {
>> -     pgd_t *pgd = pgd_offset_k(addr);
>> -
>> -     BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
>> -
>> -     return pud_offset(pgd, addr);
>> +     return (CONFIG_PGTABLE_LEVELS > 3) ? &bm_pud[pud_index(addr)]
>> +                                        : (pud_t *)pgd_offset_k(addr);
>
> If we move patch 6 earlier, we could use pud_offset_kimg here, and avoid
> the cast, at the cost of passing the pgd into fixmap_pud.
>
> Similarly for fixmap_pmd.
>

Is that necessarily an improvement? I know it hides the cast, but I
think having an explicit pgd_t* to pud_t* cast that so obviously
applies to CONFIG_PGTABLE_LEVELS < 4 only is fine as well.

>>  }
>>
>> -static inline pmd_t * fixmap_pmd(unsigned long addr)
>> +static inline pte_t * fixmap_pmd(unsigned long addr)
>>  {
>> -     pud_t *pud = fixmap_pud(addr);
>> -
>> -     BUG_ON(pud_none(*pud) || pud_bad(*pud));
>> -
>> -     return pmd_offset(pud, addr);
>> +     return (CONFIG_PGTABLE_LEVELS > 2) ? &bm_pmd[pmd_index(addr)]
>> +                                        : (pmd_t *)pgd_offset_k(addr);
>>  }
>
> I assume the return type change was unintentional?
>

Yes. Thanks for spotting that.

> With STRICT_MM_TYPECHECKS:
>
> arch/arm64/mm/mmu.c: In function 'fixmap_pmd':
> arch/arm64/mm/mmu.c:604:9: warning: return from incompatible pointer type [-Wincompatible-pointer-types]
>   return (CONFIG_PGTABLE_LEVELS > 2) ? &bm_pmd[pmd_index(addr)]
>          ^
> arch/arm64/mm/mmu.c: In function 'early_fixmap_init':
> arch/arm64/mm/mmu.c:635:6: warning: assignment from incompatible pointer type [-Wincompatible-pointer-types]
>   pmd = fixmap_pmd(addr);
>       ^
> arch/arm64/mm/mmu.c:645:11: warning: comparison of distinct pointer types lacks a cast
>   if ((pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)))
>            ^
> arch/arm64/mm/mmu.c:646:14: warning: comparison of distinct pointer types lacks a cast
>        || pmd != fixmap_pmd(fix_to_virt(FIX_BTMAP_END))) {
>               ^
>
> Side note: is there any reason we can't/shouldn't make
> STRICT_MM_TYPECHECKS a common config option? Or simply have it on by
> default for arm64?
>

I wouldn't mind at all.

> Having built with and without typechecks I see that it doesn't bloat the
> kernel Image size, though the binary isn't quite identical:
>
> [mark@leverpostej:~/src/linux]% ls -al *.*checks
> -rwxrwxr-x 1 mark mark   9288192 Jan 11 15:40 Image.checks
> -rwxrwxr-x 1 mark mark   9288192 Jan 11 15:36 Image.nochecks
> -rwxrwxr-x 1 mark mark 106782024 Jan 11 15:40 vmlinux.checks
> -rwxrwxr-x 1 mark mark 106688928 Jan 11 15:35 vmlinux.nochecks
>
> Things didn't quite line up between the two images, though I'm not sure
> what the underlying difference was.
>
> Thanks,
> Mark.
Mark Rutland Jan. 11, 2016, 4:27 p.m. UTC | #3
On Mon, Jan 11, 2016 at 05:15:13PM +0100, Ard Biesheuvel wrote:
> On 11 January 2016 at 17:09, Mark Rutland <mark.rutland@arm.com> wrote:
> > On Mon, Jan 11, 2016 at 02:18:57PM +0100, Ard Biesheuvel wrote:
> >> Since the early fixmap page tables are populated using pages that are
> >> part of the static footprint of the kernel, they are covered by the
> >> initial kernel mapping, and we can refer to them without using __va/__pa
> >> translations, which are tied to the linear mapping.
> >>
> >> Since the fixmap page tables are disjoint from the kernel mapping up
> >> to the top level pgd entry, we can refer to bm_pte[] directly, and there
> >> is no need to walk the page tables and perform __pa()/__va() translations
> >> at each step.
> >>
> >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> >> ---
> >>  arch/arm64/mm/mmu.c | 32 ++++++--------------
> >>  1 file changed, 9 insertions(+), 23 deletions(-)
> >>
> >> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> >> index 7711554a94f4..75b5f0dc3bdc 100644
> >> --- a/arch/arm64/mm/mmu.c
> >> +++ b/arch/arm64/mm/mmu.c
> >> @@ -570,38 +570,24 @@ void vmemmap_free(unsigned long start, unsigned long end)
> >>  #endif       /* CONFIG_SPARSEMEM_VMEMMAP */
> >>
> >>  static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
> >> -#if CONFIG_PGTABLE_LEVELS > 2
> >>  static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
> >> -#endif
> >> -#if CONFIG_PGTABLE_LEVELS > 3
> >>  static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
> >> -#endif
> >>
> >>  static inline pud_t * fixmap_pud(unsigned long addr)
> >>  {
> >> -     pgd_t *pgd = pgd_offset_k(addr);
> >> -
> >> -     BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
> >> -
> >> -     return pud_offset(pgd, addr);
> >> +     return (CONFIG_PGTABLE_LEVELS > 3) ? &bm_pud[pud_index(addr)]
> >> +                                        : (pud_t *)pgd_offset_k(addr);
> >
> > If we move patch 6 earlier, we could use pud_offset_kimg here, and avoid
> > the cast, at the cost of passing the pgd into fixmap_pud.
> >
> > Similarly for fixmap_pmd.
> >
> 
> Is that necessarily an improvement? I know it hides the cast, but I
> think having an explicit pgd_t* to pud_t* cast that so obviously
> applies to CONFIG_PGTABLE_LEVELS < 4 only is fine as well.

True; it's not a big thing either way.

> >>  }
> >>
> >> -static inline pmd_t * fixmap_pmd(unsigned long addr)
> >> +static inline pte_t * fixmap_pmd(unsigned long addr)
> >>  {
> >> -     pud_t *pud = fixmap_pud(addr);
> >> -
> >> -     BUG_ON(pud_none(*pud) || pud_bad(*pud));
> >> -
> >> -     return pmd_offset(pud, addr);
> >> +     return (CONFIG_PGTABLE_LEVELS > 2) ? &bm_pmd[pmd_index(addr)]
> >> +                                        : (pmd_t *)pgd_offset_k(addr);
> >>  }
> >
> > I assume the return type change was unintentional?
> >
> 
> Yes. Thanks for spotting that.

With that fixed:

Reviewed-by: Mark Rutland <mark.rutland@arm.com>

> > Side note: is there any reason we can't/shouldn't make
> > STRICT_MM_TYPECHECKS a common config option? Or simply have it on by
> > default for arm64?
> >
> 
> I wouldn't mind at all.

I'll dig into that a bit further then...

Thanks,
Mark.
Mark Rutland Jan. 11, 2016, 4:51 p.m. UTC | #4
On Mon, Jan 11, 2016 at 04:27:38PM +0000, Mark Rutland wrote:
> On Mon, Jan 11, 2016 at 05:15:13PM +0100, Ard Biesheuvel wrote:
> > On 11 January 2016 at 17:09, Mark Rutland <mark.rutland@arm.com> wrote:
> > > On Mon, Jan 11, 2016 at 02:18:57PM +0100, Ard Biesheuvel wrote:
> > >> Since the early fixmap page tables are populated using pages that are
> > >> part of the static footprint of the kernel, they are covered by the
> > >> initial kernel mapping, and we can refer to them without using __va/__pa
> > >> translations, which are tied to the linear mapping.
> > >>
> > >> Since the fixmap page tables are disjoint from the kernel mapping up
> > >> to the top level pgd entry, we can refer to bm_pte[] directly, and there
> > >> is no need to walk the page tables and perform __pa()/__va() translations
> > >> at each step.
> > >>
> > >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
> > >> ---
> > >>  arch/arm64/mm/mmu.c | 32 ++++++--------------
> > >>  1 file changed, 9 insertions(+), 23 deletions(-)
> > >>
> > >> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> > >> index 7711554a94f4..75b5f0dc3bdc 100644
> > >> --- a/arch/arm64/mm/mmu.c
> > >> +++ b/arch/arm64/mm/mmu.c
> > >> @@ -570,38 +570,24 @@ void vmemmap_free(unsigned long start, unsigned long end)
> > >>  #endif       /* CONFIG_SPARSEMEM_VMEMMAP */
> > >>
> > >>  static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
> > >> -#if CONFIG_PGTABLE_LEVELS > 2
> > >>  static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
> > >> -#endif
> > >> -#if CONFIG_PGTABLE_LEVELS > 3
> > >>  static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
> > >> -#endif
> > >>
> > >>  static inline pud_t * fixmap_pud(unsigned long addr)
> > >>  {
> > >> -     pgd_t *pgd = pgd_offset_k(addr);
> > >> -
> > >> -     BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
> > >> -
> > >> -     return pud_offset(pgd, addr);
> > >> +     return (CONFIG_PGTABLE_LEVELS > 3) ? &bm_pud[pud_index(addr)]
> > >> +                                        : (pud_t *)pgd_offset_k(addr);
> > >
> > > If we move patch 6 earlier, we could use pud_offset_kimg here, and avoid
> > > the cast, at the cost of passing the pgd into fixmap_pud.
> > >
> > > Similarly for fixmap_pmd.
> > >
> > 
> > Is that necessarily an improvement? I know it hides the cast, but I
> > think having an explicit pgd_t* to pud_t* cast that so obviously
> > applies to CONFIG_PGTABLE_LEVELS < 4 only is fine as well.
> 
> True; it's not a big thing either way.

Sorry, I'm going to change my mind on that again. I think using
p?d_offset_kimg is preferable, e.g.

static inline pud_t * fixmap_pud(unsigned long addr)
{
        pgd_t *pgd = pgd_offset_k(addr);

        BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));

        return pud_offset_kimg(pgd, addr);
}

static inline pmd_t * fixmap_pmd(unsigned long addr)
{
        pud_t *pud = fixmap_pud(addr);

        BUG_ON(pud_none(*pud) || pud_bad(*pud));

        return pmd_offset_kimg(pud, addr);
}

That avoids having to check CONFIG_PGTABLE_LEVELS and perform a cast,
avoids duplicating details about bm_{pud,pmd}, and keeps the existing structure
so it's easier to reason about the change. I was wrong about having to pass the
pgd or pud in, so callers don't need updating.

From my PoV that is preferable.

Thanks,
Mark.
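
The p?d_offset_kimg accessors referenced above come from patch 6 of the
series. A rough sketch of the idea, assuming (for illustration only) a
pgd_page_paddr() helper that extracts the next-level table's physical
address from the entry, and a __phys_to_kimg() helper that converts a
physical address into its kernel-image virtual address:

/* Sketch: resolve the next level via the kernel-image mapping
 * rather than the linear map's __va(). */
#define pud_offset_kimg(dir, addr) \
	((pud_t *)__phys_to_kimg(pgd_page_paddr(*(dir))) + pud_index(addr))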
Ard Biesheuvel Jan. 11, 2016, 5:08 p.m. UTC | #5
On 11 January 2016 at 17:51, Mark Rutland <mark.rutland@arm.com> wrote:
> On Mon, Jan 11, 2016 at 04:27:38PM +0000, Mark Rutland wrote:
>> On Mon, Jan 11, 2016 at 05:15:13PM +0100, Ard Biesheuvel wrote:
>> > On 11 January 2016 at 17:09, Mark Rutland <mark.rutland@arm.com> wrote:
>> > > On Mon, Jan 11, 2016 at 02:18:57PM +0100, Ard Biesheuvel wrote:
>> > >> Since the early fixmap page tables are populated using pages that are
>> > >> part of the static footprint of the kernel, they are covered by the
>> > >> initial kernel mapping, and we can refer to them without using __va/__pa
>> > >> translations, which are tied to the linear mapping.
>> > >>
>> > >> Since the fixmap page tables are disjoint from the kernel mapping up
>> > >> to the top level pgd entry, we can refer to bm_pte[] directly, and there
>> > >> is no need to walk the page tables and perform __pa()/__va() translations
>> > >> at each step.
>> > >>
>> > >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>> > >> ---
>> > >>  arch/arm64/mm/mmu.c | 32 ++++++--------------
>> > >>  1 file changed, 9 insertions(+), 23 deletions(-)
>> > >>
>> > >> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> > >> index 7711554a94f4..75b5f0dc3bdc 100644
>> > >> --- a/arch/arm64/mm/mmu.c
>> > >> +++ b/arch/arm64/mm/mmu.c
>> > >> @@ -570,38 +570,24 @@ void vmemmap_free(unsigned long start, unsigned long end)
>> > >>  #endif       /* CONFIG_SPARSEMEM_VMEMMAP */
>> > >>
>> > >>  static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
>> > >> -#if CONFIG_PGTABLE_LEVELS > 2
>> > >>  static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
>> > >> -#endif
>> > >> -#if CONFIG_PGTABLE_LEVELS > 3
>> > >>  static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
>> > >> -#endif
>> > >>
>> > >>  static inline pud_t * fixmap_pud(unsigned long addr)
>> > >>  {
>> > >> -     pgd_t *pgd = pgd_offset_k(addr);
>> > >> -
>> > >> -     BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
>> > >> -
>> > >> -     return pud_offset(pgd, addr);
>> > >> +     return (CONFIG_PGTABLE_LEVELS > 3) ? &bm_pud[pud_index(addr)]
>> > >> +                                        : (pud_t *)pgd_offset_k(addr);
>> > >
>> > > If we move patch 6 earlier, we could use pud_offset_kimg here, and avoid
>> > > the cast, at the cost of passing the pgd into fixmap_pud.
>> > >
>> > > Similarly for fixmap_pmd.
>> > >
>> >
>> > Is that necessarily an improvement? I know it hides the cast, but I
>> > think having an explicit pgd_t* to pud_t* cast that so obviously
>> > applies to CONFIG_PGTABLE_LEVELS < 4 only is fine as well.
>>
>> True; it's not a big thing either way.
>
> Sorry, I'm going to change my mind on that again. I think using
> p?d_offset_kimg is preferable, e.g.
>
> static inline pud_t * fixmap_pud(unsigned long addr)
> {
>         pgd_t *pgd = pgd_offset_k(addr);
>
>         BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
>
>         return pud_offset_kimg(pgd, addr);
> }
>
> static inline pmd_t * fixmap_pmd(unsigned long addr)
> {
>         pud_t *pud = fixmap_pud(addr);
>
>         BUG_ON(pud_none(*pud) || pud_bad(*pud));
>
>         return pmd_offset_kimg(pud, addr);
> }
>
> That avoids having to check CONFIG_PGTABLE_LEVELS and perform a cast,
> avoids duplicating details about bm_{pud,pmd}, and keeps the existing structure
> so it's easier to reason about the change. I was wrong about having to pass the
> pgd or pud in, so callers don't need updating.
>
> From my PoV that is preferable.
>

OK. I think it looks better, indeed.
Ard Biesheuvel Jan. 11, 2016, 5:15 p.m. UTC | #6
On 11 January 2016 at 18:08, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> On 11 January 2016 at 17:51, Mark Rutland <mark.rutland@arm.com> wrote:
>> On Mon, Jan 11, 2016 at 04:27:38PM +0000, Mark Rutland wrote:
>>> On Mon, Jan 11, 2016 at 05:15:13PM +0100, Ard Biesheuvel wrote:
>>> > On 11 January 2016 at 17:09, Mark Rutland <mark.rutland@arm.com> wrote:
>>> > > On Mon, Jan 11, 2016 at 02:18:57PM +0100, Ard Biesheuvel wrote:
>>> > >> Since the early fixmap page tables are populated using pages that are
>>> > >> part of the static footprint of the kernel, they are covered by the
>>> > >> initial kernel mapping, and we can refer to them without using __va/__pa
>>> > >> translations, which are tied to the linear mapping.
>>> > >>
>>> > >> Since the fixmap page tables are disjoint from the kernel mapping up
>>> > >> to the top level pgd entry, we can refer to bm_pte[] directly, and there
>>> > >> is no need to walk the page tables and perform __pa()/__va() translations
>>> > >> at each step.
>>> > >>
>>> > >> Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
>>> > >> ---
>>> > >>  arch/arm64/mm/mmu.c | 32 ++++++--------------
>>> > >>  1 file changed, 9 insertions(+), 23 deletions(-)
>>> > >>
>>> > >> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>>> > >> index 7711554a94f4..75b5f0dc3bdc 100644
>>> > >> --- a/arch/arm64/mm/mmu.c
>>> > >> +++ b/arch/arm64/mm/mmu.c
>>> > >> @@ -570,38 +570,24 @@ void vmemmap_free(unsigned long start, unsigned long end)
>>> > >>  #endif       /* CONFIG_SPARSEMEM_VMEMMAP */
>>> > >>
>>> > >>  static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
>>> > >> -#if CONFIG_PGTABLE_LEVELS > 2
>>> > >>  static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
>>> > >> -#endif
>>> > >> -#if CONFIG_PGTABLE_LEVELS > 3
>>> > >>  static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
>>> > >> -#endif
>>> > >>
>>> > >>  static inline pud_t * fixmap_pud(unsigned long addr)
>>> > >>  {
>>> > >> -     pgd_t *pgd = pgd_offset_k(addr);
>>> > >> -
>>> > >> -     BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
>>> > >> -
>>> > >> -     return pud_offset(pgd, addr);
>>> > >> +     return (CONFIG_PGTABLE_LEVELS > 3) ? &bm_pud[pud_index(addr)]
>>> > >> +                                        : (pud_t *)pgd_offset_k(addr);
>>> > >
>>> > > If we move patch 6 earlier, we could use pud_offset_kimg here, and avoid
>>> > > the cast, at the cost of passing the pgd into fixmap_pud.
>>> > >
>>> > > Similarly for fixmap_pmd.
>>> > >
>>> >
>>> > Is that necessarily an improvement? I know it hides the cast, but I
>>> > think having an explicit pgd_t* to pud_t* cast that so obviously
>>> > applies to CONFIG_PGTABLE_LEVELS < 4 only is fine as well.
>>>
>>> True; it's not a big thing either way.
>>
>> Sorry, I'm going to change my mind on that again. I think using
>> p?d_offset_kimg is preferable, e.g.
>>
>> static inline pud_t * fixmap_pud(unsigned long addr)
>> {
>>         pgd_t *pgd = pgd_offset_k(addr);
>>
>>         BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
>>
>>         return pud_offset_kimg(pgd, addr);
>> }
>>
>> static inline pmd_t * fixmap_pmd(unsigned long addr)
>> {
>>         pud_t *pud = fixmap_pud(addr);
>>
>>         BUG_ON(pud_none(*pud) || pud_bad(*pud));
>>
>>         return pmd_offset_kimg(pud, addr);
>> }
>>
>> That avoids having to check CONFIG_PGTABLE_LEVELS and perform a cast,
>> avoids duplicating details about bm_{pud,pmd}, and keeps the existing structure
>> so it's easier to reason about the change. I was wrong about having to pass the
>> pgd or pud in, so callers don't need updating.
>>
>> From my PoV that is preferable.
>>
>
> OK. I think it looks better, indeed.

... however, this does mean we have to go through a __pa() translation
and back just to get to the address of bm_pud/bm_pmd
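
Spelled out with the same assumed helpers as the sketch above, that
round trip is:

/* The pgd entry holds __pa(bm_pud), written there by pgd_populate(),
 * so the walk converts that physical address back to a virtual one... */
pud_t *pud = (pud_t *)__phys_to_kimg(pgd_page_paddr(*pgd)) + pud_index(addr);
/* ...even though &bm_pud[pud_index(addr)] is already known at link time. */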
Mark Rutland Jan. 11, 2016, 5:21 p.m. UTC | #7
On Mon, Jan 11, 2016 at 06:15:56PM +0100, Ard Biesheuvel wrote:
> On 11 January 2016 at 18:08, Ard Biesheuvel <ard.biesheuvel@linaro.org> wrote:
> > On 11 January 2016 at 17:51, Mark Rutland <mark.rutland@arm.com> wrote:
> >> Sorry, I'm going to change my mind on that again. I think using
> >> p?d_offset_kimg is preferable, e.g.
> >>
> >> static inline pud_t * fixmap_pud(unsigned long addr)
> >> {
> >>         pgd_t *pgd = pgd_offset_k(addr);
> >>
> >>         BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
> >>
> >>         return pud_offset_kimg(pgd, addr);
> >> }
> >>
> >> static inline pmd_t * fixmap_pmd(unsigned long addr)
> >> {
> >>         pud_t *pud = fixmap_pud(addr);
> >>
> >>         BUG_ON(pud_none(*pud) || pud_bad(*pud));
> >>
> >>         return pmd_offset_kimg(pud, addr);
> >> }
> >>
> >> That avoids having to check CONFIG_PGTABLE_LEVELS and perform a cast,
> >> avoids duplicating details about bm_{pud,pmd}, and keeps the existing structure
> >> so it's easier to reason about the change. I was wrong about having to pass the
> >> pgd or pud in, so callers don't need updating.
> >>
> >> From my PoV that is preferable.
> >>
> >
> > OK. I think it looks better, indeed.
> 
> ... however, this does mean we have to go through a __pa() translation
> and back just to get to the address of bm_pud/bm_pmd

True, but we only do it in the case of a one-off init function, so I
don't think we'll notice the overhead.

Mark.

Patch

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 7711554a94f4..75b5f0dc3bdc 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -570,38 +570,24 @@  void vmemmap_free(unsigned long start, unsigned long end)
 #endif	/* CONFIG_SPARSEMEM_VMEMMAP */
 
 static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
-#if CONFIG_PGTABLE_LEVELS > 2
 static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss;
-#endif
-#if CONFIG_PGTABLE_LEVELS > 3
 static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss;
-#endif
 
 static inline pud_t * fixmap_pud(unsigned long addr)
 {
-	pgd_t *pgd = pgd_offset_k(addr);
-
-	BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd));
-
-	return pud_offset(pgd, addr);
+	return (CONFIG_PGTABLE_LEVELS > 3) ? &bm_pud[pud_index(addr)]
+					   : (pud_t *)pgd_offset_k(addr);
 }
 
-static inline pmd_t * fixmap_pmd(unsigned long addr)
+static inline pte_t * fixmap_pmd(unsigned long addr)
 {
-	pud_t *pud = fixmap_pud(addr);
-
-	BUG_ON(pud_none(*pud) || pud_bad(*pud));
-
-	return pmd_offset(pud, addr);
+	return (CONFIG_PGTABLE_LEVELS > 2) ? &bm_pmd[pmd_index(addr)]
+					   : (pmd_t *)pgd_offset_k(addr);
 }
 
 static inline pte_t * fixmap_pte(unsigned long addr)
 {
-	pmd_t *pmd = fixmap_pmd(addr);
-
-	BUG_ON(pmd_none(*pmd) || pmd_bad(*pmd));
-
-	return pte_offset_kernel(pmd, addr);
+	return &bm_pte[pte_index(addr)];
 }
 
 void __init early_fixmap_init(void)
@@ -613,14 +599,14 @@  void __init early_fixmap_init(void)
 
 	pgd = pgd_offset_k(addr);
 	pgd_populate(&init_mm, pgd, bm_pud);
-	pud = pud_offset(pgd, addr);
+	pud = fixmap_pud(addr);
 	pud_populate(&init_mm, pud, bm_pmd);
-	pmd = pmd_offset(pud, addr);
+	pmd = fixmap_pmd(addr);
 	pmd_populate_kernel(&init_mm, pmd, bm_pte);
 
 	/*
 	 * The boot-ioremap range spans multiple pmds, for which
-	 * we are not preparted:
+	 * we are not prepared:
 	 */
 	BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
 		     != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));