
[v1] arm64: mm: Batch dsb and isb when populating pgtables

Message ID 20240327190723.185232-1-ryan.roberts@arm.com (mailing list archive)
State New, archived

Commit Message

Ryan Roberts March 27, 2024, 7:07 p.m. UTC
After removing unnecessary TLBIs, the next bottleneck when creating the
page tables for the linear map is DSB and ISB, which were previously
issued per-pte in __set_pte(). Since we are writing multiple ptes in a
given pte table, we can elide these barriers and insert them once we
have finished writing to the table.

Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
---
 arch/arm64/include/asm/pgtable.h |  7 ++++++-
 arch/arm64/mm/mmu.c              | 13 ++++++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

--
2.25.1
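
In short: the patch moves the DSB/ISB pair out of the per-PTE store and issues it once per populated PTE table. A condensed sketch of the resulting flow, with names taken from the diff below and the loop bookkeeping abbreviated:

    /* init_pte(): per-PTE stores with no barriers */
    do {
            ___set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
            /* ... advance ptep/addr/phys ... */
    } while (addr != end);

    /* alloc_init_cont_pte(): one barrier pair once the table is written */
    dsb(ishst);
    isb();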

Comments

Ard Biesheuvel March 28, 2024, 7:23 a.m. UTC | #1
On Wed, 27 Mar 2024 at 21:07, Ryan Roberts <ryan.roberts@arm.com> wrote:
>
> After removing unnecessary TLBIs, the next bottleneck when creating the
> page tables for the linear map is DSB and ISB, which were previously
> issued per-pte in __set_pte(). Since we are writing multiple ptes in a
> given pte table, we can elide these barriers and insert them once we
> have finished writing to the table.
>

Nice!

> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
> ---
>  arch/arm64/include/asm/pgtable.h |  7 ++++++-
>  arch/arm64/mm/mmu.c              | 13 ++++++++++++-
>  2 files changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index bd5d02f3f0a3..81e427b23b3f 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -271,9 +271,14 @@ static inline pte_t pte_mkdevmap(pte_t pte)
>         return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
>  }
>
> -static inline void __set_pte(pte_t *ptep, pte_t pte)
> +static inline void ___set_pte(pte_t *ptep, pte_t pte)

IMHO, we should either use WRITE_ONCE() directly in the caller, or
find a better name.

>  {
>         WRITE_ONCE(*ptep, pte);
> +}
> +
> +static inline void __set_pte(pte_t *ptep, pte_t pte)
> +{
> +       ___set_pte(ptep, pte);
>
>         /*
>          * Only if the new pte is valid and kernel, otherwise TLB maintenance
> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
> index 1b2a2a2d09b7..c6d5a76732d4 100644
> --- a/arch/arm64/mm/mmu.c
> +++ b/arch/arm64/mm/mmu.c
> @@ -301,7 +301,11 @@ static pte_t *init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
>         do {
>                 pte_t old_pte = __ptep_get(ptep);
>
> -               __set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
> +               /*
> +                * Required barriers to make this visible to the table walker
> +                * are deferred to the end of alloc_init_cont_pte().
> +                */
> +               ___set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
>
>                 /*
>                  * After the PTE entry has been populated once, we
> @@ -358,6 +362,13 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
>         } while (addr = next, addr != end);
>
>         ops->unmap(TYPE_PTE);
> +
> +       /*
> +        * Ensure all previous pgtable writes are visible to the table walker.
> +        * See init_pte().
> +        */
> +       dsb(ishst);
> +       isb();
>  }
>
>  static pmd_t *init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
> --
> 2.25.1
>
Ryan Roberts March 28, 2024, 8:45 a.m. UTC | #2
On 28/03/2024 07:23, Ard Biesheuvel wrote:
> On Wed, 27 Mar 2024 at 21:07, Ryan Roberts <ryan.roberts@arm.com> wrote:
>>
>> After removing unnecessary TLBIs, the next bottleneck when creating the
>> page tables for the linear map is DSB and ISB, which were previously
>> issued per-pte in __set_pte(). Since we are writing multiple ptes in a
>> given pte table, we can elide these barriers and insert them once we
>> have finished writing to the table.
>>
> 
> Nice!
> 
>> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
>> ---
>>  arch/arm64/include/asm/pgtable.h |  7 ++++++-
>>  arch/arm64/mm/mmu.c              | 13 ++++++++++++-
>>  2 files changed, 18 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
>> index bd5d02f3f0a3..81e427b23b3f 100644
>> --- a/arch/arm64/include/asm/pgtable.h
>> +++ b/arch/arm64/include/asm/pgtable.h
>> @@ -271,9 +271,14 @@ static inline pte_t pte_mkdevmap(pte_t pte)
>>         return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
>>  }
>>
>> -static inline void __set_pte(pte_t *ptep, pte_t pte)
>> +static inline void ___set_pte(pte_t *ptep, pte_t pte)
> 
> IMHO, we should either use WRITE_ONCE() directly in the caller, or
> find a better name.

How about __set_pte_nosync() ?

> 
>>  {
>>         WRITE_ONCE(*ptep, pte);
>> +}
>> +
>> +static inline void __set_pte(pte_t *ptep, pte_t pte)
>> +{
>> +       ___set_pte(ptep, pte);
>>
>>         /*
>>          * Only if the new pte is valid and kernel, otherwise TLB maintenance
>> diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
>> index 1b2a2a2d09b7..c6d5a76732d4 100644
>> --- a/arch/arm64/mm/mmu.c
>> +++ b/arch/arm64/mm/mmu.c
>> @@ -301,7 +301,11 @@ static pte_t *init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
>>         do {
>>                 pte_t old_pte = __ptep_get(ptep);
>>
>> -               __set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
>> +               /*
>> +                * Required barriers to make this visible to the table walker
>> +                * are deferred to the end of alloc_init_cont_pte().
>> +                */
>> +               ___set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
>>
>>                 /*
>>                  * After the PTE entry has been populated once, we
>> @@ -358,6 +362,13 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
>>         } while (addr = next, addr != end);
>>
>>         ops->unmap(TYPE_PTE);
>> +
>> +       /*
>> +        * Ensure all previous pgtable writes are visible to the table walker.
>> +        * See init_pte().
>> +        */
>> +       dsb(ishst);
>> +       isb();
>>  }
>>
>>  static pmd_t *init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,
>> --
>> 2.25.1
>>
Ard Biesheuvel March 28, 2024, 8:56 a.m. UTC | #3
On Thu, 28 Mar 2024 at 10:45, Ryan Roberts <ryan.roberts@arm.com> wrote:
>
> On 28/03/2024 07:23, Ard Biesheuvel wrote:
> > On Wed, 27 Mar 2024 at 21:07, Ryan Roberts <ryan.roberts@arm.com> wrote:
> >>
> >> After removing unnecessary TLBIs, the next bottleneck when creating the
> >> page tables for the linear map is DSB and ISB, which were previously
> >> issued per-pte in __set_pte(). Since we are writing multiple ptes in a
> >> given pte table, we can elide these barriers and insert them once we
> >> have finished writing to the table.
> >>
> >
> > Nice!
> >
> >> Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
> >> ---
> >>  arch/arm64/include/asm/pgtable.h |  7 ++++++-
> >>  arch/arm64/mm/mmu.c              | 13 ++++++++++++-
> >>  2 files changed, 18 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> >> index bd5d02f3f0a3..81e427b23b3f 100644
> >> --- a/arch/arm64/include/asm/pgtable.h
> >> +++ b/arch/arm64/include/asm/pgtable.h
> >> @@ -271,9 +271,14 @@ static inline pte_t pte_mkdevmap(pte_t pte)
> >>         return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
> >>  }
> >>
> >> -static inline void __set_pte(pte_t *ptep, pte_t pte)
> >> +static inline void ___set_pte(pte_t *ptep, pte_t pte)
> >
> > IMHO, we should either use WRITE_ONCE() directly in the caller, or
> > find a better name.
>
> How about __set_pte_nosync() ?
>

Works for me.
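
If the __set_pte_nosync() name suggested above is adopted in a respin, the header split would presumably end up along these lines (a sketch only; the existing "valid and kernel" barrier logic in __set_pte() is unchanged and elided here):

    static inline void __set_pte_nosync(pte_t *ptep, pte_t pte)
    {
            /* Raw store; the caller is responsible for any required DSB/ISB. */
            WRITE_ONCE(*ptep, pte);
    }

    static inline void __set_pte(pte_t *ptep, pte_t pte)
    {
            __set_pte_nosync(ptep, pte);

            /* ... existing barrier logic for valid kernel PTEs ... */
    }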

Patch

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index bd5d02f3f0a3..81e427b23b3f 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -271,9 +271,14 @@  static inline pte_t pte_mkdevmap(pte_t pte)
 	return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL));
 }

-static inline void __set_pte(pte_t *ptep, pte_t pte)
+static inline void ___set_pte(pte_t *ptep, pte_t pte)
 {
 	WRITE_ONCE(*ptep, pte);
+}
+
+static inline void __set_pte(pte_t *ptep, pte_t pte)
+{
+	___set_pte(ptep, pte);

 	/*
 	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 1b2a2a2d09b7..c6d5a76732d4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -301,7 +301,11 @@  static pte_t *init_pte(pte_t *ptep, unsigned long addr, unsigned long end,
 	do {
 		pte_t old_pte = __ptep_get(ptep);

-		__set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));
+		/*
+		 * Required barriers to make this visible to the table walker
+		 * are deferred to the end of alloc_init_cont_pte().
+		 */
+		___set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot));

 		/*
 		 * After the PTE entry has been populated once, we
@@ -358,6 +362,13 @@  static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
 	} while (addr = next, addr != end);

 	ops->unmap(TYPE_PTE);
+
+	/*
+	 * Ensure all previous pgtable writes are visible to the table walker.
+	 * See init_pte().
+	 */
+	dsb(ishst);
+	isb();
 }

 static pmd_t *init_pmd(pmd_t *pmdp, unsigned long addr, unsigned long end,