diff mbox series

[v4,2/4] riscv: Improve flush_tlb_range() for hugetlb pages

Message ID 20230911131224.61924-3-alexghiti@rivosinc.com (mailing list archive)
State Superseded
Headers show
Series riscv: tlb flush improvements | expand

Checks

Context Check Description
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be for-next at HEAD 0bb80ecc33a8
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/maintainers_pattern success MAINTAINERS pattern errors before the patch: 5 and now 5
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/build_rv64_clang_allmodconfig success Errors and warnings before: 9 this patch: 9
conchuod/module_param success Was 0 now: 0
conchuod/build_rv64_gcc_allmodconfig success Errors and warnings before: 9 this patch: 9
conchuod/build_rv32_defconfig success Build OK
conchuod/dtb_warn_rv64 success Errors and warnings before: 25 this patch: 25
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch success total: 0 errors, 0 warnings, 0 checks, 51 lines checked
conchuod/build_rv64_nommu_k210_defconfig success Build OK
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig success Build OK
conchuod/patch-2-test-13 success .github/scripts/patches/verify_signedoff.sh
conchuod/vmtest-for-next-PR warning PR summary
conchuod/patch-2-test-1 success .github/scripts/patches/build_rv32_defconfig.sh
conchuod/patch-2-test-2 success .github/scripts/patches/build_rv64_clang_allmodconfig.sh
conchuod/patch-2-test-3 success .github/scripts/patches/build_rv64_gcc_allmodconfig.sh
conchuod/patch-2-test-4 success .github/scripts/patches/build_rv64_nommu_k210_defconfig.sh
conchuod/patch-2-test-5 success .github/scripts/patches/build_rv64_nommu_virt_defconfig.sh
conchuod/patch-2-test-6 warning .github/scripts/patches/checkpatch.sh
conchuod/patch-2-test-7 success .github/scripts/patches/dtb_warn_rv64.sh
conchuod/patch-2-test-8 success .github/scripts/patches/header_inline.sh
conchuod/patch-2-test-9 success .github/scripts/patches/kdoc.sh
conchuod/patch-2-test-10 success .github/scripts/patches/module_param.sh
conchuod/patch-2-test-11 success .github/scripts/patches/verify_fixes.sh
conchuod/patch-2-test-12 success .github/scripts/patches/verify_signedoff.sh

Commit Message

Alexandre Ghiti Sept. 11, 2023, 1:12 p.m. UTC
flush_tlb_range() uses a fixed stride of PAGE_SIZE. In its current form,
when a hugetlb mapping needs to be flushed, flush_tlb_range() flushes the
whole TLB. So set the stride to the size of the hugetlb mapping in order
to flush only that mapping. However, if the hugepage is a NAPOT region,
every PTE that constitutes the mapping must be invalidated, so the stride
must actually be the size mapped by a single PTE.

Note that THPs are directly handled by flush_pmd_tlb_range().

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
---
 arch/riscv/mm/tlbflush.c | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

Comments

Lad, Prabhakar Sept. 19, 2023, 12:07 p.m. UTC | #1
On Mon, Sep 11, 2023 at 2:14 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> flush_tlb_range() uses a fixed stride of PAGE_SIZE and in its current form,
> when a hugetlb mapping needs to be flushed, flush_tlb_range() flushes the
> whole tlb: so set a stride of the size of the hugetlb mapping in order to
> only flush the hugetlb mapping. However, if the hugepage is a NAPOT region,
> all PTEs that constitute this mapping must be invalidated, so the stride
> size must actually be the size of the PTE.
>
> Note that THPs are directly handled by flush_pmd_tlb_range().
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> ---
>  arch/riscv/mm/tlbflush.c | 39 ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 38 insertions(+), 1 deletion(-)
>
Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com> # On RZ/Five SMARC

Cheers,
Prabhakar

> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index fa03289853d8..5bda6d4fed90 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -3,6 +3,7 @@
>  #include <linux/mm.h>
>  #include <linux/smp.h>
>  #include <linux/sched.h>
> +#include <linux/hugetlb.h>
>  #include <asm/sbi.h>
>  #include <asm/mmu_context.h>
>
> @@ -147,7 +148,43 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
>  void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
>                      unsigned long end)
>  {
> -       __flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE);
> +       unsigned long stride_size;
> +
> +       stride_size = is_vm_hugetlb_page(vma) ?
> +                               huge_page_size(hstate_vma(vma)) :
> +                               PAGE_SIZE;
> +
> +#ifdef CONFIG_RISCV_ISA_SVNAPOT
> +       /*
> +        * As stated in the privileged specification, every PTE in a NAPOT
> +        * region must be invalidated, so reset the stride in that case.
> +        */
> +       if (has_svnapot()) {
> +               unsigned long order, napot_size;
> +
> +               for_each_napot_order(order) {
> +                       napot_size = napot_cont_size(order);
> +
> +                       if (stride_size != napot_size)
> +                               continue;
> +
> +                       if (napot_size >= PGDIR_SIZE)
> +                               stride_size = PGDIR_SIZE;
> +                       else if (napot_size >= P4D_SIZE)
> +                               stride_size = P4D_SIZE;
> +                       else if (napot_size >= PUD_SIZE)
> +                               stride_size = PUD_SIZE;
> +                       else if (napot_size >= PMD_SIZE)
> +                               stride_size = PMD_SIZE;
> +                       else
> +                               stride_size = PAGE_SIZE;
> +
> +                       break;
> +               }
> +       }
> +#endif
> +
> +       __flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
>  }
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
> --
> 2.39.2
>
Samuel Holland Oct. 9, 2023, 5:53 p.m. UTC | #2
Hi Alex,

On 2023-09-11 8:12 AM, Alexandre Ghiti wrote:
> flush_tlb_range() uses a fixed stride of PAGE_SIZE and in its current form,
> when a hugetlb mapping needs to be flushed, flush_tlb_range() flushes the
> whole tlb: so set a stride of the size of the hugetlb mapping in order to
> only flush the hugetlb mapping. However, if the hugepage is a NAPOT region,
> all PTEs that constitute this mapping must be invalidated, so the stride
> size must actually be the size of the PTE.
> 
> Note that THPs are directly handled by flush_pmd_tlb_range().
> 
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> ---
>  arch/riscv/mm/tlbflush.c | 39 ++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 38 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index fa03289853d8..5bda6d4fed90 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -3,6 +3,7 @@
>  #include <linux/mm.h>
>  #include <linux/smp.h>
>  #include <linux/sched.h>
> +#include <linux/hugetlb.h>
>  #include <asm/sbi.h>
>  #include <asm/mmu_context.h>
>  
> @@ -147,7 +148,43 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
>  void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
>  		     unsigned long end)
>  {
> -	__flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE);
> +	unsigned long stride_size;
> +
> +	stride_size = is_vm_hugetlb_page(vma) ?
> +				huge_page_size(hstate_vma(vma)) :
> +				PAGE_SIZE;
> +
> +#ifdef CONFIG_RISCV_ISA_SVNAPOT
> +	/*
> +	 * As stated in the privileged specification, every PTE in a NAPOT
> +	 * region must be invalidated, so reset the stride in that case.
> +	 */
> +	if (has_svnapot()) {

This whole block should probably go inside the is_vm_hugetlb_page(vma) check,
since we have to perform that check anyway.

> +		unsigned long order, napot_size;
> +
> +		for_each_napot_order(order) {
> +			napot_size = napot_cont_size(order);
> +
> +			if (stride_size != napot_size)
> +				continue;
> +
> +			if (napot_size >= PGDIR_SIZE)

Can you check stride_size here directly, and drop the loop? We should be able to
assume that the huge page size is valid. Non-NAPOT hugepages will hit one of the
equal-to cases below, which is fine.

Regards,
Samuel

> +				stride_size = PGDIR_SIZE;
> +			else if (napot_size >= P4D_SIZE)
> +				stride_size = P4D_SIZE;
> +			else if (napot_size >= PUD_SIZE)
> +				stride_size = PUD_SIZE;
> +			else if (napot_size >= PMD_SIZE)
> +				stride_size = PMD_SIZE;
> +			else
> +				stride_size = PAGE_SIZE;
> +
> +			break;
> +		}
> +	}
> +#endif
> +
> +	__flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
>  }
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
Alexandre Ghiti Oct. 18, 2023, 11:32 a.m. UTC | #3
On Mon, Oct 9, 2023 at 7:53 PM Samuel Holland <samuel.holland@sifive.com> wrote:
>
> Hi Alex,
>
> On 2023-09-11 8:12 AM, Alexandre Ghiti wrote:
> > flush_tlb_range() uses a fixed stride of PAGE_SIZE and in its current form,
> > when a hugetlb mapping needs to be flushed, flush_tlb_range() flushes the
> > whole tlb: so set a stride of the size of the hugetlb mapping in order to
> > only flush the hugetlb mapping. However, if the hugepage is a NAPOT region,
> > all PTEs that constitute this mapping must be invalidated, so the stride
> > size must actually be the size of the PTE.
> >
> > Note that THPs are directly handled by flush_pmd_tlb_range().
> >
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > ---
> >  arch/riscv/mm/tlbflush.c | 39 ++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 38 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> > index fa03289853d8..5bda6d4fed90 100644
> > --- a/arch/riscv/mm/tlbflush.c
> > +++ b/arch/riscv/mm/tlbflush.c
> > @@ -3,6 +3,7 @@
> >  #include <linux/mm.h>
> >  #include <linux/smp.h>
> >  #include <linux/sched.h>
> > +#include <linux/hugetlb.h>
> >  #include <asm/sbi.h>
> >  #include <asm/mmu_context.h>
> >
> > @@ -147,7 +148,43 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
> >  void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
> >                    unsigned long end)
> >  {
> > -     __flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE);
> > +     unsigned long stride_size;
> > +
> > +     stride_size = is_vm_hugetlb_page(vma) ?
> > +                             huge_page_size(hstate_vma(vma)) :
> > +                             PAGE_SIZE;
> > +
> > +#ifdef CONFIG_RISCV_ISA_SVNAPOT
> > +     /*
> > +      * As stated in the privileged specification, every PTE in a NAPOT
> > +      * region must be invalidated, so reset the stride in that case.
> > +      */
> > +     if (has_svnapot()) {
>
> This whole block should probably go inside the is_vm_hugetlb_page(vma) check,
> since we have to perform that check anyway.

Yes, you're right.

>
> > +             unsigned long order, napot_size;
> > +
> > +             for_each_napot_order(order) {
> > +                     napot_size = napot_cont_size(order);
> > +
> > +                     if (stride_size != napot_size)
> > +                             continue;
> > +
> > +                     if (napot_size >= PGDIR_SIZE)
>
> Can you check stride_size here directly, and drop the loop? We should be able to
> assume that the huge page size is valid. Non-NAPOT hugepages will hit one of the
> equal-to cases below, which is fine.

Yes, again, you're right.

I'll respin a new version now, let it go through our CI, and send it tomorrow.

Thanks,

Alex

>
> Regards,
> Samuel
>
> > +                             stride_size = PGDIR_SIZE;
> > +                     else if (napot_size >= P4D_SIZE)
> > +                             stride_size = P4D_SIZE;
> > +                     else if (napot_size >= PUD_SIZE)
> > +                             stride_size = PUD_SIZE;
> > +                     else if (napot_size >= PMD_SIZE)
> > +                             stride_size = PMD_SIZE;
> > +                     else
> > +                             stride_size = PAGE_SIZE;
> > +
> > +                     break;
> > +             }
> > +     }
> > +#endif
> > +
> > +     __flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
> >  }
> >  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> >  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
>
diff mbox series

Patch

diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index fa03289853d8..5bda6d4fed90 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -3,6 +3,7 @@ 
 #include <linux/mm.h>
 #include <linux/smp.h>
 #include <linux/sched.h>
+#include <linux/hugetlb.h>
 #include <asm/sbi.h>
 #include <asm/mmu_context.h>
 
@@ -147,7 +148,43 @@  void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		     unsigned long end)
 {
-	__flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE);
+	unsigned long stride_size;
+
+	stride_size = is_vm_hugetlb_page(vma) ?
+				huge_page_size(hstate_vma(vma)) :
+				PAGE_SIZE;
+
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+	/*
+	 * As stated in the privileged specification, every PTE in a NAPOT
+	 * region must be invalidated, so reset the stride in that case.
+	 */
+	if (has_svnapot()) {
+		unsigned long order, napot_size;
+
+		for_each_napot_order(order) {
+			napot_size = napot_cont_size(order);
+
+			if (stride_size != napot_size)
+				continue;
+
+			if (napot_size >= PGDIR_SIZE)
+				stride_size = PGDIR_SIZE;
+			else if (napot_size >= P4D_SIZE)
+				stride_size = P4D_SIZE;
+			else if (napot_size >= PUD_SIZE)
+				stride_size = PUD_SIZE;
+			else if (napot_size >= PMD_SIZE)
+				stride_size = PMD_SIZE;
+			else
+				stride_size = PAGE_SIZE;
+
+			break;
+		}
+	}
+#endif
+
+	__flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
 }
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,