diff mbox series

[v3,4/4] riscv: Improve flush_tlb_kernel_range()

Message ID 20230801085402.1168351-5-alexghiti@rivosinc.com (mailing list archive)
State Superseded
Commit bbc9ad35b51b42a8f99963393f4b4ca5cff99918
Headers show
Series riscv: tlb flush improvements | expand

Checks

Context Check Description
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be for-next at HEAD 471aba2e4760
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/maintainers_pattern success MAINTAINERS pattern errors before the patch: 4 and now 4
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/build_rv64_clang_allmodconfig success Errors and warnings before: 2786 this patch: 2786
conchuod/module_param success Was 0 now: 0
conchuod/build_rv64_gcc_allmodconfig success Errors and warnings before: 15671 this patch: 15670
conchuod/build_rv32_defconfig success Build OK
conchuod/dtb_warn_rv64 success Errors and warnings before: 3 this patch: 3
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch success total: 0 errors, 0 warnings, 0 checks, 83 lines checked
conchuod/build_rv64_nommu_k210_defconfig success Build OK
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig success Build OK

Commit Message

Alexandre Ghiti Aug. 1, 2023, 8:54 a.m. UTC
This function used to simply flush the whole TLB of all harts; be more
subtle and try to flush only the requested range.

The problem is that we can only use PAGE_SIZE as the stride, since we don't
know the size of the underlying mapping; as a result, this function is improved
only if the size of the region to flush is < threshold * PAGE_SIZE.

Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
---
 arch/riscv/include/asm/tlbflush.h | 11 +++++-----
 arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
 2 files changed, 31 insertions(+), 14 deletions(-)

Comments

Lad, Prabhakar Sept. 6, 2023, 11:48 a.m. UTC | #1
Hi Alexandre,

On Tue, Aug 1, 2023 at 9:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> This function used to simply flush the whole tlb of all harts, be more
> subtile and try to only flush the range.
>
> The problem is that we can only use PAGE_SIZE as stride since we don't know
> the size of the underlying mapping and then this function will be improved
> only if the size of the region to flush is < threshold * PAGE_SIZE.
>
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> ---
>  arch/riscv/include/asm/tlbflush.h | 11 +++++-----
>  arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
>  2 files changed, 31 insertions(+), 14 deletions(-)
>
After applying this patch, I am seeing module load issues on RZ/Five
(complete log [0]). I am testing defconfig + [1] (rz/five related
configs).

Any pointers on what could be an issue here?

[0] https://paste.debian.net/1291116/
[1] https://paste.debian.net/1291118/

Cheers,
Prabhakar

> diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
> index f5c4fb0ae642..7426fdcd8ec5 100644
> --- a/arch/riscv/include/asm/tlbflush.h
> +++ b/arch/riscv/include/asm/tlbflush.h
> @@ -37,6 +37,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
>  void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
>  void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
>                      unsigned long end);
> +void flush_tlb_kernel_range(unsigned long start, unsigned long end);
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
>  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
> @@ -53,15 +54,15 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
>         local_flush_tlb_all();
>  }
>
> -#define flush_tlb_mm(mm) flush_tlb_all()
> -#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
> -#endif /* !CONFIG_SMP || !CONFIG_MMU */
> -
>  /* Flush a range of kernel pages */
>  static inline void flush_tlb_kernel_range(unsigned long start,
>         unsigned long end)
>  {
> -       flush_tlb_all();
> +       local_flush_tlb_all();
>  }
>
> +#define flush_tlb_mm(mm) flush_tlb_all()
> +#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
> +#endif /* !CONFIG_SMP || !CONFIG_MMU */
> +
>  #endif /* _ASM_RISCV_TLBFLUSH_H */
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index 0c955c474f3a..687808013758 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -120,18 +120,27 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
>                               unsigned long size, unsigned long stride)
>  {
>         struct flush_tlb_range_data ftd;
> -       struct cpumask *cmask = mm_cpumask(mm);
> -       unsigned int cpuid;
> +       struct cpumask *cmask, full_cmask;
>         bool broadcast;
>
> -       if (cpumask_empty(cmask))
> -               return;
> +       if (mm) {
> +               unsigned int cpuid;
> +
> +               cmask = mm_cpumask(mm);
> +               if (cpumask_empty(cmask))
> +                       return;
> +
> +               cpuid = get_cpu();
> +               /* check if the tlbflush needs to be sent to other CPUs */
> +               broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
> +       } else {
> +               cpumask_setall(&full_cmask);
> +               cmask = &full_cmask;
> +               broadcast = true;
> +       }
>
> -       cpuid = get_cpu();
> -       /* check if the tlbflush needs to be sent to other CPUs */
> -       broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
>         if (static_branch_unlikely(&use_asid_allocator)) {
> -               unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;
> +               unsigned long asid = mm ? atomic_long_read(&mm->context.id) & asid_mask : 0;
>
>                 if (broadcast) {
>                         if (riscv_use_ipi_for_rfence()) {
> @@ -165,7 +174,8 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
>                 }
>         }
>
> -       put_cpu();
> +       if (mm)
> +               put_cpu();
>  }
>
>  void flush_tlb_mm(struct mm_struct *mm)
> @@ -196,6 +206,12 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
>
>         __flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
>  }
> +
> +void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> +{
> +       __flush_tlb_range(NULL, start, end, PAGE_SIZE);
> +}
> +
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
>                         unsigned long end)
> --
> 2.39.2
>
>
Alexandre Ghiti Sept. 6, 2023, 12:01 p.m. UTC | #2
Hi Prabhakar,

On Wed, Sep 6, 2023 at 1:49 PM Lad, Prabhakar
<prabhakar.csengg@gmail.com> wrote:
>
> Hi Alexandre,
>
> On Tue, Aug 1, 2023 at 9:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >
> > This function used to simply flush the whole tlb of all harts, be more
> > subtile and try to only flush the range.
> >
> > The problem is that we can only use PAGE_SIZE as stride since we don't know
> > the size of the underlying mapping and then this function will be improved
> > only if the size of the region to flush is < threshold * PAGE_SIZE.
> >
> > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > ---
> >  arch/riscv/include/asm/tlbflush.h | 11 +++++-----
> >  arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
> >  2 files changed, 31 insertions(+), 14 deletions(-)
> >
> After applying this patch, I am seeing module load issues on RZ/Five
> (complete log [0]). I am testing defconfig + [1] (rz/five related
> configs).
>
> Any pointers on what could be an issue here?

Can you give me the exact version of the kernel you use? The trap
addresses are vmalloc addresses, and a fix for those landed very late
in the release cycle.

>
> [0] https://paste.debian.net/1291116/
> [1] https://paste.debian.net/1291118/
>
> Cheers,
> Prabhakar
>
> > diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
> > index f5c4fb0ae642..7426fdcd8ec5 100644
> > --- a/arch/riscv/include/asm/tlbflush.h
> > +++ b/arch/riscv/include/asm/tlbflush.h
> > @@ -37,6 +37,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
> >  void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
> >  void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
> >                      unsigned long end);
> > +void flush_tlb_kernel_range(unsigned long start, unsigned long end);
> >  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> >  #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
> >  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
> > @@ -53,15 +54,15 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
> >         local_flush_tlb_all();
> >  }
> >
> > -#define flush_tlb_mm(mm) flush_tlb_all()
> > -#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
> > -#endif /* !CONFIG_SMP || !CONFIG_MMU */
> > -
> >  /* Flush a range of kernel pages */
> >  static inline void flush_tlb_kernel_range(unsigned long start,
> >         unsigned long end)
> >  {
> > -       flush_tlb_all();
> > +       local_flush_tlb_all();
> >  }
> >
> > +#define flush_tlb_mm(mm) flush_tlb_all()
> > +#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
> > +#endif /* !CONFIG_SMP || !CONFIG_MMU */
> > +
> >  #endif /* _ASM_RISCV_TLBFLUSH_H */
> > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> > index 0c955c474f3a..687808013758 100644
> > --- a/arch/riscv/mm/tlbflush.c
> > +++ b/arch/riscv/mm/tlbflush.c
> > @@ -120,18 +120,27 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
> >                               unsigned long size, unsigned long stride)
> >  {
> >         struct flush_tlb_range_data ftd;
> > -       struct cpumask *cmask = mm_cpumask(mm);
> > -       unsigned int cpuid;
> > +       struct cpumask *cmask, full_cmask;
> >         bool broadcast;
> >
> > -       if (cpumask_empty(cmask))
> > -               return;
> > +       if (mm) {
> > +               unsigned int cpuid;
> > +
> > +               cmask = mm_cpumask(mm);
> > +               if (cpumask_empty(cmask))
> > +                       return;
> > +
> > +               cpuid = get_cpu();
> > +               /* check if the tlbflush needs to be sent to other CPUs */
> > +               broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
> > +       } else {
> > +               cpumask_setall(&full_cmask);
> > +               cmask = &full_cmask;
> > +               broadcast = true;
> > +       }
> >
> > -       cpuid = get_cpu();
> > -       /* check if the tlbflush needs to be sent to other CPUs */
> > -       broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
> >         if (static_branch_unlikely(&use_asid_allocator)) {
> > -               unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;
> > +               unsigned long asid = mm ? atomic_long_read(&mm->context.id) & asid_mask : 0;
> >
> >                 if (broadcast) {
> >                         if (riscv_use_ipi_for_rfence()) {
> > @@ -165,7 +174,8 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
> >                 }
> >         }
> >
> > -       put_cpu();
> > +       if (mm)
> > +               put_cpu();
> >  }
> >
> >  void flush_tlb_mm(struct mm_struct *mm)
> > @@ -196,6 +206,12 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
> >
> >         __flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
> >  }
> > +
> > +void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> > +{
> > +       __flush_tlb_range(NULL, start, end, PAGE_SIZE);
> > +}
> > +
> >  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
> >  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
> >                         unsigned long end)
> > --
> > 2.39.2
> >
> >
Lad, Prabhakar Sept. 6, 2023, 12:08 p.m. UTC | #3
Hi Alexandre,

On Wed, Sep 6, 2023 at 1:01 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> Hi Prabhakar,
>
> On Wed, Sep 6, 2023 at 1:49 PM Lad, Prabhakar
> <prabhakar.csengg@gmail.com> wrote:
> >
> > Hi Alexandre,
> >
> > On Tue, Aug 1, 2023 at 9:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > >
> > > This function used to simply flush the whole tlb of all harts, be more
> > > subtile and try to only flush the range.
> > >
> > > The problem is that we can only use PAGE_SIZE as stride since we don't know
> > > the size of the underlying mapping and then this function will be improved
> > > only if the size of the region to flush is < threshold * PAGE_SIZE.
> > >
> > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > > ---
> > >  arch/riscv/include/asm/tlbflush.h | 11 +++++-----
> > >  arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
> > >  2 files changed, 31 insertions(+), 14 deletions(-)
> > >
> > After applying this patch, I am seeing module load issues on RZ/Five
> > (complete log [0]). I am testing defconfig + [1] (rz/five related
> > configs).
> >
> > Any pointers on what could be an issue here?
>
> Can you give me the exact version of the kernel you use? The trap
> addresses are vmalloc addresses, and a fix for those landed very late
> in the release cycle.
>
I am using next-20230906; I've pushed a branch [0] for you to have a look.

[0] https://github.com/prabhakarlad/linux/tree/rzfive-debug

Cheers,
Prabhakar
Alexandre Ghiti Sept. 6, 2023, 12:18 p.m. UTC | #4
On Wed, Sep 6, 2023 at 2:09 PM Lad, Prabhakar
<prabhakar.csengg@gmail.com> wrote:
>
> Hi Alexandre,
>
> On Wed, Sep 6, 2023 at 1:01 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >
> > Hi Prabhakar,
> >
> > On Wed, Sep 6, 2023 at 1:49 PM Lad, Prabhakar
> > <prabhakar.csengg@gmail.com> wrote:
> > >
> > > Hi Alexandre,
> > >
> > > On Tue, Aug 1, 2023 at 9:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > >
> > > > This function used to simply flush the whole tlb of all harts, be more
> > > > subtile and try to only flush the range.
> > > >
> > > > The problem is that we can only use PAGE_SIZE as stride since we don't know
> > > > the size of the underlying mapping and then this function will be improved
> > > > only if the size of the region to flush is < threshold * PAGE_SIZE.
> > > >
> > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > > > ---
> > > >  arch/riscv/include/asm/tlbflush.h | 11 +++++-----
> > > >  arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
> > > >  2 files changed, 31 insertions(+), 14 deletions(-)
> > > >
> > > After applying this patch, I am seeing module load issues on RZ/Five
> > > (complete log [0]). I am testing defconfig + [1] (rz/five related
> > > configs).
> > >
> > > Any pointers on what could be an issue here?
> >
> > Can you give me the exact version of the kernel you use? The trap
> > addresses are vmalloc addresses, and a fix for those landed very late
> > in the release cycle.
> >
> I am using next-20230906, Ive pushed a branch [1] for you to have a look.
>
> [0] https://github.com/prabhakarlad/linux/tree/rzfive-debug

Great, thanks, I had to get rid of this possibility :)

As-is, I have no idea, can you try to "bisect" the problem? I mean
which patch in the series leads to those traps?

Thanks!

Alex

>
> Cheers,
> Prabhakar
Lad, Prabhakar Sept. 6, 2023, 12:23 p.m. UTC | #5
Hi Alexandre,

On Wed, Sep 6, 2023 at 1:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> On Wed, Sep 6, 2023 at 2:09 PM Lad, Prabhakar
> <prabhakar.csengg@gmail.com> wrote:
> >
> > Hi Alexandre,
> >
> > On Wed, Sep 6, 2023 at 1:01 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > >
> > > Hi Prabhakar,
> > >
> > > On Wed, Sep 6, 2023 at 1:49 PM Lad, Prabhakar
> > > <prabhakar.csengg@gmail.com> wrote:
> > > >
> > > > Hi Alexandre,
> > > >
> > > > On Tue, Aug 1, 2023 at 9:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > >
> > > > > This function used to simply flush the whole tlb of all harts, be more
> > > > > subtile and try to only flush the range.
> > > > >
> > > > > The problem is that we can only use PAGE_SIZE as stride since we don't know
> > > > > the size of the underlying mapping and then this function will be improved
> > > > > only if the size of the region to flush is < threshold * PAGE_SIZE.
> > > > >
> > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > > > > ---
> > > > >  arch/riscv/include/asm/tlbflush.h | 11 +++++-----
> > > > >  arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
> > > > >  2 files changed, 31 insertions(+), 14 deletions(-)
> > > > >
> > > > After applying this patch, I am seeing module load issues on RZ/Five
> > > > (complete log [0]). I am testing defconfig + [1] (rz/five related
> > > > configs).
> > > >
> > > > Any pointers on what could be an issue here?
> > >
> > > Can you give me the exact version of the kernel you use? The trap
> > > addresses are vmalloc addresses, and a fix for those landed very late
> > > in the release cycle.
> > >
> > I am using next-20230906, Ive pushed a branch [1] for you to have a look.
> >
> > [0] https://github.com/prabhakarlad/linux/tree/rzfive-debug
>
> Great, thanks, I had to get rid of this possibility :)
>
> As-is, I have no idea, can you try to "bisect" the problem? I mean
> which patch in the series leads to those traps?
>
Oops sorry for not mentioning earlier, this is the offending patch
which leads to the issues seen on rz/five.

Cheers,
Prabhakar
Alexandre Ghiti Sept. 6, 2023, 12:43 p.m. UTC | #6
On Wed, Sep 6, 2023 at 2:24 PM Lad, Prabhakar
<prabhakar.csengg@gmail.com> wrote:
>
> Hi Alexandre,
>
> On Wed, Sep 6, 2023 at 1:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >
> > On Wed, Sep 6, 2023 at 2:09 PM Lad, Prabhakar
> > <prabhakar.csengg@gmail.com> wrote:
> > >
> > > Hi Alexandre,
> > >
> > > On Wed, Sep 6, 2023 at 1:01 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > >
> > > > Hi Prabhakar,
> > > >
> > > > On Wed, Sep 6, 2023 at 1:49 PM Lad, Prabhakar
> > > > <prabhakar.csengg@gmail.com> wrote:
> > > > >
> > > > > Hi Alexandre,
> > > > >
> > > > > On Tue, Aug 1, 2023 at 9:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > > >
> > > > > > This function used to simply flush the whole tlb of all harts, be more
> > > > > > subtile and try to only flush the range.
> > > > > >
> > > > > > The problem is that we can only use PAGE_SIZE as stride since we don't know
> > > > > > the size of the underlying mapping and then this function will be improved
> > > > > > only if the size of the region to flush is < threshold * PAGE_SIZE.
> > > > > >
> > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > > > > > ---
> > > > > >  arch/riscv/include/asm/tlbflush.h | 11 +++++-----
> > > > > >  arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
> > > > > >  2 files changed, 31 insertions(+), 14 deletions(-)
> > > > > >
> > > > > After applying this patch, I am seeing module load issues on RZ/Five
> > > > > (complete log [0]). I am testing defconfig + [1] (rz/five related
> > > > > configs).
> > > > >
> > > > > Any pointers on what could be an issue here?
> > > >
> > > > Can you give me the exact version of the kernel you use? The trap
> > > > addresses are vmalloc addresses, and a fix for those landed very late
> > > > in the release cycle.
> > > >
> > > I am using next-20230906, Ive pushed a branch [1] for you to have a look.
> > >
> > > [0] https://github.com/prabhakarlad/linux/tree/rzfive-debug
> >
> > Great, thanks, I had to get rid of this possibility :)
> >
> > As-is, I have no idea, can you try to "bisect" the problem? I mean
> > which patch in the series leads to those traps?
> >
> Oops sorry for not mentioning earlier, this is the offending patch
> which leads to the issues seen on rz/five.

Ok, so at least I found the following problem, but I don't see how
that could fix your issue: can you give it a try anyway? I'll keep looking
into this, thanks

diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index df2a0838c3a1..b5692bc6c76a 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -239,7 +239,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
unsigned long start,

 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-       __flush_tlb_range(NULL, start, end, PAGE_SIZE);
+       __flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
 }

 #ifdef CONFIG_TRANSPARENT_HUGEPAGE

>
> Cheers,
> Prabhakar
Palmer Dabbelt Sept. 6, 2023, 1:16 p.m. UTC | #7
On Wed, 06 Sep 2023 05:43:46 PDT (-0700), alexghiti@rivosinc.com wrote:
> On Wed, Sep 6, 2023 at 2:24 PM Lad, Prabhakar
> <prabhakar.csengg@gmail.com> wrote:
>>
>> Hi Alexandre,
>>
>> On Wed, Sep 6, 2023 at 1:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>> >
>> > On Wed, Sep 6, 2023 at 2:09 PM Lad, Prabhakar
>> > <prabhakar.csengg@gmail.com> wrote:
>> > >
>> > > Hi Alexandre,
>> > >
>> > > On Wed, Sep 6, 2023 at 1:01 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>> > > >
>> > > > Hi Prabhakar,
>> > > >
>> > > > On Wed, Sep 6, 2023 at 1:49 PM Lad, Prabhakar
>> > > > <prabhakar.csengg@gmail.com> wrote:
>> > > > >
>> > > > > Hi Alexandre,
>> > > > >
>> > > > > On Tue, Aug 1, 2023 at 9:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>> > > > > >
>> > > > > > This function used to simply flush the whole tlb of all harts, be more
>> > > > > > subtile and try to only flush the range.
>> > > > > >
>> > > > > > The problem is that we can only use PAGE_SIZE as stride since we don't know
>> > > > > > the size of the underlying mapping and then this function will be improved
>> > > > > > only if the size of the region to flush is < threshold * PAGE_SIZE.
>> > > > > >
>> > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>> > > > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
>> > > > > > ---
>> > > > > >  arch/riscv/include/asm/tlbflush.h | 11 +++++-----
>> > > > > >  arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
>> > > > > >  2 files changed, 31 insertions(+), 14 deletions(-)
>> > > > > >
>> > > > > After applying this patch, I am seeing module load issues on RZ/Five
>> > > > > (complete log [0]). I am testing defconfig + [1] (rz/five related
>> > > > > configs).
>> > > > >
>> > > > > Any pointers on what could be an issue here?
>> > > >
>> > > > Can you give me the exact version of the kernel you use? The trap
>> > > > addresses are vmalloc addresses, and a fix for those landed very late
>> > > > in the release cycle.
>> > > >
>> > > I am using next-20230906, Ive pushed a branch [1] for you to have a look.
>> > >
>> > > [0] https://github.com/prabhakarlad/linux/tree/rzfive-debug
>> >
>> > Great, thanks, I had to get rid of this possibility :)
>> >
>> > As-is, I have no idea, can you try to "bisect" the problem? I mean
>> > which patch in the series leads to those traps?
>> >
>> Oops sorry for not mentioning earlier, this is the offending patch
>> which leads to the issues seen on rz/five.
>
> Ok, so at least I found the following problem, but I don't see how
> that could fix your issue: can you give a try anyway? I keep looking
> into this, thanks
>
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index df2a0838c3a1..b5692bc6c76a 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -239,7 +239,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
> unsigned long start,
>
>  void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>  {
> -       __flush_tlb_range(NULL, start, end, PAGE_SIZE);
> +       __flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
>  }
>
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE

Alex and I were talking a bit.  I'm OK just dropping the TLB flush 
series for this release, that way we can get to the bottom of what's 
wrong.

>
>>
>> Cheers,
>> Prabhakar
Lad, Prabhakar Sept. 6, 2023, 1:54 p.m. UTC | #8
Hi Alexandre,

On Wed, Sep 6, 2023 at 1:43 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> On Wed, Sep 6, 2023 at 2:24 PM Lad, Prabhakar
> <prabhakar.csengg@gmail.com> wrote:
> >
> > Hi Alexandre,
> >
> > On Wed, Sep 6, 2023 at 1:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > >
> > > On Wed, Sep 6, 2023 at 2:09 PM Lad, Prabhakar
> > > <prabhakar.csengg@gmail.com> wrote:
> > > >
> > > > Hi Alexandre,
> > > >
> > > > On Wed, Sep 6, 2023 at 1:01 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > >
> > > > > Hi Prabhakar,
> > > > >
> > > > > On Wed, Sep 6, 2023 at 1:49 PM Lad, Prabhakar
> > > > > <prabhakar.csengg@gmail.com> wrote:
> > > > > >
> > > > > > Hi Alexandre,
> > > > > >
> > > > > > On Tue, Aug 1, 2023 at 9:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > > > >
> > > > > > > This function used to simply flush the whole tlb of all harts, be more
> > > > > > > subtile and try to only flush the range.
> > > > > > >
> > > > > > > The problem is that we can only use PAGE_SIZE as stride since we don't know
> > > > > > > the size of the underlying mapping and then this function will be improved
> > > > > > > only if the size of the region to flush is < threshold * PAGE_SIZE.
> > > > > > >
> > > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > > > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > > > > > > ---
> > > > > > >  arch/riscv/include/asm/tlbflush.h | 11 +++++-----
> > > > > > >  arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
> > > > > > >  2 files changed, 31 insertions(+), 14 deletions(-)
> > > > > > >
> > > > > > After applying this patch, I am seeing module load issues on RZ/Five
> > > > > > (complete log [0]). I am testing defconfig + [1] (rz/five related
> > > > > > configs).
> > > > > >
> > > > > > Any pointers on what could be an issue here?
> > > > >
> > > > > Can you give me the exact version of the kernel you use? The trap
> > > > > addresses are vmalloc addresses, and a fix for those landed very late
> > > > > in the release cycle.
> > > > >
> > > > I am using next-20230906, Ive pushed a branch [1] for you to have a look.
> > > >
> > > > [0] https://github.com/prabhakarlad/linux/tree/rzfive-debug
> > >
> > > Great, thanks, I had to get rid of this possibility :)
> > >
> > > As-is, I have no idea, can you try to "bisect" the problem? I mean
> > > which patch in the series leads to those traps?
> > >
> > Oops sorry for not mentioning earlier, this is the offending patch
> > which leads to the issues seen on rz/five.
>
> Ok, so at least I found the following problem, but I don't see how
> that could fix your issue: can you give a try anyway? I keep looking
> into this, thanks
>
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index df2a0838c3a1..b5692bc6c76a 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -239,7 +239,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
> unsigned long start,
>
>  void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>  {
> -       __flush_tlb_range(NULL, start, end, PAGE_SIZE);
> +       __flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
>  }
>
I am able to reproduce the issue with the above change too.

Cheers,
Prabhakar
Nadav Amit Sept. 6, 2023, 8:22 p.m. UTC | #9
> On Sep 6, 2023, at 4:48 AM, Lad, Prabhakar <prabhakar.csengg@gmail.com> wrote:
> 
> Hi Alexandre,
> 
> On Tue, Aug 1, 2023 at 9:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>> 
>> This function used to simply flush the whole tlb of all harts, be more
>> subtile and try to only flush the range.
>> 
>> The problem is that we can only use PAGE_SIZE as stride since we don't know
>> the size of the underlying mapping and then this function will be improved
>> only if the size of the region to flush is < threshold * PAGE_SIZE.
>> 
>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
>> ---
>> arch/riscv/include/asm/tlbflush.h | 11 +++++-----
>> arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
>> 2 files changed, 31 insertions(+), 14 deletions(-)
>> 
> After applying this patch, I am seeing module load issues on RZ/Five
> (complete log [0]). I am testing defconfig + [1] (rz/five related
> configs).
> 
> Any pointers on what could be an issue here?

None of my business, but looking at your code, it seems that you do not issue a
memory barrier before reading mm_cpumask() in __flush_tlb_range(). I believe you
would want to synchronize __flush_tlb_range() with switch_mm(), similarly to the
way it is done on x86.
Alexandre Ghiti Sept. 7, 2023, 9:05 a.m. UTC | #10
Hi Prabhakar,

On Wed, Sep 6, 2023 at 3:55 PM Lad, Prabhakar
<prabhakar.csengg@gmail.com> wrote:
>
> Hi Alexandre,
>
> On Wed, Sep 6, 2023 at 1:43 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >
> > On Wed, Sep 6, 2023 at 2:24 PM Lad, Prabhakar
> > <prabhakar.csengg@gmail.com> wrote:
> > >
> > > Hi Alexandre,
> > >
> > > On Wed, Sep 6, 2023 at 1:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > >
> > > > On Wed, Sep 6, 2023 at 2:09 PM Lad, Prabhakar
> > > > <prabhakar.csengg@gmail.com> wrote:
> > > > >
> > > > > Hi Alexandre,
> > > > >
> > > > > On Wed, Sep 6, 2023 at 1:01 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > > >
> > > > > > Hi Prabhakar,
> > > > > >
> > > > > > On Wed, Sep 6, 2023 at 1:49 PM Lad, Prabhakar
> > > > > > <prabhakar.csengg@gmail.com> wrote:
> > > > > > >
> > > > > > > Hi Alexandre,
> > > > > > >
> > > > > > > On Tue, Aug 1, 2023 at 9:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > > > > >
> > > > > > > > This function used to simply flush the whole tlb of all harts, be more
> > > > > > > > subtile and try to only flush the range.
> > > > > > > >
> > > > > > > > The problem is that we can only use PAGE_SIZE as stride since we don't know
> > > > > > > > the size of the underlying mapping and then this function will be improved
> > > > > > > > only if the size of the region to flush is < threshold * PAGE_SIZE.
> > > > > > > >
> > > > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > > > > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > > > > > > > ---
> > > > > > > >  arch/riscv/include/asm/tlbflush.h | 11 +++++-----
> > > > > > > >  arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
> > > > > > > >  2 files changed, 31 insertions(+), 14 deletions(-)
> > > > > > > >
> > > > > > > After applying this patch, I am seeing module load issues on RZ/Five
> > > > > > > (complete log [0]). I am testing defconfig + [1] (rz/five related
> > > > > > > configs).
> > > > > > >
> > > > > > > Any pointers on what could be an issue here?
> > > > > >
> > > > > > Can you give me the exact version of the kernel you use? The trap
> > > > > > addresses are vmalloc addresses, and a fix for those landed very late
> > > > > > in the release cycle.
> > > > > >
> > > > > I am using next-20230906, Ive pushed a branch [1] for you to have a look.
> > > > >
> > > > > [0] https://github.com/prabhakarlad/linux/tree/rzfive-debug
> > > >
> > > > Great, thanks, I had to get rid of this possibility :)
> > > >
> > > > As-is, I have no idea, can you try to "bisect" the problem? I mean
> > > > which patch in the series leads to those traps?
> > > >
> > > Oops sorry for not mentioning earlier, this is the offending patch
> > > which leads to the issues seen on rz/five.
> >
> > Ok, so at least I found the following problem, but I don't see how
> > that could fix your issue: can you give a try anyway? I keep looking
> > into this, thanks
> >
> > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> > index df2a0838c3a1..b5692bc6c76a 100644
> > --- a/arch/riscv/mm/tlbflush.c
> > +++ b/arch/riscv/mm/tlbflush.c
> > @@ -239,7 +239,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
> > unsigned long start,
> >
> >  void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> >  {
> > -       __flush_tlb_range(NULL, start, end, PAGE_SIZE);
> > +       __flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
> >  }
> >
> I am able to reproduce the issue with the above change too.

I can't reproduce the problem on my Unmatched or Qemu, so it is not
easy to debug. But I took another look at your traces and something is
weird to me. In the following trace (and there is another one), the
trap is taken at 0xffffffff015ca034, which is the beginning of
rz_ssi_probe(): that's a page fault, so no translation was found (or
an invalid one is cached).

[   16.586527] Unable to handle kernel paging request at virtual
address ffffffff015ca034
[   16.594750] Oops [#3]
...
[   16.622000] epc : rz_ssi_probe+0x0/0x52a [snd_soc_rz_ssi]
...
[   16.708697] status: 0000000200000120 badaddr: ffffffff015ca034
cause: 000000000000000c
[   16.716580] [<ffffffff015ca034>] rz_ssi_probe+0x0/0x52a
[snd_soc_rz_ssi]
...

But then here we are able to read the code at this same address:
[   16.821620] Code: 0109 6597 0000 8593 5f65 7097 7f34 80e7 7aa0 b7a9
(7139) f822

So that looks like a "transient" error. Do you know if your uarch
caches invalid TLB entries? If you don't know, let me know: I have just
written a piece of code to determine whether it does.

Do those errors always happen?

>
> Cheers,
> Prabhakar
Lad, Prabhakar Sept. 7, 2023, 10:49 a.m. UTC | #11
Hi Alexandre,

On Thu, Sep 7, 2023 at 10:06 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>
> Hi Prabhakar,
>
> On Wed, Sep 6, 2023 at 3:55 PM Lad, Prabhakar
> <prabhakar.csengg@gmail.com> wrote:
> >
> > Hi Alexandre,
> >
> > On Wed, Sep 6, 2023 at 1:43 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > >
> > > On Wed, Sep 6, 2023 at 2:24 PM Lad, Prabhakar
> > > <prabhakar.csengg@gmail.com> wrote:
> > > >
> > > > Hi Alexandre,
> > > >
> > > > On Wed, Sep 6, 2023 at 1:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > >
> > > > > On Wed, Sep 6, 2023 at 2:09 PM Lad, Prabhakar
> > > > > <prabhakar.csengg@gmail.com> wrote:
> > > > > >
> > > > > > Hi Alexandre,
> > > > > >
> > > > > > On Wed, Sep 6, 2023 at 1:01 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > > > >
> > > > > > > Hi Prabhakar,
> > > > > > >
> > > > > > > On Wed, Sep 6, 2023 at 1:49 PM Lad, Prabhakar
> > > > > > > <prabhakar.csengg@gmail.com> wrote:
> > > > > > > >
> > > > > > > > Hi Alexandre,
> > > > > > > >
> > > > > > > > On Tue, Aug 1, 2023 at 9:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > > > > > >
> > > > > > > > > This function used to simply flush the whole tlb of all harts, be more
> > > > > > > > > subtile and try to only flush the range.
> > > > > > > > >
> > > > > > > > > The problem is that we can only use PAGE_SIZE as stride since we don't know
> > > > > > > > > the size of the underlying mapping and then this function will be improved
> > > > > > > > > only if the size of the region to flush is < threshold * PAGE_SIZE.
> > > > > > > > >
> > > > > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > > > > > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > > > > > > > > ---
> > > > > > > > >  arch/riscv/include/asm/tlbflush.h | 11 +++++-----
> > > > > > > > >  arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
> > > > > > > > >  2 files changed, 31 insertions(+), 14 deletions(-)
> > > > > > > > >
> > > > > > > > After applying this patch, I am seeing module load issues on RZ/Five
> > > > > > > > (complete log [0]). I am testing defconfig + [1] (rz/five related
> > > > > > > > configs).
> > > > > > > >
> > > > > > > > Any pointers on what could be an issue here?
> > > > > > >
> > > > > > > Can you give me the exact version of the kernel you use? The trap
> > > > > > > addresses are vmalloc addresses, and a fix for those landed very late
> > > > > > > in the release cycle.
> > > > > > >
> > > > > > I am using next-20230906, Ive pushed a branch [1] for you to have a look.
> > > > > >
> > > > > > [0] https://github.com/prabhakarlad/linux/tree/rzfive-debug
> > > > >
> > > > > Great, thanks, I had to get rid of this possibility :)
> > > > >
> > > > > As-is, I have no idea, can you try to "bisect" the problem? I mean
> > > > > which patch in the series leads to those traps?
> > > > >
> > > > Oops sorry for not mentioning earlier, this is the offending patch
> > > > which leads to the issues seen on rz/five.
> > >
> > > Ok, so at least I found the following problem, but I don't see how
> > > that could fix your issue: can you give a try anyway? I keep looking
> > > into this, thanks
> > >
> > > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> > > index df2a0838c3a1..b5692bc6c76a 100644
> > > --- a/arch/riscv/mm/tlbflush.c
> > > +++ b/arch/riscv/mm/tlbflush.c
> > > @@ -239,7 +239,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
> > > unsigned long start,
> > >
> > >  void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> > >  {
> > > -       __flush_tlb_range(NULL, start, end, PAGE_SIZE);
> > > +       __flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
> > >  }
> > >
> > I am able to reproduce the issue with the above change too.
>
> I can't reproduce the problem on my Unmatched or Qemu, so it is not
> easy to debug. But I took another look at your traces and something is
> weird to me. In the following trace (and there is another one), the
> trap is taken at 0xffffffff015ca034, which is the beginning of
> rz_ssi_probe(): that's a page fault, so no translation was found (or
> an invalid one is cached).
>
> [   16.586527] Unable to handle kernel paging request at virtual
> address ffffffff015ca034
> [   16.594750] Oops [#3]
> ...
> [   16.622000] epc : rz_ssi_probe+0x0/0x52a [snd_soc_rz_ssi]
> ...
> [   16.708697] status: 0000000200000120 badaddr: ffffffff015ca034
> cause: 000000000000000c
> [   16.716580] [<ffffffff015ca034>] rz_ssi_probe+0x0/0x52a
> [snd_soc_rz_ssi]
> ...
>
> But then here we are able to read the code at this same address:
> [   16.821620] Code: 0109 6597 0000 8593 5f65 7097 7f34 80e7 7aa0 b7a9
> (7139) f822
>
> So that looks like a "transient" error. Do you know if you uarch
> caches invalid TLB entries? If you don't know, I have just written
> some piece of code to determine if it does, let me know.
>
No, I don't. Can you please share the details so that I can pass the
information on to you?

> Do those errors always happen?
>
Yes they do.

Cheers,
Prabhakar
Alexandre Ghiti Sept. 7, 2023, 1:47 p.m. UTC | #12
Hi Nadav,

On 06/09/2023 22:22, Nadav Amit wrote:
>
>> On Sep 6, 2023, at 4:48 AM, Lad, Prabhakar <prabhakar.csengg@gmail.com> wrote:
>>
>> Hi Alexandre,
>>
>> On Tue, Aug 1, 2023 at 9:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
>>> This function used to simply flush the whole tlb of all harts, be more
>>> subtile and try to only flush the range.
>>>
>>> The problem is that we can only use PAGE_SIZE as stride since we don't know
>>> the size of the underlying mapping and then this function will be improved
>>> only if the size of the region to flush is < threshold * PAGE_SIZE.
>>>
>>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>>> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
>>> ---
>>> arch/riscv/include/asm/tlbflush.h | 11 +++++-----
>>> arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
>>> 2 files changed, 31 insertions(+), 14 deletions(-)
>>>
>> After applying this patch, I am seeing module load issues on RZ/Five
>> (complete log [0]). I am testing defconfig + [1] (rz/five related
>> configs).
>>
>> Any pointers on what could be an issue here?
> None of my business, but looking at your code, it seems that you do not memory
> barrier before reading mm_cpumask() in __flush_tlb_range(). I believe you
> would want to synchronize __flush_tlb_range with switch_mm() similarly to the
> way it is done in x86.
>

Noted, I'll take a look at that, thanks for the advice!

Alex
Alexandre Ghiti Sept. 8, 2023, 12:34 p.m. UTC | #13
Hi Prabhakar,

On Thu, Sep 7, 2023 at 12:50 PM Lad, Prabhakar
<prabhakar.csengg@gmail.com> wrote:
>
> Hi Alexandre,
>
> On Thu, Sep 7, 2023 at 10:06 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> >
> > Hi Prabhakar,
> >
> > On Wed, Sep 6, 2023 at 3:55 PM Lad, Prabhakar
> > <prabhakar.csengg@gmail.com> wrote:
> > >
> > > Hi Alexandre,
> > >
> > > On Wed, Sep 6, 2023 at 1:43 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > >
> > > > On Wed, Sep 6, 2023 at 2:24 PM Lad, Prabhakar
> > > > <prabhakar.csengg@gmail.com> wrote:
> > > > >
> > > > > Hi Alexandre,
> > > > >
> > > > > On Wed, Sep 6, 2023 at 1:18 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > > >
> > > > > > On Wed, Sep 6, 2023 at 2:09 PM Lad, Prabhakar
> > > > > > <prabhakar.csengg@gmail.com> wrote:
> > > > > > >
> > > > > > > Hi Alexandre,
> > > > > > >
> > > > > > > On Wed, Sep 6, 2023 at 1:01 PM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > > > > >
> > > > > > > > Hi Prabhakar,
> > > > > > > >
> > > > > > > > On Wed, Sep 6, 2023 at 1:49 PM Lad, Prabhakar
> > > > > > > > <prabhakar.csengg@gmail.com> wrote:
> > > > > > > > >
> > > > > > > > > Hi Alexandre,
> > > > > > > > >
> > > > > > > > > On Tue, Aug 1, 2023 at 9:58 AM Alexandre Ghiti <alexghiti@rivosinc.com> wrote:
> > > > > > > > > >
> > > > > > > > > > This function used to simply flush the whole tlb of all harts, be more
> > > > > > > > > > subtile and try to only flush the range.
> > > > > > > > > >
> > > > > > > > > > The problem is that we can only use PAGE_SIZE as stride since we don't know
> > > > > > > > > > the size of the underlying mapping and then this function will be improved
> > > > > > > > > > only if the size of the region to flush is < threshold * PAGE_SIZE.
> > > > > > > > > >
> > > > > > > > > > Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> > > > > > > > > > Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> > > > > > > > > > ---
> > > > > > > > > >  arch/riscv/include/asm/tlbflush.h | 11 +++++-----
> > > > > > > > > >  arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
> > > > > > > > > >  2 files changed, 31 insertions(+), 14 deletions(-)
> > > > > > > > > >
> > > > > > > > > After applying this patch, I am seeing module load issues on RZ/Five
> > > > > > > > > (complete log [0]). I am testing defconfig + [1] (rz/five related
> > > > > > > > > configs).
> > > > > > > > >
> > > > > > > > > Any pointers on what could be an issue here?
> > > > > > > >
> > > > > > > > Can you give me the exact version of the kernel you use? The trap
> > > > > > > > addresses are vmalloc addresses, and a fix for those landed very late
> > > > > > > > in the release cycle.
> > > > > > > >
> > > > > > > I am using next-20230906, Ive pushed a branch [1] for you to have a look.
> > > > > > >
> > > > > > > [0] https://github.com/prabhakarlad/linux/tree/rzfive-debug
> > > > > >
> > > > > > Great, thanks, I had to get rid of this possibility :)
> > > > > >
> > > > > > As-is, I have no idea, can you try to "bisect" the problem? I mean
> > > > > > which patch in the series leads to those traps?
> > > > > >
> > > > > Oops sorry for not mentioning earlier, this is the offending patch
> > > > > which leads to the issues seen on rz/five.
> > > >
> > > > Ok, so at least I found the following problem, but I don't see how
> > > > that could fix your issue: can you give a try anyway? I keep looking
> > > > into this, thanks
> > > >
> > > > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> > > > index df2a0838c3a1..b5692bc6c76a 100644
> > > > --- a/arch/riscv/mm/tlbflush.c
> > > > +++ b/arch/riscv/mm/tlbflush.c
> > > > @@ -239,7 +239,7 @@ void flush_tlb_range(struct vm_area_struct *vma,
> > > > unsigned long start,
> > > >
> > > >  void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> > > >  {
> > > > -       __flush_tlb_range(NULL, start, end, PAGE_SIZE);
> > > > +       __flush_tlb_range(NULL, start, end - start, PAGE_SIZE);
> > > >  }
> > > >
> > > I am able to reproduce the issue with the above change too.
> >
> > I can't reproduce the problem on my Unmatched or Qemu, so it is not
> > easy to debug. But I took another look at your traces and something is
> > weird to me. In the following trace (and there is another one), the
> > trap is taken at 0xffffffff015ca034, which is the beginning of
> > rz_ssi_probe(): that's a page fault, so no translation was found (or
> > an invalid one is cached).
> >
> > [   16.586527] Unable to handle kernel paging request at virtual
> > address ffffffff015ca034
> > [   16.594750] Oops [#3]
> > ...
> > [   16.622000] epc : rz_ssi_probe+0x0/0x52a [snd_soc_rz_ssi]
> > ...
> > [   16.708697] status: 0000000200000120 badaddr: ffffffff015ca034
> > cause: 000000000000000c
> > [   16.716580] [<ffffffff015ca034>] rz_ssi_probe+0x0/0x52a
> > [snd_soc_rz_ssi]
> > ...
> >
> > But then here we are able to read the code at this same address:
> > [   16.821620] Code: 0109 6597 0000 8593 5f65 7097 7f34 80e7 7aa0 b7a9
> > (7139) f822
> >
> > So that looks like a "transient" error. Do you know if you uarch
> > caches invalid TLB entries? If you don't know, I have just written
> > some piece of code to determine if it does, let me know.
> >
> No I dont, can you please share the details so that I can pass on the
> information to you.
>
> > Do those errors always happen?
> >
> Yes they do.
>

I still can't reproduce those errors: I built different configs,
including yours, and insmod/rmmod'ed a few modules, but still can't
reproduce that. I'm having a hard time understanding how the correct
mapping magically appears in the trap handler. In the end, we removed
this patchset from 6.6...

You can give the following patch a try to determine if your uarch
caches invalid TLB entries, but honestly, I'm not sure if that would
help (but it will test my patch :)). The output can be seen in dmesg
"uarch caches invalid entries:".

If the trap addresses are constant, I would try setting a breakpoint in
flush_tlb_kernel_range() for those addresses to see what happens:
maybe it's an alignment issue or something else, maybe the function is
not even called before the trap, etc. More info is welcome :)

Thanks!

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 80af436c04ac..8f863b251898 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -58,6 +58,8 @@ bool pgtable_l5_enabled = IS_ENABLED(CONFIG_64BIT)
&& !IS_ENABLED(CONFIG_XIP_KER
 EXPORT_SYMBOL(pgtable_l4_enabled);
 EXPORT_SYMBOL(pgtable_l5_enabled);

+bool tlb_caching_invalid_entries;
+
 phys_addr_t phys_ram_base __ro_after_init;
 EXPORT_SYMBOL(phys_ram_base);

@@ -752,6 +754,18 @@ static void __init disable_pgtable_l4(void)
        satp_mode = SATP_MODE_39;
 }

+static void __init enable_pgtable_l5(void)
+{
+       pgtable_l5_enabled = true;
+       satp_mode = SATP_MODE_57;
+}
+
+static void __init enable_pgtable_l4(void)
+{
+       pgtable_l4_enabled = true;
+       satp_mode = SATP_MODE_48;
+}
+
 static int __init print_no4lvl(char *p)
 {
        pr_info("Disabled 4-level and 5-level paging");
@@ -828,6 +842,113 @@ static __init void set_satp_mode(uintptr_t dtb_pa)
        memset(early_pud, 0, PAGE_SIZE);
        memset(early_pmd, 0, PAGE_SIZE);
 }
+
+/* Determine at runtime if the uarch caches invalid TLB entries */
+static __init void set_tlb_caching_invalid_entries(void)
+{
+#define NR_RETRIES_CACHING_INVALID_ENTRIES     50
+       uintptr_t set_tlb_caching_invalid_entries_pmd = ((unsigned
long)set_tlb_caching_invalid_entries) & PMD_MASK;
+       // TODO the test_addr as defined below could go into another pud...
+       uintptr_t test_addr = set_tlb_caching_invalid_entries_pmd + 2
* PMD_SIZE;
+       pmd_t valid_pmd;
+       u64 satp;
+       int i = 0;
+
+       /* To ease the page table creation */
+       // TODO use variable instead, like in the clean, nop satp_mode too
+       disable_pgtable_l5();
+       disable_pgtable_l4();
+
+       /* Establish a mapping for set_tlb_caching_invalid_entries() in sv39 */
+       create_pgd_mapping(early_pg_dir,
+                          set_tlb_caching_invalid_entries_pmd,
+                          (uintptr_t)early_pmd,
+                          PGDIR_SIZE, PAGE_TABLE);
+
+       /* Handle the case where set_tlb_caching_invalid_entries
straddles 2 PMDs */
+       create_pmd_mapping(early_pmd,
+                          set_tlb_caching_invalid_entries_pmd,
+                          set_tlb_caching_invalid_entries_pmd,
+                          PMD_SIZE, PAGE_KERNEL_EXEC);
+       create_pmd_mapping(early_pmd,
+                          set_tlb_caching_invalid_entries_pmd + PMD_SIZE,
+                          set_tlb_caching_invalid_entries_pmd + PMD_SIZE,
+                          PMD_SIZE, PAGE_KERNEL_EXEC);
+
+       /* Establish an invalid mapping */
+       create_pmd_mapping(early_pmd, test_addr, 0, PMD_SIZE, __pgprot(0));
+
+       /* Precompute the valid pmd here because the mapping for
pfn_pmd() won't exist */
+       valid_pmd =
pfn_pmd(PFN_DOWN(set_tlb_caching_invalid_entries_pmd), PAGE_KERNEL);
+
+       local_flush_tlb_all();
+       satp = PFN_DOWN((uintptr_t)&early_pg_dir) | SATP_MODE_39;
+       csr_write(CSR_SATP, satp);
+
+       /*
+        * Set stvec to after the trapping access, access this invalid mapping
+        * and legitimately trap
+        */
+       // TODO: Should I save the previous stvec?
+#define ASM_STR(x)     __ASM_STR(x)
+       asm volatile(
+               "la a0, 1f                              \n"
+               "csrw " ASM_STR(CSR_TVEC) ", a0         \n"
+               "ld a0, 0(%0)                           \n"
+               ".align 2                               \n"
+               "1:                                     \n"
+               :
+               : "r" (test_addr)
+               : "a0"
+       );
+
+       /* Now establish a valid mapping to check if the invalid one
is cached */
+       early_pmd[pmd_index(test_addr)] = valid_pmd;
+
+       /*
+        * Access the valid mapping multiple times: indeed, we can't use
+        * sfence.vma as a barrier to make sure the cpu did not reorder accesses
+        * so we may trap even if the uarch does not cache invalid entries. By
+        * trying a few times, we make sure that those uarchs will see the right
+        * mapping at some point.
+        */
+
+       i = NR_RETRIES_CACHING_INVALID_ENTRIES;
+
+#define ASM_STR(x)     __ASM_STR(x)
+       asm_volatile_goto(
+               "la a0, 1f                                      \n"
+               "csrw " ASM_STR(CSR_TVEC) ", a0                 \n"
+               ".align 2                                       \n"
+               "1:                                             \n"
+               "addi %0, %0, -1                                \n"
+               "blt %0, zero, %l[caching_invalid_entries]      \n"
+               "ld a0, 0(%1)                                   \n"
+               :
+               : "r" (i), "r" (test_addr)
+               : "a0"
+               : caching_invalid_entries
+       );
+
+       csr_write(CSR_SATP, 0ULL);
+       local_flush_tlb_all();
+
+       /* If we don't trap, the uarch does not cache invalid entries! */
+       tlb_caching_invalid_entries = false;
+       goto clean;
+
+caching_invalid_entries:
+       csr_write(CSR_SATP, 0ULL);
+       local_flush_tlb_all();
+
+       tlb_caching_invalid_entries = true;
+clean:
+       memset(early_pg_dir, 0, PAGE_SIZE);
+       memset(early_pmd, 0, PAGE_SIZE);
+
+       enable_pgtable_l4();
+       enable_pgtable_l5();
+}
 #endif

 /*
@@ -1040,6 +1161,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 #endif

 #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
+       set_tlb_caching_invalid_entries();
        set_satp_mode(dtb_pa);
 #endif

@@ -1290,6 +1412,9 @@ static void __init setup_vm_final(void)
        local_flush_tlb_all();

        pt_ops_set_late();
+
+       pr_info("uarch caches invalid entries: %s",
+               tlb_caching_invalid_entries ? "yes": "no");
 }
 #else
 asmlinkage void __init setup_vm(uintptr_t dtb_pa)


> Cheers,
> Prabhakar
Samuel Holland Sept. 9, 2023, 7 p.m. UTC | #14
Hi Alex,

On 8/1/23 03:54, Alexandre Ghiti wrote:
> This function used to simply flush the whole tlb of all harts, be more
> subtile and try to only flush the range.
> 
> The problem is that we can only use PAGE_SIZE as stride since we don't know
> the size of the underlying mapping and then this function will be improved
> only if the size of the region to flush is < threshold * PAGE_SIZE.
> 
> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
> ---
>  arch/riscv/include/asm/tlbflush.h | 11 +++++-----
>  arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
>  2 files changed, 31 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
> index f5c4fb0ae642..7426fdcd8ec5 100644
> --- a/arch/riscv/include/asm/tlbflush.h
> +++ b/arch/riscv/include/asm/tlbflush.h
> @@ -37,6 +37,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
>  void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
>  void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
>  		     unsigned long end);
> +void flush_tlb_kernel_range(unsigned long start, unsigned long end);
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
>  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
> @@ -53,15 +54,15 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
>  	local_flush_tlb_all();
>  }
>  
> -#define flush_tlb_mm(mm) flush_tlb_all()
> -#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
> -#endif /* !CONFIG_SMP || !CONFIG_MMU */
> -
>  /* Flush a range of kernel pages */
>  static inline void flush_tlb_kernel_range(unsigned long start,
>  	unsigned long end)
>  {
> -	flush_tlb_all();
> +	local_flush_tlb_all();
>  }
>  
> +#define flush_tlb_mm(mm) flush_tlb_all()
> +#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
> +#endif /* !CONFIG_SMP || !CONFIG_MMU */
> +
>  #endif /* _ASM_RISCV_TLBFLUSH_H */
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index 0c955c474f3a..687808013758 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -120,18 +120,27 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
>  			      unsigned long size, unsigned long stride)
>  {
>  	struct flush_tlb_range_data ftd;
> -	struct cpumask *cmask = mm_cpumask(mm);
> -	unsigned int cpuid;
> +	struct cpumask *cmask, full_cmask;
>  	bool broadcast;
>  
> -	if (cpumask_empty(cmask))
> -		return;
> +	if (mm) {
> +		unsigned int cpuid;
> +
> +		cmask = mm_cpumask(mm);
> +		if (cpumask_empty(cmask))
> +			return;
> +
> +		cpuid = get_cpu();
> +		/* check if the tlbflush needs to be sent to other CPUs */
> +		broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
> +	} else {
> +		cpumask_setall(&full_cmask);
> +		cmask = &full_cmask;
> +		broadcast = true;
> +	}
>  
> -	cpuid = get_cpu();
> -	/* check if the tlbflush needs to be sent to other CPUs */
> -	broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
>  	if (static_branch_unlikely(&use_asid_allocator)) {
> -		unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;
> +		unsigned long asid = mm ? atomic_long_read(&mm->context.id) & asid_mask : 0;

I think the bug is here. Passing a value of 0 for the ASID is not the
same as passing the ASID in register x0. Only in the latter case does
the TLB flush apply to global mappings, which is what you need for
flush_tlb_kernel_range().

Regards,
Samuel

>  
>  		if (broadcast) {
>  			if (riscv_use_ipi_for_rfence()) {
> @@ -165,7 +174,8 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
>  		}
>  	}
>  
> -	put_cpu();
> +	if (mm)
> +		put_cpu();
>  }
>  
>  void flush_tlb_mm(struct mm_struct *mm)
> @@ -196,6 +206,12 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
>  
>  	__flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
>  }
> +
> +void flush_tlb_kernel_range(unsigned long start, unsigned long end)
> +{
> +	__flush_tlb_range(NULL, start, end, PAGE_SIZE);
> +}
> +
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
>  			unsigned long end)
Alexandre Ghiti Sept. 11, 2023, 8:33 a.m. UTC | #15
On 09/09/2023 21:00, Samuel Holland wrote:
> Hi Alex,
>
> On 8/1/23 03:54, Alexandre Ghiti wrote:
>> This function used to simply flush the whole tlb of all harts, be more
>> subtile and try to only flush the range.
>>
>> The problem is that we can only use PAGE_SIZE as stride since we don't know
>> the size of the underlying mapping and then this function will be improved
>> only if the size of the region to flush is < threshold * PAGE_SIZE.
>>
>> Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
>> Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
>> ---
>>   arch/riscv/include/asm/tlbflush.h | 11 +++++-----
>>   arch/riscv/mm/tlbflush.c          | 34 +++++++++++++++++++++++--------
>>   2 files changed, 31 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
>> index f5c4fb0ae642..7426fdcd8ec5 100644
>> --- a/arch/riscv/include/asm/tlbflush.h
>> +++ b/arch/riscv/include/asm/tlbflush.h
>> @@ -37,6 +37,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
>>   void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
>>   void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
>>   		     unsigned long end);
>> +void flush_tlb_kernel_range(unsigned long start, unsigned long end);
>>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>   #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
>>   void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
>> @@ -53,15 +54,15 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
>>   	local_flush_tlb_all();
>>   }
>>   
>> -#define flush_tlb_mm(mm) flush_tlb_all()
>> -#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
>> -#endif /* !CONFIG_SMP || !CONFIG_MMU */
>> -
>>   /* Flush a range of kernel pages */
>>   static inline void flush_tlb_kernel_range(unsigned long start,
>>   	unsigned long end)
>>   {
>> -	flush_tlb_all();
>> +	local_flush_tlb_all();
>>   }
>>   
>> +#define flush_tlb_mm(mm) flush_tlb_all()
>> +#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
>> +#endif /* !CONFIG_SMP || !CONFIG_MMU */
>> +
>>   #endif /* _ASM_RISCV_TLBFLUSH_H */
>> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
>> index 0c955c474f3a..687808013758 100644
>> --- a/arch/riscv/mm/tlbflush.c
>> +++ b/arch/riscv/mm/tlbflush.c
>> @@ -120,18 +120,27 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
>>   			      unsigned long size, unsigned long stride)
>>   {
>>   	struct flush_tlb_range_data ftd;
>> -	struct cpumask *cmask = mm_cpumask(mm);
>> -	unsigned int cpuid;
>> +	struct cpumask *cmask, full_cmask;
>>   	bool broadcast;
>>   
>> -	if (cpumask_empty(cmask))
>> -		return;
>> +	if (mm) {
>> +		unsigned int cpuid;
>> +
>> +		cmask = mm_cpumask(mm);
>> +		if (cpumask_empty(cmask))
>> +			return;
>> +
>> +		cpuid = get_cpu();
>> +		/* check if the tlbflush needs to be sent to other CPUs */
>> +		broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
>> +	} else {
>> +		cpumask_setall(&full_cmask);
>> +		cmask = &full_cmask;
>> +		broadcast = true;
>> +	}
>>   
>> -	cpuid = get_cpu();
>> -	/* check if the tlbflush needs to be sent to other CPUs */
>> -	broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
>>   	if (static_branch_unlikely(&use_asid_allocator)) {
>> -		unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;
>> +		unsigned long asid = mm ? atomic_long_read(&mm->context.id) & asid_mask : 0;
> I think the bug is here. Passing a value of 0 for the ASID is not the
> same as passing the ASID in register x0. Only in the latter case does
> the TLB flush apply to global mappings, which is what you need for
> flush_tlb_kernel_range().


Fantastic, thank you, I was miles away from finding this! Really nice 
catch, thanks again.

I'm fixing this and, while doing so, I may be stepping a bit on your 
patchset (some code removal), sorry about that. I'll provide a new 
version quickly for Prabhakar to test, and we'll see how we rebase 
each other's series.

Thanks again Samuel, well done!

Alex


> Regards,
> Samuel
>
>>   
>>   		if (broadcast) {
>>   			if (riscv_use_ipi_for_rfence()) {
>> @@ -165,7 +174,8 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
>>   		}
>>   	}
>>   
>> -	put_cpu();
>> +	if (mm)
>> +		put_cpu();
>>   }
>>   
>>   void flush_tlb_mm(struct mm_struct *mm)
>> @@ -196,6 +206,12 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
>>   
>>   	__flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
>>   }
>> +
>> +void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>> +{
>> +	__flush_tlb_range(NULL, start, end, PAGE_SIZE);
>> +}
>> +
>>   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>>   void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
>>   			unsigned long end)
>
> _______________________________________________
> linux-riscv mailing list
> linux-riscv@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-riscv
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index f5c4fb0ae642..7426fdcd8ec5 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -37,6 +37,7 @@  void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		     unsigned long end);
+void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
 void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
@@ -53,15 +54,15 @@  static inline void flush_tlb_range(struct vm_area_struct *vma,
 	local_flush_tlb_all();
 }
 
-#define flush_tlb_mm(mm) flush_tlb_all()
-#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
-#endif /* !CONFIG_SMP || !CONFIG_MMU */
-
 /* Flush a range of kernel pages */
 static inline void flush_tlb_kernel_range(unsigned long start,
 	unsigned long end)
 {
-	flush_tlb_all();
+	local_flush_tlb_all();
 }
 
+#define flush_tlb_mm(mm) flush_tlb_all()
+#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
+#endif /* !CONFIG_SMP || !CONFIG_MMU */
+
 #endif /* _ASM_RISCV_TLBFLUSH_H */
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 0c955c474f3a..687808013758 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -120,18 +120,27 @@  static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
 			      unsigned long size, unsigned long stride)
 {
 	struct flush_tlb_range_data ftd;
-	struct cpumask *cmask = mm_cpumask(mm);
-	unsigned int cpuid;
+	struct cpumask *cmask, full_cmask;
 	bool broadcast;
 
-	if (cpumask_empty(cmask))
-		return;
+	if (mm) {
+		unsigned int cpuid;
+
+		cmask = mm_cpumask(mm);
+		if (cpumask_empty(cmask))
+			return;
+
+		cpuid = get_cpu();
+		/* check if the tlbflush needs to be sent to other CPUs */
+		broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
+	} else {
+		cpumask_setall(&full_cmask);
+		cmask = &full_cmask;
+		broadcast = true;
+	}
 
-	cpuid = get_cpu();
-	/* check if the tlbflush needs to be sent to other CPUs */
-	broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
 	if (static_branch_unlikely(&use_asid_allocator)) {
-		unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;
+		unsigned long asid = mm ? atomic_long_read(&mm->context.id) & asid_mask : 0;
 
 		if (broadcast) {
 			if (riscv_use_ipi_for_rfence()) {
@@ -165,7 +174,8 @@  static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
 		}
 	}
 
-	put_cpu();
+	if (mm)
+		put_cpu();
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
@@ -196,6 +206,12 @@  void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 
 	__flush_tlb_range(vma->vm_mm, start, end - start, stride_size);
 }
+
+void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+{
+	__flush_tlb_range(NULL, start, end, PAGE_SIZE);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
 			unsigned long end)