diff mbox series

[V3,2/2] riscv: Use use_asid_allocator flush TLB

Message ID 1621945447-38820-3-git-send-email-guoren@kernel.org (mailing list archive)
State New, archived
Headers show
Series riscv: Fixup asid_allocator remaining issues | expand

Commit Message

Guo Ren May 25, 2021, 12:24 p.m. UTC
From: Guo Ren <guoren@linux.alibaba.com>

Use static_branch_unlikely(&use_asid_allocator) to keep the original
TLB flush style, so there is no effect on existing machines. Here
are the optimized functions:
 - flush_tlb_mm
 - flush_tlb_page
 - flush_tlb_range

All of the above are based on the following newly implemented functions:
 - __sbi_tlb_flush_range_asid
 - local_flush_tlb_range_asid

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Reviewed-by: Anup Patel <anup.patel@wdc.com>
Cc: Palmer Dabbelt <palmerdabbelt@google.com>
Cc: Christoph Hellwig <hch@lst.de>
---
 arch/riscv/include/asm/mmu_context.h |  2 ++
 arch/riscv/include/asm/tlbflush.h    | 21 +++++++++++++++++++
 arch/riscv/mm/context.c              |  2 +-
 arch/riscv/mm/tlbflush.c             | 40 +++++++++++++++++++++++++++++++++---
 4 files changed, 61 insertions(+), 4 deletions(-)

Comments

Christoph Hellwig May 25, 2021, 12:35 p.m. UTC | #1
On Tue, May 25, 2021 at 12:24:07PM +0000, guoren@kernel.org wrote:
> From: Guo Ren <guoren@linux.alibaba.com>
> 
> Use static_branch_unlikely(&use_asid_allocator) to keep the origin
> tlb flush style, so it's no effect on the existing machine. Here
> are the optimized functions:
>  - flush_tlb_mm
>  - flush_tlb_page
>  - flush_tlb_range
> 
> All above are based on the below new implement functions:
>  - __sbi_tlb_flush_range_asid
>  - local_flush_tlb_range_asid


This mentions which functions you're changing, but not what the
substantial change is, and more importantly why you are changing it.

> +static inline void local_flush_tlb_range_asid(unsigned long start, unsigned long size,
> +					      unsigned long asid)

Crazy long line.  Should be:

static inline void local_flush_tlb_range_asid(unsigned long start,
		unsigned long size, unsigned long asid)

> +{
> +	unsigned long tmp = start & PAGE_MASK;
> +	unsigned long end = ALIGN(start + size, PAGE_SIZE);
> +
> +	if (size == -1) {
> +		__asm__ __volatile__ ("sfence.vma x0, %0" : : "r" (asid) : "memory");
> +		return;

Please split the global (size == -1) case into separate helpers.

> +	while(tmp < end) {

Missing whitespace before the (.

Also I think this would read nicer as:

	for (tmp = start & PAGE_MASK; tmp < end; tmp += PAGE_SIZE)

> +static void __sbi_tlb_flush_range_asid(struct cpumask *cmask, unsigned long start,
> +				       unsigned long size, unsigned long asid)

Another overly long line.

Also for all these __sbi_* functions, why the __ prefix?

> +	if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) {
> +		local_flush_tlb_range_asid(start, size, asid);
> +	} else {
> +		riscv_cpuid_to_hartid_mask(cmask, &hmask);
> +		sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), start, size, asid);

Another long line (and a few more later).
Guo Ren May 26, 2021, 3:12 a.m. UTC | #2
On Tue, May 25, 2021 at 8:35 PM Christoph Hellwig <hch@lst.de> wrote:
>
> On Tue, May 25, 2021 at 12:24:07PM +0000, guoren@kernel.org wrote:
> > From: Guo Ren <guoren@linux.alibaba.com>
> >
> > Use static_branch_unlikely(&use_asid_allocator) to keep the origin
> > tlb flush style, so it's no effect on the existing machine. Here
> > are the optimized functions:
> >  - flush_tlb_mm
> >  - flush_tlb_page
> >  - flush_tlb_range
> >
> > All above are based on the below new implement functions:
> >  - __sbi_tlb_flush_range_asid
> >  - local_flush_tlb_range_asid
>
>
> This mentions which functions you're changing, but not what the
> substantial change is, and more importantly why you are changing it.
>
> > +static inline void local_flush_tlb_range_asid(unsigned long start, unsigned long size,
> > +                                           unsigned long asid)
>
> Crazy long line.  Should be:
>
> static inline void local_flush_tlb_range_asid(unsigned long start,
>                 unsigned long size, unsigned long asid)
>
> > +{
> > +     unsigned long tmp = start & PAGE_MASK;
> > +     unsigned long end = ALIGN(start + size, PAGE_SIZE);
> > +
> > +     if (size == -1) {
> > +             __asm__ __volatile__ ("sfence.vma x0, %0" : : "r" (asid) : "memory");
> > +             return;
>
> Please split the global (size == -1) case into separate helpers.
Do you mean:
        if (size == -1) {
                __asm__ __volatile__ ("sfence.vma x0, %0"
                                :
                                : "r" (asid)
                                : "memory");
        } else {
                for (tmp = start & PAGE_MASK; tmp < end; tmp += PAGE_SIZE) {
                        __asm__ __volatile__ ("sfence.vma %0, %1"
                                        :
                                        : "r" (tmp), "r" (asid)
                                        : "memory");
                        tmp += PAGE_SIZE;
                }
        }

>
> > +     while(tmp < end) {
>
> Missing whitespace before the (.
>
> Also I think this would read nicer as:
>
>         for (tmp = start & PAGE_MASK; tmp < end; tmp += PAGE_SIZE)
>
> > +static void __sbi_tlb_flush_range_asid(struct cpumask *cmask, unsigned long start,
> > +                                    unsigned long size, unsigned long asid)
>
> Another overly long line.
>
> Also for all these __sbi_* functions, why the __ prefix?
I am just following the existing coding convention set by
__sbi_tlb_flush_range. If you don't like it, I think that should be
handled in a separate coding-convention patchset.
This patchset only adds the tlb_flush_with_asid functions.

>
> > +     if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) {
> > +             local_flush_tlb_range_asid(start, size, asid);
> > +     } else {
> > +             riscv_cpuid_to_hartid_mask(cmask, &hmask);
> > +             sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), start, size, asid);
>
> Another long line (and a few more later).
Christoph Hellwig May 26, 2021, 5:23 a.m. UTC | #3
On Wed, May 26, 2021 at 11:12:40AM +0800, Guo Ren wrote:
> > static inline void local_flush_tlb_range_asid(unsigned long start,
> >                 unsigned long size, unsigned long asid)
> >
> > > +{
> > > +     unsigned long tmp = start & PAGE_MASK;
> > > +     unsigned long end = ALIGN(start + size, PAGE_SIZE);
> > > +
> > > +     if (size == -1) {
> > > +             __asm__ __volatile__ ("sfence.vma x0, %0" : : "r" (asid) : "memory");
> > > +             return;
> >
> > Please split the global (size == -1) case into separate helpers.
> Do you mean:

No.  Basically a

static inline void local_flush_tlb_all_asid(unsigned long asid)
{
	__asm__ __volatile__ ("sfence.vma x0, %0"
				:
				: "r" (asid)
				: "memory");
}

and

static inline void local_flush_tlb_range_asid(unsigned long start,
		unsigned long size, unsigned long asid)

{
	unsigned long end = ALIGN(start + size, PAGE_SIZE), tmp;

	for (tmp = start & PAGE_MASK; tmp < end; tmp += PAGE_SIZE) {
		__asm__ __volatile__ ("sfence.vma %0, %1"
					:
					: "r" (tmp), "r" (asid)
					: "memory");
	}
}
diff mbox series

Patch

diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h
index b065941..7030837 100644
--- a/arch/riscv/include/asm/mmu_context.h
+++ b/arch/riscv/include/asm/mmu_context.h
@@ -33,6 +33,8 @@  static inline int init_new_context(struct task_struct *tsk,
 	return 0;
 }
 
+DECLARE_STATIC_KEY_FALSE(use_asid_allocator);
+
 #include <asm-generic/mmu_context.h>
 
 #endif /* _ASM_RISCV_MMU_CONTEXT_H */
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index c84218a..4b33e7f 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -22,9 +22,30 @@  static inline void local_flush_tlb_page(unsigned long addr)
 {
 	ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"));
 }
+
+static inline void local_flush_tlb_range_asid(unsigned long start, unsigned long size,
+					      unsigned long asid)
+{
+	unsigned long tmp = start & PAGE_MASK;
+	unsigned long end = ALIGN(start + size, PAGE_SIZE);
+
+	if (size == -1) {
+		__asm__ __volatile__ ("sfence.vma x0, %0" : : "r" (asid) : "memory");
+		return;
+	}
+
+	while(tmp < end) {
+		__asm__ __volatile__ ("sfence.vma %0, %1"
+				:
+				: "r" (tmp), "r" (asid)
+				: "memory");
+		tmp += PAGE_SIZE;
+	}
+}
 #else /* CONFIG_MMU */
 #define local_flush_tlb_all()			do { } while (0)
 #define local_flush_tlb_page(addr)		do { } while (0)
+#define local_flush_tlb_range_asid(addr)	do { } while (0)
 #endif /* CONFIG_MMU */
 
 #if defined(CONFIG_SMP) && defined(CONFIG_MMU)
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 68aa312..45c1b04 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -18,7 +18,7 @@ 
 
 #ifdef CONFIG_MMU
 
-static DEFINE_STATIC_KEY_FALSE(use_asid_allocator);
+DEFINE_STATIC_KEY_FALSE(use_asid_allocator);
 
 static unsigned long asid_bits;
 static unsigned long num_asids;
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 720b443..69588dc 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -4,6 +4,7 @@ 
 #include <linux/smp.h>
 #include <linux/sched.h>
 #include <asm/sbi.h>
+#include <asm/mmu_context.h>
 
 void flush_tlb_all(void)
 {
@@ -39,18 +40,51 @@  static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start,
 	put_cpu();
 }
 
+static void __sbi_tlb_flush_range_asid(struct cpumask *cmask, unsigned long start,
+				       unsigned long size, unsigned long asid)
+{
+	struct cpumask hmask;
+	unsigned int cpuid;
+
+	if (cpumask_empty(cmask))
+		return;
+
+	cpuid = get_cpu();
+
+	if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) {
+		local_flush_tlb_range_asid(start, size, asid);
+	} else {
+		riscv_cpuid_to_hartid_mask(cmask, &hmask);
+		sbi_remote_sfence_vma_asid(cpumask_bits(&hmask), start, size, asid);
+	}
+
+	put_cpu();
+}
+
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	__sbi_tlb_flush_range(mm_cpumask(mm), 0, -1);
+	if (static_branch_unlikely(&use_asid_allocator))
+		__sbi_tlb_flush_range_asid(mm_cpumask(mm), 0, -1,
+					   atomic_long_read(&mm->context.id));
+	else
+		__sbi_tlb_flush_range(mm_cpumask(mm), 0, -1);
 }
 
 void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
 {
-	__sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE);
+	if (static_branch_unlikely(&use_asid_allocator))
+		__sbi_tlb_flush_range_asid(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE,
+					   atomic_long_read(&vma->vm_mm->context.id));
+	else
+		__sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), addr, PAGE_SIZE);
 }
 
 void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
 		     unsigned long end)
 {
-	__sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start);
+	if (static_branch_unlikely(&use_asid_allocator))
+		__sbi_tlb_flush_range_asid(mm_cpumask(vma->vm_mm), start, end - start,
+					   atomic_long_read(&vma->vm_mm->context.id));
+	else
+		__sbi_tlb_flush_range(mm_cpumask(vma->vm_mm), start, end - start);
 }