
[v4,1/1] riscv: mm: use svinval instructions instead of sfence.vma

Message ID 20230621124133.779572-2-mchitale@ventanamicro.com (mailing list archive)
State Superseded
Series RISC-V Svinval support

Checks

Context Check Description
conchuod/cover_letter success Series has a cover letter
conchuod/tree_selection success Guessed tree name to be for-next at HEAD 4681dacadeef
conchuod/fixes_present success Fixes tag not required for -next series
conchuod/maintainers_pattern success MAINTAINERS pattern errors before the patch: 6 and now 6
conchuod/verify_signedoff success Signed-off-by tag matches author and committer
conchuod/kdoc success Errors and warnings before: 0 this patch: 0
conchuod/build_rv64_clang_allmodconfig success Errors and warnings before: 8 this patch: 8
conchuod/module_param success Was 0 now: 0
conchuod/build_rv64_gcc_allmodconfig success Errors and warnings before: 8 this patch: 8
conchuod/build_rv32_defconfig success Build OK
conchuod/dtb_warn_rv64 success Errors and warnings before: 20 this patch: 20
conchuod/header_inline success No static functions without inline keyword in header files
conchuod/checkpatch warning CHECK: spaces preferred around that '%' (ctx:BxV) CHECK: spaces preferred around that '%' (ctx:WxV)
conchuod/build_rv64_nommu_k210_defconfig success Build OK
conchuod/verify_fixes success No Fixes tag
conchuod/build_rv64_nommu_virt_defconfig success Build OK

Commit Message

Mayuresh Chitale June 21, 2023, 12:41 p.m. UTC
When the Svinval extension is supported, the local_flush_tlb_page*
functions use the following sequence, instead of a plain sfence.vma,
to optimize the TLB flushes:

sfence.w.inval
sinval.vma
  .
  .
sinval.vma
sfence.inval.ir

The maximum number of consecutive sinval.vma instructions executed in
the local_flush_tlb_page* functions is capped at PTRS_PER_PTE. This is
required to avoid soft lockups, and the approach is similar to the one
used on arm64.

Signed-off-by: Mayuresh Chitale <mchitale@ventanamicro.com>
---
 arch/riscv/mm/tlbflush.c | 62 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 56 insertions(+), 6 deletions(-)

Comments

Andrew Jones June 21, 2023, 2:14 p.m. UTC | #1
On Wed, Jun 21, 2023 at 06:11:33PM +0530, Mayuresh Chitale wrote:
> When svinval is supported the local_flush_tlb_page*
> functions would prefer to use the following sequence
> to optimize the tlb flushes instead of a simple sfence.vma:
> 
> sfence.w.inval
> svinval.vma
>   .
>   .
> svinval.vma
> sfence.inval.ir
> 
> The maximum number of consecutive svinval.vma instructions
> that can be executed in local_flush_tlb_page* functions is
> limited to PTRS_PER_PTE. This is required to avoid soft
> lockups and the approach is similar to that used in arm64.
> 
> Signed-off-by: Mayuresh Chitale <mchitale@ventanamicro.com>
> ---
>  arch/riscv/mm/tlbflush.c | 62 ++++++++++++++++++++++++++++++++++++----
>  1 file changed, 56 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index 77be59aadc73..ade0b5cf8b47 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -5,6 +5,12 @@
>  #include <linux/sched.h>
>  #include <asm/sbi.h>
>  #include <asm/mmu_context.h>
> +#include <asm/hwcap.h>
> +#include <asm/insn-def.h>
> +
> +#define has_svinval()	riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
> +
> +static unsigned long tlb_flush_all_threshold __read_mostly = PTRS_PER_PTE;
>  
>  static inline void local_flush_tlb_all_asid(unsigned long asid)
>  {
> @@ -26,19 +32,63 @@ static inline void local_flush_tlb_page_asid(unsigned long addr,
>  static inline void local_flush_tlb_range(unsigned long start,
>  		unsigned long size, unsigned long stride)
>  {
> -	if (size <= stride)
> -		local_flush_tlb_page(start);
> -	else
> +	if ((size / stride) <= tlb_flush_all_threshold) {
> +		if (has_svinval()) {
> +			asm volatile(SFENCE_W_INVAL() ::: "memory");
> +			while (size) {
> +				asm volatile(SINVAL_VMA(%0, zero)
> +					     : : "r" (start) : "memory");
> +				start += stride;
> +				if (size > stride)
> +					size -= stride;
> +				else
> +					size = 0;
> +			}

nit: The four while loops added by this patch could all be written more
concisely as

  unsigned long end = start + size;

  while (start < end) {
    /* flush one */
    start += stride;
  }

And we could shift everything one level of indentation left with

  if ((size / stride) > tlb_flush_all_threshold) {
    local_flush_tlb_all();
    return;
  }

  if (has_svinval()) {
  ...
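
Putting the two together, a sketch of how local_flush_tlb_range() could
end up looking (SFENCE_W_INVAL()/SINVAL_VMA()/SFENCE_INVAL_IR(),
has_svinval() and tlb_flush_all_threshold are the ones this patch
introduces; this is only an illustration of the suggested shape, not
the code that was merged):

static inline void local_flush_tlb_range(unsigned long start,
		unsigned long size, unsigned long stride)
{
	unsigned long end = start + size;

	/* Too many pages: one full flush is cheaper than per-page invalidation. */
	if ((size / stride) > tlb_flush_all_threshold) {
		local_flush_tlb_all();
		return;
	}

	if (has_svinval()) {
		/* Order earlier page-table updates before the invalidations. */
		asm volatile(SFENCE_W_INVAL() ::: "memory");
		while (start < end) {
			asm volatile(SINVAL_VMA(%0, zero)
				     : : "r" (start) : "memory");
			start += stride;
		}
		/* Order the invalidations before later implicit references. */
		asm volatile(SFENCE_INVAL_IR() ::: "memory");
	} else {
		while (start < end) {
			local_flush_tlb_page(start);
			start += stride;
		}
	}
}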

Thanks,
drew

> +			asm volatile(SFENCE_INVAL_IR() ::: "memory");
> +		} else {
> +			while (size) {
> +				local_flush_tlb_page(start);
> +				start += stride;
> +				if (size > stride)
> +					size -= stride;
> +				else
> +					size = 0;
> +			}
> +		}
> +	} else {
>  		local_flush_tlb_all();
> +	}
>  }
>  
>  static inline void local_flush_tlb_range_asid(unsigned long start,
>  		unsigned long size, unsigned long stride, unsigned long asid)
>  {
> -	if (size <= stride)
> -		local_flush_tlb_page_asid(start, asid);
> -	else
> +	if ((size / stride) <= tlb_flush_all_threshold) {
> +		if (has_svinval()) {
> +			asm volatile(SFENCE_W_INVAL() ::: "memory");
> +			while (size) {
> +				asm volatile(SINVAL_VMA(%0, %1) : : "r" (start),
> +					     "r" (asid) : "memory");
> +				start += stride;
> +				if (size > stride)
> +					size -= stride;
> +				else
> +					size = 0;
> +			}
> +			asm volatile(SFENCE_INVAL_IR() ::: "memory");
> +		} else {
> +			while (size) {
> +				local_flush_tlb_page_asid(start, asid);
> +				start += stride;
> +				if (size > stride)
> +					size -= stride;
> +				else
> +					size = 0;
> +			}
> +		}
> +	} else {
>  		local_flush_tlb_all_asid(asid);
> +	}
>  }
>  
>  static void __ipi_flush_tlb_all(void *info)
> -- 
> 2.34.1
> 
> 
Alexandre Ghiti June 21, 2023, 3:44 p.m. UTC | #2
Hi Mayuresh,

On 21/06/2023 14:41, Mayuresh Chitale wrote:
> When svinval is supported the local_flush_tlb_page*
> functions would prefer to use the following sequence
> to optimize the tlb flushes instead of a simple sfence.vma:
>
> sfence.w.inval
> svinval.vma
>    .
>    .
> svinval.vma
> sfence.inval.ir
>
> The maximum number of consecutive svinval.vma instructions
> that can be executed in local_flush_tlb_page* functions is
> limited to PTRS_PER_PTE. This is required to avoid soft
> lockups and the approach is similar to that used in arm64.
>
> Signed-off-by: Mayuresh Chitale <mchitale@ventanamicro.com>
> ---
>   arch/riscv/mm/tlbflush.c | 62 ++++++++++++++++++++++++++++++++++++----
>   1 file changed, 56 insertions(+), 6 deletions(-)
>
> diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> index 77be59aadc73..ade0b5cf8b47 100644
> --- a/arch/riscv/mm/tlbflush.c
> +++ b/arch/riscv/mm/tlbflush.c
> @@ -5,6 +5,12 @@
>   #include <linux/sched.h>
>   #include <asm/sbi.h>
>   #include <asm/mmu_context.h>
> +#include <asm/hwcap.h>
> +#include <asm/insn-def.h>
> +
> +#define has_svinval()	riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
> +
> +static unsigned long tlb_flush_all_threshold __read_mostly = PTRS_PER_PTE;


The threshold seems quite high to me: internally, we computed that
something like 50 would be a good bet, because 512 * sfence.vma (or the
svinval equivalent) takes way more time than just flushing the whole
TLB (even accounting for the refill, I'd say). How did you get this
number? And this value is micro-architecture dependent, so we need to
find a consensus or a mechanism that lets a vendor change it.

FYI, the x86 threshold is 33 (I can't find the pointer right now, sorry).


>   
>   static inline void local_flush_tlb_all_asid(unsigned long asid)
>   {
> @@ -26,19 +32,63 @@ static inline void local_flush_tlb_page_asid(unsigned long addr,
>   static inline void local_flush_tlb_range(unsigned long start,
>   		unsigned long size, unsigned long stride)
>   {
> -	if (size <= stride)
> -		local_flush_tlb_page(start);
> -	else
> +	if ((size / stride) <= tlb_flush_all_threshold) {


If size is not aligned to stride, there can be one more page to flush,
so the threshold is not respected. In my patchset, I used DIV_ROUND_UP.
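
A minimal sketch of that check, reusing tlb_flush_all_threshold from the
patch (DIV_ROUND_UP is the existing kernel helper; this is only an
illustration, not code from the series):

	/*
	 * Round up so that a partial trailing page still counts towards
	 * the threshold, e.g. size = 5 * PAGE_SIZE + 1 with a PAGE_SIZE
	 * stride is treated as 6 pages, not 5.
	 */
	if (DIV_ROUND_UP(size, stride) > tlb_flush_all_threshold) {
		local_flush_tlb_all();
		return;
	}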


> +		if (has_svinval()) {
> +			asm volatile(SFENCE_W_INVAL() ::: "memory");
> +			while (size) {
> +				asm volatile(SINVAL_VMA(%0, zero)
> +					     : : "r" (start) : "memory");
> +				start += stride;
> +				if (size > stride)
> +					size -= stride;
> +				else
> +					size = 0;
> +			}
> +			asm volatile(SFENCE_INVAL_IR() ::: "memory");
> +		} else {
> +			while (size) {
> +				local_flush_tlb_page(start);
> +				start += stride;
> +				if (size > stride)
> +					size -= stride;
> +				else
> +					size = 0;
> +			}
> +		}
> +	} else {
>   		local_flush_tlb_all();
> +	}
>   }
>   
>   static inline void local_flush_tlb_range_asid(unsigned long start,
>   		unsigned long size, unsigned long stride, unsigned long asid)
>   {
> -	if (size <= stride)
> -		local_flush_tlb_page_asid(start, asid);
> -	else
> +	if ((size / stride) <= tlb_flush_all_threshold) {
> +		if (has_svinval()) {
> +			asm volatile(SFENCE_W_INVAL() ::: "memory");
> +			while (size) {
> +				asm volatile(SINVAL_VMA(%0, %1) : : "r" (start),
> +					     "r" (asid) : "memory");
> +				start += stride;
> +				if (size > stride)
> +					size -= stride;
> +				else
> +					size = 0;
> +			}
> +			asm volatile(SFENCE_INVAL_IR() ::: "memory");
> +		} else {
> +			while (size) {
> +				local_flush_tlb_page_asid(start, asid);
> +				start += stride;
> +				if (size > stride)
> +					size -= stride;
> +				else
> +					size = 0;
> +			}
> +		}
> +	} else {
>   		local_flush_tlb_all_asid(asid);
> +	}
>   }
>   
>   static void __ipi_flush_tlb_all(void *info)
Mayuresh Chitale June 22, 2023, 2:13 p.m. UTC | #3
On Wed, Jun 21, 2023 at 7:44 PM Andrew Jones <ajones@ventanamicro.com> wrote:
>
> On Wed, Jun 21, 2023 at 06:11:33PM +0530, Mayuresh Chitale wrote:
> > When svinval is supported the local_flush_tlb_page*
> > functions would prefer to use the following sequence
> > to optimize the tlb flushes instead of a simple sfence.vma:
> >
> > sfence.w.inval
> > svinval.vma
> >   .
> >   .
> > svinval.vma
> > sfence.inval.ir
> >
> > The maximum number of consecutive svinval.vma instructions
> > that can be executed in local_flush_tlb_page* functions is
> > limited to PTRS_PER_PTE. This is required to avoid soft
> > lockups and the approach is similar to that used in arm64.
> >
> > Signed-off-by: Mayuresh Chitale <mchitale@ventanamicro.com>
> > ---
> >  arch/riscv/mm/tlbflush.c | 62 ++++++++++++++++++++++++++++++++++++----
> >  1 file changed, 56 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> > index 77be59aadc73..ade0b5cf8b47 100644
> > --- a/arch/riscv/mm/tlbflush.c
> > +++ b/arch/riscv/mm/tlbflush.c
> > @@ -5,6 +5,12 @@
> >  #include <linux/sched.h>
> >  #include <asm/sbi.h>
> >  #include <asm/mmu_context.h>
> > +#include <asm/hwcap.h>
> > +#include <asm/insn-def.h>
> > +
> > +#define has_svinval()        riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
> > +
> > +static unsigned long tlb_flush_all_threshold __read_mostly = PTRS_PER_PTE;
> >
> >  static inline void local_flush_tlb_all_asid(unsigned long asid)
> >  {
> > @@ -26,19 +32,63 @@ static inline void local_flush_tlb_page_asid(unsigned long addr,
> >  static inline void local_flush_tlb_range(unsigned long start,
> >               unsigned long size, unsigned long stride)
> >  {
> > -     if (size <= stride)
> > -             local_flush_tlb_page(start);
> > -     else
> > +     if ((size / stride) <= tlb_flush_all_threshold) {
> > +             if (has_svinval()) {
> > +                     asm volatile(SFENCE_W_INVAL() ::: "memory");
> > +                     while (size) {
> > +                             asm volatile(SINVAL_VMA(%0, zero)
> > +                                          : : "r" (start) : "memory");
> > +                             start += stride;
> > +                             if (size > stride)
> > +                                     size -= stride;
> > +                             else
> > +                                     size = 0;
> > +                     }
>
> nit: The four while loops added by this patch could all be written more
> concisely as
>
>   unsigned long end = start + size;
>
>   while (start < end) {
>     /* flush one */
>     start += stride;
>   }
>
> And we could shift everything one level of indentation left with
>
>   if ((size / stride) > tlb_flush_all_threshold) {
>     local_flush_tlb_all();
>     return;
>   }
>
>   if (has_svinval()) {

Thx Drew. I will update in the next revision.
>   ...
>
> Thanks,
> drew
>
> > +                     asm volatile(SFENCE_INVAL_IR() ::: "memory");
> > +             } else {
> > +                     while (size) {
> > +                             local_flush_tlb_page(start);
> > +                             start += stride;
> > +                             if (size > stride)
> > +                                     size -= stride;
> > +                             else
> > +                                     size = 0;
> > +                     }
> > +             }
> > +     } else {
> >               local_flush_tlb_all();
> > +     }
> >  }
> >
> >  static inline void local_flush_tlb_range_asid(unsigned long start,
> >               unsigned long size, unsigned long stride, unsigned long asid)
> >  {
> > -     if (size <= stride)
> > -             local_flush_tlb_page_asid(start, asid);
> > -     else
> > +     if ((size / stride) <= tlb_flush_all_threshold) {
> > +             if (has_svinval()) {
> > +                     asm volatile(SFENCE_W_INVAL() ::: "memory");
> > +                     while (size) {
> > +                             asm volatile(SINVAL_VMA(%0, %1) : : "r" (start),
> > +                                          "r" (asid) : "memory");
> > +                             start += stride;
> > +                             if (size > stride)
> > +                                     size -= stride;
> > +                             else
> > +                                     size = 0;
> > +                     }
> > +                     asm volatile(SFENCE_INVAL_IR() ::: "memory");
> > +             } else {
> > +                     while (size) {
> > +                             local_flush_tlb_page_asid(start, asid);
> > +                             start += stride;
> > +                             if (size > stride)
> > +                                     size -= stride;
> > +                             else
> > +                                     size = 0;
> > +                     }
> > +             }
> > +     } else {
> >               local_flush_tlb_all_asid(asid);
> > +     }
> >  }
> >
> >  static void __ipi_flush_tlb_all(void *info)
> > --
> > 2.34.1
> >
> >
Mayuresh Chitale June 23, 2023, 6:58 a.m. UTC | #4
Hi Alex,

On Wed, Jun 21, 2023 at 9:14 PM Alexandre Ghiti <alex@ghiti.fr> wrote:
>
> Hi Mayuresh,
>
> On 21/06/2023 14:41, Mayuresh Chitale wrote:
> > When svinval is supported the local_flush_tlb_page*
> > functions would prefer to use the following sequence
> > to optimize the tlb flushes instead of a simple sfence.vma:
> >
> > sfence.w.inval
> > svinval.vma
> >    .
> >    .
> > svinval.vma
> > sfence.inval.ir
> >
> > The maximum number of consecutive svinval.vma instructions
> > that can be executed in local_flush_tlb_page* functions is
> > limited to PTRS_PER_PTE. This is required to avoid soft
> > lockups and the approach is similar to that used in arm64.
> >
> > Signed-off-by: Mayuresh Chitale <mchitale@ventanamicro.com>
> > ---
> >   arch/riscv/mm/tlbflush.c | 62 ++++++++++++++++++++++++++++++++++++----
> >   1 file changed, 56 insertions(+), 6 deletions(-)
> >
> > diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
> > index 77be59aadc73..ade0b5cf8b47 100644
> > --- a/arch/riscv/mm/tlbflush.c
> > +++ b/arch/riscv/mm/tlbflush.c
> > @@ -5,6 +5,12 @@
> >   #include <linux/sched.h>
> >   #include <asm/sbi.h>
> >   #include <asm/mmu_context.h>
> > +#include <asm/hwcap.h>
> > +#include <asm/insn-def.h>
> > +
> > +#define has_svinval()        riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
> > +
> > +static unsigned long tlb_flush_all_threshold __read_mostly = PTRS_PER_PTE;
>
>
> The threshold is quite high to me: internally, we computed that
> something like 50 would be a good bet, because to me, 512 * sfence.vma
> (or svinval equivalent) takes way more time than just flushing the whole
> tlb (even with the whole refill I'd say). How did you get this number?
> And this value is micro-architecture dependent, so we need to find a
> consensus or a mechanism to allow a vendor to change it.
I had borrowed this limit from arm64, but I agree that it is
micro-architecture specific.
So how about we make this a global variable and let the platform override it?
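
One possible shape for that, as a rough sketch only
(riscv_set_tlb_flush_all_threshold() is a hypothetical name and not part
of this patch; the threshold variable is the one the patch already adds):

/* Default from the patch; a given micro-architecture may prefer a smaller value. */
static unsigned long tlb_flush_all_threshold __read_mostly = PTRS_PER_PTE;

/* Hypothetical hook that platform/errata init code could call to tune the cutover. */
void riscv_set_tlb_flush_all_threshold(unsigned long nr_pages)
{
	tlb_flush_all_threshold = nr_pages;
}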
>
> FYI, x86 threshold is 33 (can't find right now the pointer sorry).
>
>
> >
> >   static inline void local_flush_tlb_all_asid(unsigned long asid)
> >   {
> > @@ -26,19 +32,63 @@ static inline void local_flush_tlb_page_asid(unsigned long addr,
> >   static inline void local_flush_tlb_range(unsigned long start,
> >               unsigned long size, unsigned long stride)
> >   {
> > -     if (size <= stride)
> > -             local_flush_tlb_page(start);
> > -     else
> > +     if ((size / stride) <= tlb_flush_all_threshold) {
>
>
> If size is not aligned on stride, you could get another page to flush so
> you do not respect the threshold. In my patchset, I used DIV_ROUND_UP.
Ok. Will change it in the next revision.
>
>
> > +             if (has_svinval()) {
> > +                     asm volatile(SFENCE_W_INVAL() ::: "memory");
> > +                     while (size) {
> > +                             asm volatile(SINVAL_VMA(%0, zero)
> > +                                          : : "r" (start) : "memory");
> > +                             start += stride;
> > +                             if (size > stride)
> > +                                     size -= stride;
> > +                             else
> > +                                     size = 0;
> > +                     }
> > +                     asm volatile(SFENCE_INVAL_IR() ::: "memory");
> > +             } else {
> > +                     while (size) {
> > +                             local_flush_tlb_page(start);
> > +                             start += stride;
> > +                             if (size > stride)
> > +                                     size -= stride;
> > +                             else
> > +                                     size = 0;
> > +                     }
> > +             }
> > +     } else {
> >               local_flush_tlb_all();
> > +     }
> >   }
> >
> >   static inline void local_flush_tlb_range_asid(unsigned long start,
> >               unsigned long size, unsigned long stride, unsigned long asid)
> >   {
> > -     if (size <= stride)
> > -             local_flush_tlb_page_asid(start, asid);
> > -     else
> > +     if ((size / stride) <= tlb_flush_all_threshold) {
> > +             if (has_svinval()) {
> > +                     asm volatile(SFENCE_W_INVAL() ::: "memory");
> > +                     while (size) {
> > +                             asm volatile(SINVAL_VMA(%0, %1) : : "r" (start),
> > +                                          "r" (asid) : "memory");
> > +                             start += stride;
> > +                             if (size > stride)
> > +                                     size -= stride;
> > +                             else
> > +                                     size = 0;
> > +                     }
> > +                     asm volatile(SFENCE_INVAL_IR() ::: "memory");
> > +             } else {
> > +                     while (size) {
> > +                             local_flush_tlb_page_asid(start, asid);
> > +                             start += stride;
> > +                             if (size > stride)
> > +                                     size -= stride;
> > +                             else
> > +                                     size = 0;
> > +                     }
> > +             }
> > +     } else {
> >               local_flush_tlb_all_asid(asid);
> > +     }
> >   }
> >
> >   static void __ipi_flush_tlb_all(void *info)

Patch

diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 77be59aadc73..ade0b5cf8b47 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -5,6 +5,12 @@ 
 #include <linux/sched.h>
 #include <asm/sbi.h>
 #include <asm/mmu_context.h>
+#include <asm/hwcap.h>
+#include <asm/insn-def.h>
+
+#define has_svinval()	riscv_has_extension_unlikely(RISCV_ISA_EXT_SVINVAL)
+
+static unsigned long tlb_flush_all_threshold __read_mostly = PTRS_PER_PTE;
 
 static inline void local_flush_tlb_all_asid(unsigned long asid)
 {
@@ -26,19 +32,63 @@  static inline void local_flush_tlb_page_asid(unsigned long addr,
 static inline void local_flush_tlb_range(unsigned long start,
 		unsigned long size, unsigned long stride)
 {
-	if (size <= stride)
-		local_flush_tlb_page(start);
-	else
+	if ((size / stride) <= tlb_flush_all_threshold) {
+		if (has_svinval()) {
+			asm volatile(SFENCE_W_INVAL() ::: "memory");
+			while (size) {
+				asm volatile(SINVAL_VMA(%0, zero)
+					     : : "r" (start) : "memory");
+				start += stride;
+				if (size > stride)
+					size -= stride;
+				else
+					size = 0;
+			}
+			asm volatile(SFENCE_INVAL_IR() ::: "memory");
+		} else {
+			while (size) {
+				local_flush_tlb_page(start);
+				start += stride;
+				if (size > stride)
+					size -= stride;
+				else
+					size = 0;
+			}
+		}
+	} else {
 		local_flush_tlb_all();
+	}
 }
 
 static inline void local_flush_tlb_range_asid(unsigned long start,
 		unsigned long size, unsigned long stride, unsigned long asid)
 {
-	if (size <= stride)
-		local_flush_tlb_page_asid(start, asid);
-	else
+	if ((size / stride) <= tlb_flush_all_threshold) {
+		if (has_svinval()) {
+			asm volatile(SFENCE_W_INVAL() ::: "memory");
+			while (size) {
+				asm volatile(SINVAL_VMA(%0, %1) : : "r" (start),
+					     "r" (asid) : "memory");
+				start += stride;
+				if (size > stride)
+					size -= stride;
+				else
+					size = 0;
+			}
+			asm volatile(SFENCE_INVAL_IR() ::: "memory");
+		} else {
+			while (size) {
+				local_flush_tlb_page_asid(start, asid);
+				start += stride;
+				if (size > stride)
+					size -= stride;
+				else
+					size = 0;
+			}
+		}
+	} else {
 		local_flush_tlb_all_asid(asid);
+	}
 }
 
 static void __ipi_flush_tlb_all(void *info)