
[v3,2/2] arm64: optimize flush tlb kernel range

Message ID 20240923131351.713304-3-wangkefeng.wang@huawei.com (mailing list archive)
State New, archived
Series: arm64: tlbflush: optimize flush tlb kernel range

Commit Message

Kefeng Wang Sept. 23, 2024, 1:13 p.m. UTC
Currently the kernel TLB is flushed page by page if the target VA
range is less than MAX_DVM_OPS * PAGE_SIZE; otherwise a blunt
TLBI ALL is issued.

When the CPU supports TLB range operations we can do better:
convert to __flush_tlb_range_op(), as the other TLB range flushes
already do, to improve performance.

Co-developed-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
---
 arch/arm64/include/asm/tlbflush.h | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)
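
For reference, the size check now goes through
__flush_tlb_range_limit_excess() rather than an open-coded comparison
against MAX_DVM_OPS. Below is a minimal sketch of that helper, which
patch 1/2 of this series introduces; it is simplified here for
illustration, and the authoritative version lives in
arch/arm64/include/asm/tlbflush.h.

static inline bool __flush_tlb_range_limit_excess(unsigned long start,
		unsigned long end, unsigned long pages, unsigned long stride)
{
	/*
	 * Without FEAT_TLBIRANGE every page costs one DVM operation,
	 * so cap the by-VA path at MAX_DVM_OPS worth of pages. With
	 * range TLBIs a single operation covers many pages, so the
	 * limit rises to MAX_TLBI_RANGE_PAGES. Returning true tells
	 * the caller to fall back to flush_tlb_all().
	 */
	if ((!system_supports_tlb_range() &&
	     (end - start) >= (MAX_DVM_OPS * stride)) ||
	    pages > MAX_TLBI_RANGE_PAGES)
		return true;

	return false;
}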

Comments

Anshuman Khandual Sept. 24, 2024, 3:48 a.m. UTC | #1
On 9/23/24 18:43, Kefeng Wang wrote:
> Currently the kernel TLB is flushed page by page if the target VA
> range is less than MAX_DVM_OPS * PAGE_SIZE; otherwise a blunt
> TLBI ALL is issued.
> 
> When the CPU supports TLB range operations we can do better:
> convert to __flush_tlb_range_op(), as the other TLB range flushes
> already do, to improve performance.
> 
> Co-developed-by: Yicong Yang <yangyicong@hisilicon.com>
> Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
> Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
> ---
>  arch/arm64/include/asm/tlbflush.h | 16 +++++++++-------
>  1 file changed, 9 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
> index 5f5e7d1f2e7d..bc94e036a26b 100644
> --- a/arch/arm64/include/asm/tlbflush.h
> +++ b/arch/arm64/include/asm/tlbflush.h
> @@ -501,19 +501,21 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
>  
>  static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
>  {
> -	unsigned long addr;
> +	const unsigned long stride = PAGE_SIZE;
> +	unsigned long pages;
>  
> -	if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
> +	start = round_down(start, stride);
> +	end = round_up(end, stride);
> +	pages = (end - start) >> PAGE_SHIFT;
> +
> +	if (__flush_tlb_range_limit_excess(start, end, pages, stride)) {
>  		flush_tlb_all();
>  		return;
>  	}
>  
> -	start = __TLBI_VADDR(start, 0);
> -	end = __TLBI_VADDR(end, 0);
> -
>  	dsb(ishst);
> -	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
> -		__tlbi(vaale1is, addr);
> +	__flush_tlb_range_op(vaale1is, start, pages, stride, 0,
> +			     TLBI_TTL_UNKNOWN, false, lpa2_is_enabled());
>  	dsb(ish);
>  	isb();
>  }

Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>

Patch

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 5f5e7d1f2e7d..bc94e036a26b 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -501,19 +501,21 @@  static inline void flush_tlb_range(struct vm_area_struct *vma,
 
 static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	unsigned long addr;
+	const unsigned long stride = PAGE_SIZE;
+	unsigned long pages;
 
-	if ((end - start) > (MAX_DVM_OPS * PAGE_SIZE)) {
+	start = round_down(start, stride);
+	end = round_up(end, stride);
+	pages = (end - start) >> PAGE_SHIFT;
+
+	if (__flush_tlb_range_limit_excess(start, end, pages, stride)) {
 		flush_tlb_all();
 		return;
 	}
 
-	start = __TLBI_VADDR(start, 0);
-	end = __TLBI_VADDR(end, 0);
-
 	dsb(ishst);
-	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
-		__tlbi(vaale1is, addr);
+	__flush_tlb_range_op(vaale1is, start, pages, stride, 0,
+			     TLBI_TTL_UNKNOWN, false, lpa2_is_enabled());
 	dsb(ish);
 	isb();
 }
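
For context, a purely hypothetical caller showing where
flush_tlb_kernel_range() fits; the function name and the elided unmap
step are illustrative only and not part of the patch.

/*
 * Hypothetical example: after tearing down a kernel VA mapping, the
 * stale TLB entries must be invalidated on all CPUs. Core code such
 * as the vmalloc/vfree paths already does this.
 */
static void teardown_kernel_mapping(unsigned long va, unsigned long size)
{
	/* ... clear the page-table entries for [va, va + size) ... */

	/*
	 * With this patch, when FEAT_TLBIRANGE is present the range is
	 * flushed with TLBI range operations instead of one TLBI per
	 * page or a blunt TLBI ALL.
	 */
	flush_tlb_kernel_range(va, va + size);
}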