
[v7,1/2] mm/tlbbatch: Introduce arch_tlbbatch_should_defer()

Message ID 20221117082648.47526-2-yangyicong@huawei.com (mailing list archive)
State New, archived
Series arm64: support batched/deferred tlb shootdown during page reclamation

Commit Message

Yicong Yang Nov. 17, 2022, 8:26 a.m. UTC
From: Anshuman Khandual <khandual@linux.vnet.ibm.com>

The entire scheme of deferred TLB flush in the reclaim path rests on
the assumption that the cost of refilling TLB entries is less than
that of flushing out individual entries by sending IPIs to remote
CPUs. But architectures can have different ways to evaluate that.
Hence, apart from checking TTU_BATCH_FLUSH in the TTU flags, the rest
of the decision should be architecture specific.

Signed-off-by: Anshuman Khandual <khandual@linux.vnet.ibm.com>
[https://lore.kernel.org/linuxppc-dev/20171101101735.2318-2-khandual@linux.vnet.ibm.com/]
Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
[Rebase and fix incorrect return value type]
Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Barry Song <baohua@kernel.org>
Reviewed-by: Xin Hao <xhao@linux.alibaba.com>
Tested-by: Punit Agrawal <punit.agrawal@bytedance.com>
---
 arch/x86/include/asm/tlbflush.h | 12 ++++++++++++
 mm/rmap.c                       |  9 +--------
 2 files changed, 13 insertions(+), 8 deletions(-)

Comments

Andrew Morton Nov. 29, 2022, 11:23 p.m. UTC | #1
On Thu, 17 Nov 2022 16:26:47 +0800 Yicong Yang <yangyicong@huawei.com> wrote:

> From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
> 
> The entire scheme of deferred TLB flush in reclaim path rests on the
> fact that the cost to refill TLB entries is less than flushing out
> individual entries by sending IPI to remote CPUs. But architecture
> can have different ways to evaluate that. Hence apart from checking
> TTU_BATCH_FLUSH in the TTU flags, rest of the decision should be
> architecture specific.
> 
> ...
>
> --- a/arch/x86/include/asm/tlbflush.h
> +++ b/arch/x86/include/asm/tlbflush.h
> @@ -240,6 +240,18 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
>  	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
>  }
>  
> +static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
> +{
> +	bool should_defer = false;
> +
> +	/* If remote CPUs need to be flushed then defer batch the flush */
> +	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
> +		should_defer = true;
> +	put_cpu();
> +
> +	return should_defer;
> +}
> +
>  static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
>  {
>  	/*
> diff --git a/mm/rmap.c b/mm/rmap.c
> index 2ec925e5fa6a..a9ab10bc0144 100644
> --- a/mm/rmap.c
> +++ b/mm/rmap.c
> @@ -685,17 +685,10 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
>   */
>  static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
>  {
> -	bool should_defer = false;
> -
>  	if (!(flags & TTU_BATCH_FLUSH))
>  		return false;
>  
> -	/* If remote CPUs need to be flushed then defer batch the flush */
> -	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
> -		should_defer = true;
> -	put_cpu();
> -
> -	return should_defer;
> +	return arch_tlbbatch_should_defer(mm);
>  }

I think this conversion could have been done better.

should_defer_flush() is compiled if
CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH.  So the patch implicitly
assumes that only x86 implements
CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH.  Presently true, but what
happens if sparc (for example) wants to set
CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH?  Now sparc needs its private
version of arch_tlbbatch_should_defer(), even if that is identical to
x86's.

Wouldn't it be better to make arch_tlbbatch_should_defer() a __weak
function in rmap.c, or a static inline inside #ifndef
ARCH_HAS_ARCH_TLBBATCH_SHOULD_DEFER, or whatever technique best fits?
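
A minimal sketch of the #ifndef-style fallback suggested above, assuming a
hypothetical ARCH_HAS_ARCH_TLBBATCH_SHOULD_DEFER marker that architectures
providing their own helper would define (this is not what the series
implements):

#ifndef ARCH_HAS_ARCH_TLBBATCH_SHOULD_DEFER
/* Generic default: defer only if remote CPUs would need an IPI. */
static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
	bool should_defer = false;

	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
		should_defer = true;
	put_cpu();

	return should_defer;
}
#endif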
Yicong Yang Nov. 30, 2022, 2:23 a.m. UTC | #2
On 2022/11/30 7:23, Andrew Morton wrote:
> On Thu, 17 Nov 2022 16:26:47 +0800 Yicong Yang <yangyicong@huawei.com> wrote:
> 
>> From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
>>
>> The entire scheme of deferred TLB flush in reclaim path rests on the
>> fact that the cost to refill TLB entries is less than flushing out
>> individual entries by sending IPI to remote CPUs. But architecture
>> can have different ways to evaluate that. Hence apart from checking
>> TTU_BATCH_FLUSH in the TTU flags, rest of the decision should be
>> architecture specific.
>>
>> ...
>>
>> --- a/arch/x86/include/asm/tlbflush.h
>> +++ b/arch/x86/include/asm/tlbflush.h
>> @@ -240,6 +240,18 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
>>  	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
>>  }
>>  
>> +static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
>> +{
>> +	bool should_defer = false;
>> +
>> +	/* If remote CPUs need to be flushed then defer batch the flush */
>> +	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
>> +		should_defer = true;
>> +	put_cpu();
>> +
>> +	return should_defer;
>> +}
>> +
>>  static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
>>  {
>>  	/*
>> diff --git a/mm/rmap.c b/mm/rmap.c
>> index 2ec925e5fa6a..a9ab10bc0144 100644
>> --- a/mm/rmap.c
>> +++ b/mm/rmap.c
>> @@ -685,17 +685,10 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
>>   */
>>  static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
>>  {
>> -	bool should_defer = false;
>> -
>>  	if (!(flags & TTU_BATCH_FLUSH))
>>  		return false;
>>  
>> -	/* If remote CPUs need to be flushed then defer batch the flush */
>> -	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
>> -		should_defer = true;
>> -	put_cpu();
>> -
>> -	return should_defer;
>> +	return arch_tlbbatch_should_defer(mm);
>>  }
> 
> I think this conversion could have been done better.
> 
> should_defer_flush() is compiled if
> CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH.  So the patch implicitly
> assumes that only x86 implements
> CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH.  Presently true, but what
> happens if sparc (for example) wants to set
> CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH?  Now sparc needs its private
> version of arch_tlbbatch_should_defer(), even if that is identical to
> x86's.
> 

The current logic is that if an architecture wants to enable batched TLB flush,
it needs to implement its own version of arch_tlbbatch_should_defer() (the hint
on whether to defer the TLB flush) and arch_tlbbatch_add_mm() (to record the
pending TLB flush), and select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH. That's what we
do in Patch 2/2 to enable this on arm64.

Since this is architecture specific, we must rely on the architecture to
implement these two functions. Only selecting ARCH_HAS_ARCH_TLBBATCH_SHOULD_DEFER
is not enough.
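
As a rough sketch of the opt-in pattern described above (hypothetical code
only, not the actual arm64 enablement from Patch 2/2; the helper bodies and
the arch_tlbbatch_add_mm() signature are placeholders):

/* arch/<arch>/include/asm/tlbflush.h -- illustrative sketch only */
static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
{
	/*
	 * Arch-specific policy, e.g. always defer when remote TLBs are
	 * flushed by broadcast rather than by IPI.
	 */
	return true;
}

static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
					struct mm_struct *mm)
{
	/* Record (or directly issue) the deferred flush for this mm. */
}

/* ...and select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH in arch/<arch>/Kconfig. */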

> Wouldn't it be better to make arch_tlbbatch_should_defer() a __weak
> function in rmap.c, or a static inline inside #ifndef
> ARCH_HAS_ARCH_TLBBATCH_SHOULD_DEFER, or whatever technique best fits?
> 

When ARCH_HAS_ARCH_TLBBATCH_SHOULD_DEFER is not selected, should_defer_flush()
is simply implemented to return false. I think this already matches what you want.
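
For reference, the surrounding structure in mm/rmap.c looks roughly like the
following (simplified sketch, with this patch applied):

#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
{
	if (!(flags & TTU_BATCH_FLUSH))
		return false;

	return arch_tlbbatch_should_defer(mm);
}
#else
/* Batched unmap TLB flush not supported by the architecture: never defer. */
static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
{
	return false;
}
#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */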

Thanks.
Anshuman Khandual Nov. 30, 2022, 2:57 a.m. UTC | #3
On 11/30/22 07:53, Yicong Yang wrote:
> On 2022/11/30 7:23, Andrew Morton wrote:
>> On Thu, 17 Nov 2022 16:26:47 +0800 Yicong Yang <yangyicong@huawei.com> wrote:
>>
>>> From: Anshuman Khandual <khandual@linux.vnet.ibm.com>
>>>
>>> The entire scheme of deferred TLB flush in reclaim path rests on the
>>> fact that the cost to refill TLB entries is less than flushing out
>>> individual entries by sending IPI to remote CPUs. But architecture
>>> can have different ways to evaluate that. Hence apart from checking
>>> TTU_BATCH_FLUSH in the TTU flags, rest of the decision should be
>>> architecture specific.
>>>
>>> ...
>>>
>>> --- a/arch/x86/include/asm/tlbflush.h
>>> +++ b/arch/x86/include/asm/tlbflush.h
>>> @@ -240,6 +240,18 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
>>>  	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
>>>  }
>>>  
>>> +static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
>>> +{
>>> +	bool should_defer = false;
>>> +
>>> +	/* If remote CPUs need to be flushed then defer batch the flush */
>>> +	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
>>> +		should_defer = true;
>>> +	put_cpu();
>>> +
>>> +	return should_defer;
>>> +}
>>> +
>>>  static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
>>>  {
>>>  	/*
>>> diff --git a/mm/rmap.c b/mm/rmap.c
>>> index 2ec925e5fa6a..a9ab10bc0144 100644
>>> --- a/mm/rmap.c
>>> +++ b/mm/rmap.c
>>> @@ -685,17 +685,10 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
>>>   */
>>>  static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
>>>  {
>>> -	bool should_defer = false;
>>> -
>>>  	if (!(flags & TTU_BATCH_FLUSH))
>>>  		return false;
>>>  
>>> -	/* If remote CPUs need to be flushed then defer batch the flush */
>>> -	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
>>> -		should_defer = true;
>>> -	put_cpu();
>>> -
>>> -	return should_defer;
>>> +	return arch_tlbbatch_should_defer(mm);
>>>  }
>>
>> I think this conversion could have been done better.
>>
>> should_defer_flush() is compiled if
>> CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH.  So the patch implicitly
>> assumes that only x86 implements
>> CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH.  Presently true, but what
>> happens if sparc (for example) wants to set
>> CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH?  Now sparc needs its private
>> version of arch_tlbbatch_should_defer(), even if that is identical to
>> x86's.
>>
> 
> The current logic is if architecture want to enable batched TLB flush, they
> need to implement their own version of arch_tlbbatch_should_defer() (for the
> hint to defer the TLB flush) and arch_tlbbatch_add_mm() (for pending TLB flush)
> and select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH. That's what we do in Patch 2/2 for
> enabling this on arm64.
> 
> Since it is architecture specific, we must rely on the architecture to implement
> these two functions. Only select the ARCH_HAS_ARCH_TLBBATCH_SHOULD_DEFER is not
> enough.
> 
>> Wouldn't it be better to make arch_tlbbatch_should_defer() a __weak
>> function in rmap.c, or a static inline inside #ifndef
>> ARCH_HAS_ARCH_TLBBATCH_SHOULD_DEFER, or whatever technique best fits?
>>
> 
> When ARCH_HAS_ARCH_TLBBATCH_SHOULD_DEFER is not selected, should_defer_flush()
> is implemented to only return false. I think this match what you want already.

Right, a platform needs to provide both helpers, arch_tlbbatch_should_defer() and
arch_tlbbatch_add_mm(), before ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH can be selected.
Otherwise there is a fallback should_defer_flush() definition which always returns
false when ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH is not selected.

Patch

diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index cda3118f3b27..8a497d902c16 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -240,6 +240,18 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT, false);
 }
 
+static inline bool arch_tlbbatch_should_defer(struct mm_struct *mm)
+{
+	bool should_defer = false;
+
+	/* If remote CPUs need to be flushed then defer batch the flush */
+	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
+		should_defer = true;
+	put_cpu();
+
+	return should_defer;
+}
+
 static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
 {
 	/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 2ec925e5fa6a..a9ab10bc0144 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -685,17 +685,10 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)
  */
 static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
 {
-	bool should_defer = false;
-
 	if (!(flags & TTU_BATCH_FLUSH))
 		return false;
 
-	/* If remote CPUs need to be flushed then defer batch the flush */
-	if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
-		should_defer = true;
-	put_cpu();
-
-	return should_defer;
+	return arch_tlbbatch_should_defer(mm);
 }
 
 /*