
[v9,03/12] x86/mm: consolidate full flush threshold decision

Message ID 20250206044346.3810242-4-riel@surriel.com (mailing list archive)
State New
Series AMD broadcast TLB invalidation

Commit Message

Rik van Riel Feb. 6, 2025, 4:43 a.m. UTC
Reduce code duplication by consolidating the decision point
for whether to do individual invalidations or a full flush
inside get_flush_tlb_info.

Signed-off-by: Rik van Riel <riel@surriel.com>
Suggested-by: Dave Hansen <dave.hansen@intel.com>
---
 arch/x86/mm/tlb.c | 56 ++++++++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 25 deletions(-)
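
A minimal sketch of the consolidated check, simplified from the diff below
(illustrative only, not the exact kernel code):

	/*
	 * Inside get_flush_tlb_info(): if flushing the range one stride
	 * at a time would exceed tlb_single_page_flush_ceiling
	 * invalidations, promote the request to a full flush.
	 */
	if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) {
		info->start = 0;
		info->end   = TLB_FLUSH_ALL;
	}

flush_tlb_kernel_range(), for example, then checks info->end ==
TLB_FLUSH_ALL instead of repeating the threshold math, and
flush_tlb_mm_range() drops its copy of the check entirely.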

Comments

Brendan Jackman Feb. 7, 2025, 2:50 p.m. UTC | #1
On Thu, 6 Feb 2025 at 05:45, Rik van Riel <riel@surriel.com> wrote:
> @@ -1276,7 +1282,7 @@ void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
>
>         int cpu = get_cpu();
>
> -       info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
> +       info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, PAGE_SHIFT, false,
>                                   TLB_GENERATION_INVALID);

[Why] do we need this change? If it's necessary here, why isn't it
needed everywhere else that does TLB_FLUSH_ALL too, like
flush_tlb_mm()?
Rik van Riel Feb. 7, 2025, 8:22 p.m. UTC | #2
On Fri, 2025-02-07 at 15:50 +0100, Brendan Jackman wrote:
> On Thu, 6 Feb 2025 at 05:45, Rik van Riel <riel@surriel.com> wrote:
> > @@ -1276,7 +1282,7 @@ void arch_tlbbatch_flush(struct
> > arch_tlbflush_unmap_batch *batch)
> > 
> >         int cpu = get_cpu();
> > 
> > -       info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
> > +       info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL,
> > PAGE_SHIFT, false,
> >                                   TLB_GENERATION_INVALID);
> 
> [Why] do we need this change? If it's necessary here, why isn't it
> needed everywhere else that does TLB_FLUSH_ALL too, like
> flush_tlb_mm()?
> 
flush_tlb_mm() calls flush_tlb_mm_range(), which
also uses get_flush_tlb_info().

We pass in PAGE_SHIFT here to ensure that the
stride shift is specified correctly to the
INVLPGB instruction later on.
Brendan Jackman Feb. 10, 2025, 11:15 a.m. UTC | #3
On Fri, 7 Feb 2025 at 21:25, Rik van Riel <riel@surriel.com> wrote:
>
> On Fri, 2025-02-07 at 15:50 +0100, Brendan Jackman wrote:
> > On Thu, 6 Feb 2025 at 05:45, Rik van Riel <riel@surriel.com> wrote:
> > > @@ -1276,7 +1282,7 @@ void arch_tlbbatch_flush(struct
> > > arch_tlbflush_unmap_batch *batch)
> > >
> > >         int cpu = get_cpu();
> > >
> > > -       info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
> > > +       info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL,
> > > PAGE_SHIFT, false,
> > >                                   TLB_GENERATION_INVALID);
> >
> > [Why] do we need this change? If it's necessary here, why isn't it
> > needed everywhere else that does TLB_FLUSH_ALL too, like
> > flush_tlb_mm()?
> >
> flush_tlb_mm() calls flush_tlb_mm_range(), which
> also uses get_flush_tlb_info().
>
> We pass in PAGE_SHIFT here to ensure that the
> stride shift is specified correctly to the
> INVLPGB instruction later on.

Hm, I don't follow your answer here, so let me just state my current
understanding more verbosely...

flush_tlb_mm() indirectly calls get_flush_tlb_info() with
end=TLB_FLUSH_ALL and stride_shift=0. That's an invalid stride shift,
but this doesn't break anything because with TLB_FLUSH_ALL the stride
shift is never used (except to check for values above PMD_SHIFT).

So I think passing stride_shift=0 would be fine here too. Concretely,
this path specifically calls invlpgb_flush_all_nonglobals(), which doesn't
care about stride_shift, and flush_tlb_func() is the same as before
this patchset in this respect.

I would be quite happy with removing the "it's invalid but it doesn't
matter" thing everywhere, but this seems to be localised, and I think
the issue is orthogonal to the presence of invlpgb.
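
(For reference, the end=TLB_FLUSH_ALL / stride_shift=0 combination above
comes from the flush_tlb_mm() wrapper in arch/x86/include/asm/tlbflush.h,
which is roughly the following; a sketch from memory, not an exact quote
of this tree:)

	#define flush_tlb_mm(mm)					\
		flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL, true)

With end == TLB_FLUSH_ALL, the 0UL stride_shift is never used as an
actual shift, which is why nothing breaks despite the value being
nominally invalid.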
Rik van Riel Feb. 10, 2025, 7:12 p.m. UTC | #4
On Fri, 2025-02-07 at 15:50 +0100, Brendan Jackman wrote:
> On Thu, 6 Feb 2025 at 05:45, Rik van Riel <riel@surriel.com> wrote:
> > @@ -1276,7 +1282,7 @@ void arch_tlbbatch_flush(struct
> > arch_tlbflush_unmap_batch *batch)
> > 
> >         int cpu = get_cpu();
> > 
> > -       info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
> > +       info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL,
> > PAGE_SHIFT, false,
> >                                   TLB_GENERATION_INVALID);
> 
> [Why] do we need this change? If it's necessary here, why isn't it
> needed everywhere else that does TLB_FLUSH_ALL too, like
> flush_tlb_mm()?
> 
You are right, we do not actually need this.

I'll drop this for v10

Patch

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 6cf881a942bb..36939b104561 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1000,8 +1000,13 @@  static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
 	BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
 #endif
 
-	info->start		= start;
-	info->end		= end;
+	/*
+	 * Round the start and end addresses to the page size specified
+	 * by the stride shift. This ensures partial pages at the end of
+	 * a range get fully invalidated.
+	 */
+	info->start		= round_down(start, 1 << stride_shift);
+	info->end		= round_up(end, 1 << stride_shift);
 	info->mm		= mm;
 	info->stride_shift	= stride_shift;
 	info->freed_tables	= freed_tables;
@@ -1009,6 +1014,19 @@  static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
 	info->initiating_cpu	= smp_processor_id();
 	info->trim_cpumask	= 0;
 
+	WARN_ONCE(start != info->start || end != info->end,
+		  "TLB flush not stride %x aligned. Start %lx, end %lx\n",
+		  1 << stride_shift, start, end);
+
+	/*
+	 * If the number of flushes is so large that a full flush
+	 * would be faster, do a full flush.
+	 */
+	if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) {
+		info->start = 0;
+		info->end = TLB_FLUSH_ALL;
+	}
+
 	return info;
 }
 
@@ -1026,17 +1044,8 @@  void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 				bool freed_tables)
 {
 	struct flush_tlb_info *info;
+	int cpu = get_cpu();
 	u64 new_tlb_gen;
-	int cpu;
-
-	cpu = get_cpu();
-
-	/* Should we flush just the requested range? */
-	if ((end == TLB_FLUSH_ALL) ||
-	    ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
-		start = 0;
-		end = TLB_FLUSH_ALL;
-	}
 
 	/* This is also a barrier that synchronizes with switch_mm(). */
 	new_tlb_gen = inc_mm_tlb_gen(mm);
@@ -1089,22 +1098,19 @@  static void do_kernel_range_flush(void *info)
 
 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	/* Balance as user space task's flush, a bit conservative */
-	if (end == TLB_FLUSH_ALL ||
-	    (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
-		on_each_cpu(do_flush_tlb_all, NULL, 1);
-	} else {
-		struct flush_tlb_info *info;
+	struct flush_tlb_info *info;
 
-		preempt_disable();
-		info = get_flush_tlb_info(NULL, start, end, 0, false,
-					  TLB_GENERATION_INVALID);
+	guard(preempt)();
+
+	info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
+				  TLB_GENERATION_INVALID);
 
+	if (info->end == TLB_FLUSH_ALL)
+		on_each_cpu(do_flush_tlb_all, NULL, 1);
+	else
 		on_each_cpu(do_kernel_range_flush, info, 1);
 
-		put_flush_tlb_info();
-		preempt_enable();
-	}
+	put_flush_tlb_info();
 }
 
 /*
@@ -1276,7 +1282,7 @@  void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 
 	int cpu = get_cpu();
 
-	info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false,
+	info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, PAGE_SHIFT, false,
 				  TLB_GENERATION_INVALID);
 	/*
 	 * flush_tlb_multi() is not optimized for the common case in which only