Message ID | 20250213161423.449435-4-riel@surriel.com
---|---
State | New
Series | AMD broadcast TLB invalidation
On 2/13/25 08:13, Rik van Riel wrote:
> Reduce code duplication by consolidating the decision point
> for whether to do individual invalidations or a full flush
> inside get_flush_tlb_info.

Looks sane. Not a ton of code savings, but it does look a lot nicer:

Acked-by: Dave Hansen <dave.hansen@intel.com>
On Thu, Feb 13, 2025 at 11:13:54AM -0500, Rik van Riel wrote:
> @@ -1009,6 +1009,15 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
>  	info->initiating_cpu = smp_processor_id();
>  	info->trim_cpumask = 0;
>
> +	/*
> +	 * If the number of flushes is so large that a full flush
> +	 * would be faster, do a full flush.
> +	 */
> +	if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) {
> +		info->start = 0;
> +		info->end = TLB_FLUSH_ALL;
> +	}

And if you move the range decision before the info-> struct member assignments, it becomes even more readable: you use start and end in the check and then assign them, so a reader doesn't have to go and look whether start and end are the same as info->start and info->end:

	/*
	 * If the number of flushes is so large that a full flush
	 * would be faster, do a full flush.
	 */
	if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) {
		start = 0;
		end = TLB_FLUSH_ALL;
	}

	info->start = start;
	info->end = end;
	info->mm = mm;
	info->stride_shift = stride_shift;
	info->freed_tables = freed_tables;
	info->new_tlb_gen = new_tlb_gen;
	info->initiating_cpu = smp_processor_id();
	info->trim_cpumask = 0;

	return info;
}

With that:

Reviewed-by: Borislav Petkov (AMD) <bp@alien8.de>
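[Editorial aside: for readers who have not seen the heuristic before, the check above can be read as a tiny stand-alone sketch. The helper name want_full_flush() and the hard-coded ceiling of 33 are illustrative assumptions, not kernel code; on x86 the real knob is tlb_single_page_flush_ceiling, which is tunable at runtime.]

	/* Stand-alone sketch of the "per-page flush vs. full flush" heuristic. */
	#include <stdbool.h>
	#include <stdio.h>

	#define FLUSH_CEILING 33UL	/* assumed default of tlb_single_page_flush_ceiling */

	static bool want_full_flush(unsigned long start, unsigned long end,
				    unsigned int stride_shift)
	{
		/* More than FLUSH_CEILING stride-sized pages? Flush everything. */
		return ((end - start) >> stride_shift) > FLUSH_CEILING;
	}

	int main(void)
	{
		/* 33 x 4 KiB pages: still flushed one page at a time. */
		printf("%d\n", want_full_flush(0, 33UL << 12, 12));	/* 0 */
		/* 34 x 4 KiB pages (> 132 KiB): cheaper to flush the whole TLB. */
		printf("%d\n", want_full_flush(0, 34UL << 12, 12));	/* 1 */
		return 0;
	}

With 4 KiB pages (stride_shift == 12) and the assumed default of 33, any range covering more than 33 pages falls back to a full flush.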
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index ffc25b348041..924ac2263725 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1009,6 +1009,15 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
 	info->initiating_cpu = smp_processor_id();
 	info->trim_cpumask = 0;

+	/*
+	 * If the number of flushes is so large that a full flush
+	 * would be faster, do a full flush.
+	 */
+	if ((end - start) >> stride_shift > tlb_single_page_flush_ceiling) {
+		info->start = 0;
+		info->end = TLB_FLUSH_ALL;
+	}
+
 	return info;
 }

@@ -1026,17 +1035,8 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 				bool freed_tables)
 {
 	struct flush_tlb_info *info;
+	int cpu = get_cpu();
 	u64 new_tlb_gen;
-	int cpu;
-
-	cpu = get_cpu();
-
-	/* Should we flush just the requested range? */
-	if ((end == TLB_FLUSH_ALL) ||
-	    ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
-		start = 0;
-		end = TLB_FLUSH_ALL;
-	}

 	/* This is also a barrier that synchronizes with switch_mm(). */
 	new_tlb_gen = inc_mm_tlb_gen(mm);
@@ -1089,22 +1089,19 @@ static void do_kernel_range_flush(void *info)

 void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	/* Balance as user space task's flush, a bit conservative */
-	if (end == TLB_FLUSH_ALL ||
-	    (end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
-		on_each_cpu(do_flush_tlb_all, NULL, 1);
-	} else {
-		struct flush_tlb_info *info;
+	struct flush_tlb_info *info;

-		preempt_disable();
-		info = get_flush_tlb_info(NULL, start, end, 0, false,
-					  TLB_GENERATION_INVALID);
+	guard(preempt)();

+	info = get_flush_tlb_info(NULL, start, end, PAGE_SHIFT, false,
+				  TLB_GENERATION_INVALID);
+
+	if (info->end == TLB_FLUSH_ALL)
+		on_each_cpu(do_flush_tlb_all, NULL, 1);
+	else
 		on_each_cpu(do_kernel_range_flush, info, 1);

-		put_flush_tlb_info();
-		preempt_enable();
-	}
+	put_flush_tlb_info();
 }

 /*
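[Editorial aside: the guard(preempt)() in the last hunk comes from the kernel's scope-based cleanup helpers (<linux/cleanup.h>), which disable preemption when the guard is created and re-enable it automatically at the end of the scope, so the explicit preempt_disable()/preempt_enable() pair can be dropped. A rough user-space analogue is sketched below; it is only an illustration of the mechanism, and fake_preempt_disable(), fake_preempt_enable() and scoped_preempt_guard() are made-up names, not kernel API.]

	/* User-space sketch of a scope guard built on the compiler's cleanup attribute. */
	#include <stdio.h>

	static void fake_preempt_disable(void) { puts("preempt_disable()"); }
	static void fake_preempt_enable(void)  { puts("preempt_enable()"); }

	struct preempt_guard { char unused; };

	static void preempt_guard_exit(struct preempt_guard *g)
	{
		(void)g;
		fake_preempt_enable();	/* runs automatically when the guard leaves scope */
	}

	#define scoped_preempt_guard()						\
		struct preempt_guard __pg					\
			__attribute__((cleanup(preempt_guard_exit))) = { 0 };	\
		fake_preempt_disable()

	static void kernel_range_flush_demo(void)
	{
		scoped_preempt_guard();
		puts("do the flush work");
	}	/* preempt_guard_exit() fires here, on every exit path */

	int main(void)
	{
		kernel_range_flush_demo();
		return 0;
	}

The practical benefit, which the conversion above relies on, is that the enable side can never be forgotten on any exit path of the function.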