[RFC,03/11] x86/mm: Page size aware flush_tlb_mm_range()

Message ID: 20180913092812.012757318@infradead.org
Series: my generic mmu_gather patches

Commit Message

Peter Zijlstra Sept. 13, 2018, 9:21 a.m. UTC
Use the new tlb_get_unmap_shift() to determine the stride of the
INVLPG loop.

Cc: Will Deacon <will.deacon@arm.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Nick Piggin <npiggin@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/tlb.h      |   21 ++++++++++++++-------
 arch/x86/include/asm/tlbflush.h |   10 ++++++----
 arch/x86/mm/tlb.c               |   10 +++++-----
 3 files changed, 25 insertions(+), 16 deletions(-)
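
For context: tlb_get_unmap_shift() is introduced earlier in the series (the generic mmu_gather rework Dave links to below). A minimal sketch of what it computes, assuming that series' per-level "cleared_*" bookkeeping in struct mmu_gather; the field names here are an approximation, see the linked patch for the authoritative definition:

static inline unsigned int tlb_get_unmap_shift(struct mmu_gather *tlb)
{
	/* Find the smallest page size that was unmapped: */
	if (tlb->cleared_ptes)
		return PAGE_SHIFT;	/* 4k entries were cleared */
	if (tlb->cleared_pmds)
		return PMD_SHIFT;	/* only 2M entries */
	if (tlb->cleared_puds)
		return PUD_SHIFT;	/* only 1G entries */
	if (tlb->cleared_p4ds)
		return P4D_SHIFT;

	/* Nothing recorded; fall back to the conservative 4k stride. */
	return PAGE_SHIFT;
}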

Comments

Dave Hansen Sept. 13, 2018, 5:22 p.m. UTC | #1
> +static inline void tlb_flush(struct mmu_gather *tlb)
> +{
> +	unsigned long start = 0UL, end = TLB_FLUSH_ALL;
> +	unsigned int invl_shift = tlb_get_unmap_shift(tlb);

I had to go back and look at

	https://patchwork.kernel.org/patch/10587207/

to figure out what was going on.  I wonder if we could make the code a
bit more standalone.

This at least needs a comment about what it's getting from 'tlb'.  Maybe
just:

	/* Find the smallest page size that we unmapped: */

> --- a/arch/x86/include/asm/tlbflush.h
> +++ b/arch/x86/include/asm/tlbflush.h
> @@ -507,23 +507,25 @@ struct flush_tlb_info {
>  	unsigned long		start;
>  	unsigned long		end;
>  	u64			new_tlb_gen;
> +	unsigned int		invl_shift;
>  };

Maybe we really should just call this flush_stride or something.

>  #define local_flush_tlb() __flush_tlb()
>  
>  #define flush_tlb_mm(mm)	flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
>  
> -#define flush_tlb_range(vma, start, end)	\
> -		flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
> +#define flush_tlb_range(vma, start, end)			\
> +		flush_tlb_mm_range((vma)->vm_mm, start, end,	\
> +				(vma)->vm_flags & VM_HUGETLB ? PMD_SHIFT : PAGE_SHIFT)

This is safe.  But couldn't this PMD_SHIFT also be PUD_SHIFT for a 1G
hugetlb page?

>  void native_flush_tlb_others(const struct cpumask *cpumask,
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -522,12 +522,12 @@ static void flush_tlb_func_common(const
>  	    f->new_tlb_gen == mm_tlb_gen) {
>  		/* Partial flush */
>  		unsigned long addr;
> -		unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
> +		unsigned long nr_pages = (f->end - f->start) >> f->invl_shift;

We might want to make this nr_invalidations or nr_flushes now so we
don't get it confused with PAGE_SIZE stuff.

Otherwise, this makes me a *tiny* bit nervous.  I think we're good about
ensuring that we fully flush 4k mappings from the TLB before going up to
a 2MB mapping because of all the errata we've had there over the years.
But, had we left 4k mappings around, the old flushing code would have
cleaned them up for us.

This certainly tightly ties the invalidations to what was in the page
tables.  If that diverged from the TLB at some point, there's certainly
more exposure here.

Looks fun, though. :)
Peter Zijlstra Sept. 13, 2018, 6:42 p.m. UTC | #2
On Thu, Sep 13, 2018 at 10:22:58AM -0700, Dave Hansen wrote:
> > +static inline void tlb_flush(struct mmu_gather *tlb)
> > +{
> > +	unsigned long start = 0UL, end = TLB_FLUSH_ALL;
> > +	unsigned int invl_shift = tlb_get_unmap_shift(tlb);
> 
> I had to go back and look at
> 
> 	https://patchwork.kernel.org/patch/10587207/

I so hate patchwork...

> to figure out what was going on.  I wonder if we could make the code a
> bit more standalone.
> 
> This at least needs a comment about what it's getting from 'tlb'.  Maybe
> just:
> 
> 	/* Find the smallest page size that we unmapped: */
> 
> > --- a/arch/x86/include/asm/tlbflush.h
> > +++ b/arch/x86/include/asm/tlbflush.h
> > @@ -507,23 +507,25 @@ struct flush_tlb_info {
> >  	unsigned long		start;
> >  	unsigned long		end;
> >  	u64			new_tlb_gen;
> > +	unsigned int		invl_shift;
> >  };
> 
> Maybe we really should just call this flush_stride or something.

But it's a shift, not a size. stride_shift?

> >  #define local_flush_tlb() __flush_tlb()
> >  
> >  #define flush_tlb_mm(mm)	flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
> >  
> > -#define flush_tlb_range(vma, start, end)	\
> > -		flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
> > +#define flush_tlb_range(vma, start, end)			\
> > +		flush_tlb_mm_range((vma)->vm_mm, start, end,	\
> > +				(vma)->vm_flags & VM_HUGETLB ? PMD_SHIFT : PAGE_SHIFT)
> 
> This is safe.  But, Couldn't this PMD_SHIFT also be PUD_SHIFT for a 1G
> hugetlb page?

It could be, but can we tell at that point?

> >  void native_flush_tlb_others(const struct cpumask *cpumask,
> > --- a/arch/x86/mm/tlb.c
> > +++ b/arch/x86/mm/tlb.c
> > @@ -522,12 +522,12 @@ static void flush_tlb_func_common(const
> >  	    f->new_tlb_gen == mm_tlb_gen) {
> >  		/* Partial flush */
> >  		unsigned long addr;
> > -		unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
> > +		unsigned long nr_pages = (f->end - f->start) >> f->invl_shift;
> 
> We might want to make this nr_invalidations or nr_flushes now so we
> don't get it confused with PAGE_SIZE stuff.

Sure, can rename.

> Otherwise, this makes me a *tiny* bit nervous.  I think we're good about
> ensuring that we fully flush 4k mappings from the TLB before going up to
> a 2MB mapping because of all the errata we've had there over the years.
> But, had we left 4k mappings around, the old flushing code would have
> cleaned them up for us.

Indeed.

> This certainly tightly ties the invalidations to what was in the page
> tables.  If that diverged from the TLB at some point, there's certainly
> more exposure here.
>
> Looks fun, though. :)

:-)
Peter Zijlstra Sept. 13, 2018, 6:46 p.m. UTC | #3
On Thu, Sep 13, 2018 at 08:42:30PM +0200, Peter Zijlstra wrote:
> > > +#define flush_tlb_range(vma, start, end)			\
> > > +		flush_tlb_mm_range((vma)->vm_mm, start, end,	\
> > > +				(vma)->vm_flags & VM_HUGETLB ? PMD_SHIFT : PAGE_SHIFT)
> > 
> > This is safe.  But, Couldn't this PMD_SHIFT also be PUD_SHIFT for a 1G
> > hugetlb page?
> 
> It could be, but can we tell at that point?

I had me a look in hugetlb.h, would something like so work?

#define flush_tlb_range(vma, start, end)			\
	flush_tlb_mm_range((vma)->vm_mm, start, end,		\
			   huge_page_shift(hstate_vma(vma)))
Dave Hansen Sept. 13, 2018, 6:47 p.m. UTC | #4
>>> --- a/arch/x86/include/asm/tlbflush.h
>>> +++ b/arch/x86/include/asm/tlbflush.h
>>> @@ -507,23 +507,25 @@ struct flush_tlb_info {
>>>  	unsigned long		start;
>>>  	unsigned long		end;
>>>  	u64			new_tlb_gen;
>>> +	unsigned int		invl_shift;
>>>  };
>>
>> Maybe we really should just call this flush_stride or something.
> 
> But its a shift, not a size. stride_shift?

Yeah, sounds better than 'invl' to me.

>>>  #define local_flush_tlb() __flush_tlb()
>>>  
>>>  #define flush_tlb_mm(mm)	flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
>>>  
>>> -#define flush_tlb_range(vma, start, end)	\
>>> -		flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
>>> +#define flush_tlb_range(vma, start, end)			\
>>> +		flush_tlb_mm_range((vma)->vm_mm, start, end,	\
>>> +				(vma)->vm_flags & VM_HUGETLB ? PMD_SHIFT : PAGE_SHIFT)
>>
>> This is safe.  But, Couldn't this PMD_SHIFT also be PUD_SHIFT for a 1G
>> hugetlb page?
> 
> It could be, but can we tell at that point?

We should have the page size via huge_page_shift(hstate_vma(vma)).  No
idea if it'll work in practice, though.
Peter Zijlstra Sept. 13, 2018, 6:48 p.m. UTC | #5
On Thu, Sep 13, 2018 at 08:46:32PM +0200, Peter Zijlstra wrote:
> On Thu, Sep 13, 2018 at 08:42:30PM +0200, Peter Zijlstra wrote:
> > > > +#define flush_tlb_range(vma, start, end)			\
> > > > +		flush_tlb_mm_range((vma)->vm_mm, start, end,	\
> > > > +				(vma)->vm_flags & VM_HUGETLB ? PMD_SHIFT : PAGE_SHIFT)
> > > 
> > > This is safe.  But, Couldn't this PMD_SHIFT also be PUD_SHIFT for a 1G
> > > hugetlb page?
> > 
> > It could be, but can we tell at that point?
> 
> I had me a look in higetlb.h, would something like so work?
> 
> #define flush_tlb_range(vma, start, end)			\
> 	flush_tlb_mm_range((vma)->vm_mm, start, end,		\
> 			   huge_page_shift(hstate_vma(vma)))
> 

D'uh

#define flush_tlb_range(vma, start, end)			\
	flush_tlb_mm_range((vma)->vm_mm, start, end,		\
	   (vma)->vm_flags & VM_HUGETLB ? huge_page_shift(hstate_vma(vma)) : PAGE_SHIFT)
Dave Hansen Sept. 13, 2018, 6:49 p.m. UTC | #6
On 09/13/2018 11:46 AM, Peter Zijlstra wrote:
> On Thu, Sep 13, 2018 at 08:42:30PM +0200, Peter Zijlstra wrote:
>>>> +#define flush_tlb_range(vma, start, end)			\
>>>> +		flush_tlb_mm_range((vma)->vm_mm, start, end,	\
>>>> +				(vma)->vm_flags & VM_HUGETLB ? PMD_SHIFT : PAGE_SHIFT)
>>> This is safe.  But, Couldn't this PMD_SHIFT also be PUD_SHIFT for a 1G
>>> hugetlb page?
>> It could be, but can we tell at that point?
> I had me a look in higetlb.h, would something like so work?
> 
> #define flush_tlb_range(vma, start, end)			\
> 	flush_tlb_mm_range((vma)->vm_mm, start, end,		\
> 			   huge_page_shift(hstate_vma(vma)))
> 

I think you still need the VM_HUGETLB check somewhere because
hstate_vma() won't work on non-VM_HUGETLB VMAs.  But, yeah, something
close to that should be OK.
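
To make the effect of the new stride concrete, here is a toy user-space illustration (not kernel code) of the heuristic in the patch below: the partial-flush path compares (end - start) >> invl_shift against tlb_single_page_flush_ceiling (33 by default), so counting in 2 MiB strides keeps ranges on the targeted INVLPG path that a 4k count would push to a full TLB flush.

#include <stdio.h>

#define PAGE_SHIFT	12	/* 4 KiB stride */
#define PMD_SHIFT	21	/* 2 MiB stride */
#define FLUSH_CEILING	33	/* tlb_single_page_flush_ceiling default */

static void classify(unsigned long len, unsigned int shift)
{
	unsigned long nr = len >> shift;	/* number of INVLPGs needed */

	printf("len=%lu MiB, stride=2^%u: %5lu invalidations -> %s\n",
	       len >> 20, shift, nr,
	       nr <= FLUSH_CEILING ? "partial flush" : "full TLB flush");
}

int main(void)
{
	unsigned long len = 32UL << 20;		/* flush a 32 MiB range */

	classify(len, PAGE_SHIFT);		/* 8192 -> full flush */
	classify(len, PMD_SHIFT);		/*   16 -> partial flush */
	return 0;
}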

Patch

--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,16 +6,23 @@ 
 #define tlb_end_vma(tlb, vma) do { } while (0)
 #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
 
-#define tlb_flush(tlb)							\
-{									\
-	if (!tlb->fullmm && !tlb->need_flush_all) 			\
-		flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, 0UL);	\
-	else								\
-		flush_tlb_mm_range(tlb->mm, 0UL, TLB_FLUSH_ALL, 0UL);	\
-}
+static inline void tlb_flush(struct mmu_gather *tlb);
 
 #include <asm-generic/tlb.h>
 
+static inline void tlb_flush(struct mmu_gather *tlb)
+{
+	unsigned long start = 0UL, end = TLB_FLUSH_ALL;
+	unsigned int invl_shift = tlb_get_unmap_shift(tlb);
+
+	if (!tlb->fullmm && !tlb->need_flush_all) {
+		start = tlb->start;
+		end = tlb->end;
+	}
+
+	flush_tlb_mm_range(tlb->mm, start, end, invl_shift);
+}
+
 /*
  * While x86 architecture in general requires an IPI to perform TLB
  * shootdown, enablement code for several hypervisors overrides
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -507,23 +507,25 @@  struct flush_tlb_info {
 	unsigned long		start;
 	unsigned long		end;
 	u64			new_tlb_gen;
+	unsigned int		invl_shift;
 };
 
 #define local_flush_tlb() __flush_tlb()
 
 #define flush_tlb_mm(mm)	flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
 
-#define flush_tlb_range(vma, start, end)	\
-		flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
+#define flush_tlb_range(vma, start, end)			\
+		flush_tlb_mm_range((vma)->vm_mm, start, end,	\
+				(vma)->vm_flags & VM_HUGETLB ? PMD_SHIFT : PAGE_SHIFT)
 
 extern void flush_tlb_all(void);
 extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
-				unsigned long end, unsigned long vmflag);
+				unsigned long end, unsigned int invl_shift);
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 
 static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 {
-	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
+	flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, PAGE_SHIFT);
 }
 
 void native_flush_tlb_others(const struct cpumask *cpumask,
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -522,12 +522,12 @@  static void flush_tlb_func_common(const
 	    f->new_tlb_gen == mm_tlb_gen) {
 		/* Partial flush */
 		unsigned long addr;
-		unsigned long nr_pages = (f->end - f->start) >> PAGE_SHIFT;
+		unsigned long nr_pages = (f->end - f->start) >> f->invl_shift;
 
 		addr = f->start;
 		while (addr < f->end) {
 			__flush_tlb_one_user(addr);
-			addr += PAGE_SIZE;
+			addr += 1UL << f->invl_shift;
 		}
 		if (local)
 			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_pages);
@@ -616,12 +616,13 @@  void native_flush_tlb_others(const struc
 static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
 
 void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
-				unsigned long end, unsigned long vmflag)
+				unsigned long end, unsigned int invl_shift)
 {
 	int cpu;
 
 	struct flush_tlb_info info __aligned(SMP_CACHE_BYTES) = {
 		.mm = mm,
+		.invl_shift = invl_shift,
 	};
 
 	cpu = get_cpu();
@@ -631,8 +632,7 @@  void flush_tlb_mm_range(struct mm_struct
 
 	/* Should we flush just the requested range? */
 	if ((end != TLB_FLUSH_ALL) &&
-	    !(vmflag & VM_HUGETLB) &&
-	    ((end - start) >> PAGE_SHIFT) <= tlb_single_page_flush_ceiling) {
+	    ((end - start) >> invl_shift) <= tlb_single_page_flush_ceiling) {
 		info.start = start;
 		info.end = end;
 	} else {