diff mbox series

[05/17] asm-generic/tlb: Rename HAVE_RCU_TABLE_NO_INVALIDATE

Message ID 20191211122955.940455408@infradead.org (mailing list archive)
State New, archived
Headers show
Series Fixup page directory freeing | expand

Commit Message

Peter Zijlstra Dec. 11, 2019, 12:07 p.m. UTC
Towards a more consistent naming scheme.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/Kconfig              |    3 ++-
 arch/powerpc/Kconfig      |    2 +-
 arch/sparc/Kconfig        |    2 +-
 include/asm-generic/tlb.h |    2 +-
 mm/mmu_gather.c           |    2 +-
 5 files changed, 6 insertions(+), 5 deletions(-)

Comments

Aneesh Kumar K.V Dec. 16, 2019, 12:31 p.m. UTC | #1
Peter Zijlstra <peterz@infradead.org> writes:

> Towards a more consistent naming scheme.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  arch/Kconfig              |    3 ++-
>  arch/powerpc/Kconfig      |    2 +-
>  arch/sparc/Kconfig        |    2 +-
>  include/asm-generic/tlb.h |    2 +-
>  mm/mmu_gather.c           |    2 +-
>  5 files changed, 6 insertions(+), 5 deletions(-)
>
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -396,8 +396,9 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE
>  config MMU_GATHER_RCU_TABLE_FREE
>  	bool
>  
> -config HAVE_RCU_TABLE_NO_INVALIDATE
> +config MMU_GATHER_NO_TABLE_INVALIDATE
>  	bool
> +	depends on MMU_GATHER_RCU_TABLE_FREE


Can we drop this kernel config option and instead use
MMU_GATHER_RCU_TABLE_FREE? IMHO reducing the kernel config options related to
mmu_gather can reduce the complexity. 

>  
>  config HAVE_MMU_GATHER_PAGE_SIZE
>  	bool
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -223,7 +223,7 @@ config PPC
>  	select HAVE_PERF_REGS
>  	select HAVE_PERF_USER_STACK_DUMP
>  	select MMU_GATHER_RCU_TABLE_FREE		if SMP
> -	select HAVE_RCU_TABLE_NO_INVALIDATE	if MMU_GATHER_RCU_TABLE_FREE
> +	select MMU_GATHER_NO_TABLE_INVALIDATE	if MMU_GATHER_RCU_TABLE_FREE
>  	select HAVE_MMU_GATHER_PAGE_SIZE
>  	select HAVE_REGS_AND_STACK_ACCESS_API
>  	select HAVE_RELIABLE_STACKTRACE		if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
> --- a/arch/sparc/Kconfig
> +++ b/arch/sparc/Kconfig
> @@ -65,7 +65,7 @@ config SPARC64
>  	select HAVE_KRETPROBES
>  	select HAVE_KPROBES
>  	select MMU_GATHER_RCU_TABLE_FREE if SMP
> -	select HAVE_RCU_TABLE_NO_INVALIDATE if MMU_GATHER_RCU_TABLE_FREE
> +	select MMU_GATHER_NO_TABLE_INVALIDATE if MMU_GATHER_RCU_TABLE_FREE
>  	select HAVE_MEMBLOCK_NODE_MAP
>  	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
>  	select HAVE_DYNAMIC_FTRACE
> --- a/include/asm-generic/tlb.h
> +++ b/include/asm-generic/tlb.h
> @@ -137,7 +137,7 @@
>   *  When used, an architecture is expected to provide __tlb_remove_table()
>   *  which does the actual freeing of these pages.
>   *
> - *  HAVE_RCU_TABLE_NO_INVALIDATE
> + *  MMU_GATHER_NO_TABLE_INVALIDATE
>   *
>   *  This makes MMU_GATHER_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before
>   *  freeing the page-table pages. This can be avoided if you use
> --- a/mm/mmu_gather.c
> +++ b/mm/mmu_gather.c
> @@ -102,7 +102,7 @@ bool __tlb_remove_page_size(struct mmu_g
>   */
>  static inline void tlb_table_invalidate(struct mmu_gather *tlb)
>  {
> -#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE
> +#ifndef CONFIG_MMU_GATHER_NO_TABLE_INVALIDATE
>  	/*
>  	 * Invalidate page-table caches used by hardware walkers. Then we still
>  	 * need to RCU-sched wait while freeing the pages because software
Peter Zijlstra Dec. 16, 2019, 12:37 p.m. UTC | #2
On Mon, Dec 16, 2019 at 06:01:58PM +0530, Aneesh Kumar K.V wrote:
> Peter Zijlstra <peterz@infradead.org> writes:
> 
> > Towards a more consistent naming scheme.
> >
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > ---
> >  arch/Kconfig              |    3 ++-
> >  arch/powerpc/Kconfig      |    2 +-
> >  arch/sparc/Kconfig        |    2 +-
> >  include/asm-generic/tlb.h |    2 +-
> >  mm/mmu_gather.c           |    2 +-
> >  5 files changed, 6 insertions(+), 5 deletions(-)
> >
> > --- a/arch/Kconfig
> > +++ b/arch/Kconfig
> > @@ -396,8 +396,9 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE
> >  config MMU_GATHER_RCU_TABLE_FREE
> >  	bool
> >  
> > -config HAVE_RCU_TABLE_NO_INVALIDATE
> > +config MMU_GATHER_NO_TABLE_INVALIDATE
> >  	bool
> > +	depends on MMU_GATHER_RCU_TABLE_FREE
> 
> 
> Can we drop this Kernel config option instead use
> MMU_GATHER_RCU_TABLE_FREE? IMHO reducing the kernel config related to
> mmu_gather can reduce the complexity. 

I'm confused, are you saying you're happy to have PowerPC eat the extra
TLB invalidates? I thought you cared about PPC performance :-)
Aneesh Kumar K.V Dec. 16, 2019, 1:13 p.m. UTC | #3
On 12/16/19 6:07 PM, Peter Zijlstra wrote:
> On Mon, Dec 16, 2019 at 06:01:58PM +0530, Aneesh Kumar K.V wrote:
>> Peter Zijlstra <peterz@infradead.org> writes:
>>
>>> Towards a more consistent naming scheme.
>>>
>>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>>> ---
>>>   arch/Kconfig              |    3 ++-
>>>   arch/powerpc/Kconfig      |    2 +-
>>>   arch/sparc/Kconfig        |    2 +-
>>>   include/asm-generic/tlb.h |    2 +-
>>>   mm/mmu_gather.c           |    2 +-
>>>   5 files changed, 6 insertions(+), 5 deletions(-)
>>>
>>> --- a/arch/Kconfig
>>> +++ b/arch/Kconfig
>>> @@ -396,8 +396,9 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE
>>>   config MMU_GATHER_RCU_TABLE_FREE
>>>   	bool
>>>   
>>> -config HAVE_RCU_TABLE_NO_INVALIDATE
>>> +config MMU_GATHER_NO_TABLE_INVALIDATE
>>>   	bool
>>> +	depends on MMU_GATHER_RCU_TABLE_FREE
>>
>>
>> Can we drop this Kernel config option instead use
>> MMU_GATHER_RCU_TABLE_FREE? IMHO reducing the kernel config related to
>> mmu_gather can reduce the complexity.
> 
> I'm confused, are you saing you're happy to have PowerPC eat the extra
> TLB invalidates? I thought you cared about PPC performance :-)
> 
> 

Instead can we do

static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
#ifndef CONFIG_MMU_GATHER_RCU_TABLE_FREE
	/*
	 * Invalidate page-table caches used by hardware walkers. Then we still
	 * need to RCU-sched wait while freeing the pages because software
	 * walkers can still be in-flight.
	 */
	tlb_flush_mmu_tlbonly(tlb);
#endif
}

-aneesh
Peter Zijlstra Dec. 16, 2019, 1:20 p.m. UTC | #4
On Mon, Dec 16, 2019 at 06:43:53PM +0530, Aneesh Kumar K.V wrote:
> On 12/16/19 6:07 PM, Peter Zijlstra wrote:

> > I'm confused, are you saing you're happy to have PowerPC eat the extra
> > TLB invalidates? I thought you cared about PPC performance :-)
> > 
> > 
> 
> Instead can we do
> 
> static inline void tlb_table_invalidate(struct mmu_gather *tlb)
> {
> #ifndef CONFIG_MMU_GATHER_RCU_TABLE_FREE
> 	 * Invalidate page-table caches used by hardware walkers. Then we still
> 	 * need to RCU-sched wait while freeing the pages because software
> 	 * walkers can still be in-flight.
> 	 */
> 	tlb_flush_mmu_tlbonly(tlb);
> #endif
> }

How does that not break ARM/ARM64/s390 and x86 ?
Aneesh Kumar K.V Dec. 16, 2019, 1:40 p.m. UTC | #5
On 12/16/19 6:50 PM, Peter Zijlstra wrote:
> On Mon, Dec 16, 2019 at 06:43:53PM +0530, Aneesh Kumar K.V wrote:
>> On 12/16/19 6:07 PM, Peter Zijlstra wrote:
> 
>>> I'm confused, are you saing you're happy to have PowerPC eat the extra
>>> TLB invalidates? I thought you cared about PPC performance :-)
>>>
>>>
>>
>> Instead can we do
>>
>> static inline void tlb_table_invalidate(struct mmu_gather *tlb)
>> {
>> #ifndef CONFIG_MMU_GATHER_RCU_TABLE_FREE
>> 	 * Invalidate page-table caches used by hardware walkers. Then we still
>> 	 * need to RCU-sched wait while freeing the pages because software
>> 	 * walkers can still be in-flight.
>> 	 */
>> 	tlb_flush_mmu_tlbonly(tlb);
>> #endif
>> }
> 
> How does that not break ARM/ARM64/s390 and x86 ?
> 

Hmm I missed that usage of RCU_TABLE_NO_INVALIDATE.

Ok I guess we need to revert this change that went upstream this merge 
window then

commit 52162ec784fa05f3a4b1d8e84421279998be3773
Author: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Date:   Thu Oct 24 13:28:00 2019 +0530

     powerpc/mm/book3s64/radix: Use freed_tables instead of need_flush_all



I will review the change closely.

-aneesh
Aneesh Kumar K.V Dec. 16, 2019, 1:54 p.m. UTC | #6
On 12/16/19 7:10 PM, Aneesh Kumar K.V wrote:
> On 12/16/19 6:50 PM, Peter Zijlstra wrote:
>> On Mon, Dec 16, 2019 at 06:43:53PM +0530, Aneesh Kumar K.V wrote:
>>> On 12/16/19 6:07 PM, Peter Zijlstra wrote:
>>
>>>> I'm confused, are you saing you're happy to have PowerPC eat the extra
>>>> TLB invalidates? I thought you cared about PPC performance :-)
>>>>
>>>>
>>>
>>> Instead can we do
>>>
>>> static inline void tlb_table_invalidate(struct mmu_gather *tlb)
>>> {
>>> #ifndef CONFIG_MMU_GATHER_RCU_TABLE_FREE
>>>      * Invalidate page-table caches used by hardware walkers. Then we 
>>> still
>>>      * need to RCU-sched wait while freeing the pages because software
>>>      * walkers can still be in-flight.
>>>      */
>>>     tlb_flush_mmu_tlbonly(tlb);
>>> #endif
>>> }
>>
>> How does that not break ARM/ARM64/s390 and x86 ?
>>
> 
> Hmm I missed that usage of RCU_TABLE_NO_INVALIDATE.
> 
> Ok I guess we need to revert this change that went upstream this merge 
> window then
> 
> commit 52162ec784fa05f3a4b1d8e84421279998be3773
> Author: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> Date:   Thu Oct 24 13:28:00 2019 +0530
> 
>      powerpc/mm/book3s64/radix: Use freed_tables instead of need_flush_all
> 
> 
> 
> I will review the change closely.


So __p*_free_tlb() routines on ppc64 just mark that we need a page walk 
cache flush and the actual flush is done in tlb_flush_mmu. As per

d86564a2f085b79ec046a5cba90188e61235280 (mm/tlb, x86/mm: Support 
invalidating TLB caches for RCU_TABLE_FREE ) that is not sufficient?

-aneesh
Peter Zijlstra Dec. 16, 2019, 2 p.m. UTC | #7
On Mon, Dec 16, 2019 at 07:10:30PM +0530, Aneesh Kumar K.V wrote:
> On 12/16/19 6:50 PM, Peter Zijlstra wrote:
> > On Mon, Dec 16, 2019 at 06:43:53PM +0530, Aneesh Kumar K.V wrote:
> > > On 12/16/19 6:07 PM, Peter Zijlstra wrote:
> > 
> > > > I'm confused, are you saing you're happy to have PowerPC eat the extra
> > > > TLB invalidates? I thought you cared about PPC performance :-)
> > > > 
> > > > 
> > > 
> > > Instead can we do
> > > 
> > > static inline void tlb_table_invalidate(struct mmu_gather *tlb)
> > > {
> > > #ifndef CONFIG_MMU_GATHER_RCU_TABLE_FREE
> > > 	 * Invalidate page-table caches used by hardware walkers. Then we still
> > > 	 * need to RCU-sched wait while freeing the pages because software
> > > 	 * walkers can still be in-flight.
> > > 	 */
> > > 	tlb_flush_mmu_tlbonly(tlb);
> > > #endif
> > > }
> > 
> > How does that not break ARM/ARM64/s390 and x86 ?
> > 
> 
> Hmm I missed that usage of RCU_TABLE_NO_INVALIDATE.

What use? Only PPC and SPARC64 use that option. The reason they can use
it is because they don't have a hardware walker (with exception of
PPC-Radix, which I suppose your below patch fudges ?!).

So HAVE_RCU_TABLE_FREE will provide tlb_remove_table() for the use of
freeing page-tables/directories. This is required for all architectures
that have software walkers and !IPI TLBI.

Arm, Arm64, Power, Sparc64, s390 and x86 use this option. While x86
natively has IPI based TLBI, a bunch of the virtualization solutions got
rid of the IPI for performance.

Of those, Arm, Arm64, s390, x86 (and PPC-Radix) also have hardware
walkers on those page-tables, and thus _must_ TLBI in between unhooking
and freeing these pages.

PPC-Hash and Sparc64 OTOH only ever access the linux page-tables through
the software walker and thus can forgo this TLBI, and _THAT_ is what
TABLE_NO_INVALIDATE is about (there actually is a comment that clearly
states this).

> Ok I guess we need to revert this change that went upstream this merge
> window then
> 
> commit 52162ec784fa05f3a4b1d8e84421279998be3773
> Author: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
> Date:   Thu Oct 24 13:28:00 2019 +0530
> 
>     powerpc/mm/book3s64/radix: Use freed_tables instead of need_flush_all
> 

This really looks like you've got PPC-Radix wrong. As soon as you got
hardware walkers on the linux page-tables, you must not use
TABLE_NO_INVALIDATE.
Peter Zijlstra Dec. 16, 2019, 2:50 p.m. UTC | #8
On Mon, Dec 16, 2019 at 07:24:24PM +0530, Aneesh Kumar K.V wrote:

> So __p*_free_tlb() routines on ppc64 just mark that we need a page walk
> cache flush and the actual flush in done in tlb_flush_mmu.

Not quite, your __p*_free_tlb() goes to pgtable_free_tlb() which call
tlb_remove_table().

> As per
> 
> d86564a2f085b79ec046a5cba90188e61235280 (mm/tlb, x86/mm: Support
> invalidating TLB caches for RCU_TABLE_FREE ) that is not sufficient?

96bc9567cbe1 ("asm-generic/tlb, arch: Invert CONFIG_HAVE_RCU_TABLE_INVALIDATE")

And no. Since you have TABLE_NO_INVALIDATE set, tlb_remove_table() will
not TLBI when it fails to allocate a batch page, which is an error for
PPC-Radix.

There is also no TLBI when the batch page is full and the RCU callback
happens, which is also a bug on PPC-Radix.
Peter Zijlstra Dec. 16, 2019, 3:14 p.m. UTC | #9
On Mon, Dec 16, 2019 at 03:50:41PM +0100, Peter Zijlstra wrote:
> On Mon, Dec 16, 2019 at 07:24:24PM +0530, Aneesh Kumar K.V wrote:
> 
> > So __p*_free_tlb() routines on ppc64 just mark that we need a page walk
> > cache flush and the actual flush in done in tlb_flush_mmu.
> 
> Not quite, your __p*_free_tlb() goes to pgtable_free_tlb() which call
> tlb_remove_table().
> 
> > As per
> > 
> > d86564a2f085b79ec046a5cba90188e61235280 (mm/tlb, x86/mm: Support
> > invalidating TLB caches for RCU_TABLE_FREE ) that is not sufficient?
> 
> 96bc9567cbe1 ("asm-generic/tlb, arch: Invert CONFIG_HAVE_RCU_TABLE_INVALIDATE")
> 
> And no. Since you have TABLE_NO_INVALIDATE set, tlb_remove_table() will
> not TLBI when it fails to allocate a batch page, which is an error for
> PPC-Radix.
> 
> There is also no TLBI when the batch page is full and the RCU callback
> happens, which is also a bug on PPC-Radix.

It seems to me you need something like this here patch, all you need to
add is a suitable definition of tlb_needs_table_invalidate() for Power.

---

diff --git a/arch/Kconfig b/arch/Kconfig
index c44ef15866a3..98de654b79b3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -400,10 +400,6 @@ config MMU_GATHER_RCU_TABLE_FREE
 	bool
 	select MMU_GATHER_TABLE_FREE
 
-config MMU_GATHER_NO_TABLE_INVALIDATE
-	bool
-	depends on MMU_GATHER_RCU_TABLE_FREE
-
 config MMU_GATHER_PAGE_SIZE
 	bool
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 3dea4c8d39f2..2ddf24822d5b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -223,7 +223,6 @@ config PPC
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select MMU_GATHER_RCU_TABLE_FREE		if SMP
-	select MMU_GATHER_NO_TABLE_INVALIDATE	if MMU_GATHER_RCU_TABLE_FREE
 	select MMU_GATHER_PAGE_SIZE
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE		if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index a76e915ab207..acf20b6c0a54 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -66,7 +66,6 @@ config SPARC64
 	select HAVE_KRETPROBES
 	select HAVE_KPROBES
 	select MMU_GATHER_RCU_TABLE_FREE if SMP
-	select MMU_GATHER_NO_TABLE_INVALIDATE if MMU_GATHER_RCU_TABLE_FREE
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_DYNAMIC_FTRACE
diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
index a2f3fa61ee36..ac8e74a96122 100644
--- a/arch/sparc/include/asm/tlb_64.h
+++ b/arch/sparc/include/asm/tlb_64.h
@@ -28,6 +28,12 @@ void flush_tlb_pending(void);
 #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
 #define tlb_flush(tlb)	flush_tlb_pending()
 
+/*
+ * SPARC64's hardware TLB fill does not use the Linux page-tables
+ * and therefore we don't need a TLBI when freeing page-table pages.
+ */
+#define tlb_needs_table_invalidate()	(false)
+
 #include <asm-generic/tlb.h>
 
 #endif /* _SPARC64_TLB_H */
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index fe0ea6ff3636..4108d6d18ca5 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -156,13 +156,6 @@
  *  Useful if your architecture doesn't use IPIs for remote TLB invalidates
  *  and therefore doesn't naturally serialize with software page-table walkers.
  *
- *  MMU_GATHER_NO_TABLE_INVALIDATE
- *
- *  This makes MMU_GATHER_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly()
- *  before freeing the page-table pages. This can be avoided if you use
- *  MMU_GATHER_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux
- *  page-tables natively.
- *
  *  MMU_GATHER_NO_RANGE
  *
  *  Use this if your architecture lacks an efficient flush_tlb_range().
@@ -203,6 +196,24 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
 
 #endif /* CONFIG_MMU_GATHER_TABLE_FREE */
 
+#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
+
+/*
+ * This allows an architecture that does not use the linux page-tables for
+ * hardware to skip the TLBI when freeing page tables.
+ */
+#ifndef tlb_needs_table_invalidate
+#define tlb_needs_table_invalidate() (true)
+#endif
+
+#else
+
+#ifdef tlb_needs_table_invalidate
+#error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
+#endif
+
+#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
+
 #ifndef CONFIG_MMU_GATHER_NO_GATHER
 /*
  * If we can't allocate a page to make a big batch of page pointers
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 9d103031568d..a3538cb2bcbe 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -177,14 +177,14 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch)
  */
 static inline void tlb_table_invalidate(struct mmu_gather *tlb)
 {
-#ifndef CONFIG_MMU_GATHER_NO_TABLE_INVALIDATE
-	/*
-	 * Invalidate page-table caches used by hardware walkers. Then we still
-	 * need to RCU-sched wait while freeing the pages because software
-	 * walkers can still be in-flight.
-	 */
-	tlb_flush_mmu_tlbonly(tlb);
-#endif
+	if (tlb_needs_table_invalidate()) {
+		/*
+		 * Invalidate page-table caches used by hardware walkers. Then
+		 * we still need to RCU-sched wait while freeing the pages
+		 * because software walkers can still be in-flight.
+		 */
+		tlb_flush_mmu_tlbonly(tlb);
+	}
 }
 
 static void tlb_remove_table_one(void *table)
Peter Zijlstra Dec. 16, 2019, 3:30 p.m. UTC | #10
On Mon, Dec 16, 2019 at 04:14:19PM +0100, Peter Zijlstra wrote:
> On Mon, Dec 16, 2019 at 03:50:41PM +0100, Peter Zijlstra wrote:
> > On Mon, Dec 16, 2019 at 07:24:24PM +0530, Aneesh Kumar K.V wrote:
> > 
> > > So __p*_free_tlb() routines on ppc64 just mark that we need a page walk
> > > cache flush and the actual flush in done in tlb_flush_mmu.
> > 
> > Not quite, your __p*_free_tlb() goes to pgtable_free_tlb() which call
> > tlb_remove_table().
> > 
> > > As per
> > > 
> > > d86564a2f085b79ec046a5cba90188e61235280 (mm/tlb, x86/mm: Support
> > > invalidating TLB caches for RCU_TABLE_FREE ) that is not sufficient?
> > 
> > 96bc9567cbe1 ("asm-generic/tlb, arch: Invert CONFIG_HAVE_RCU_TABLE_INVALIDATE")
> > 
> > And no. Since you have TABLE_NO_INVALIDATE set, tlb_remove_table() will
> > not TLBI when it fails to allocate a batch page, which is an error for
> > PPC-Radix.
> > 
> > There is also no TLBI when the batch page is full and the RCU callback
> > happens, which is also a bug on PPC-Radix.
> 
> It seems to me you need something like this here patch, all you need to
> add is a suitable definition of tlb_needs_table_invalidate() for Power.

I'm thinking this:

#define tlb_needs_table_invalidate()	radix_enabled()

should work for you. When you have Radix you need that TLBI, if you have
Hash you don't.
Aneesh Kumar K.V Dec. 16, 2019, 5 p.m. UTC | #11
On 12/16/19 9:00 PM, Peter Zijlstra wrote:
> On Mon, Dec 16, 2019 at 04:14:19PM +0100, Peter Zijlstra wrote:
>> On Mon, Dec 16, 2019 at 03:50:41PM +0100, Peter Zijlstra wrote:
>>> On Mon, Dec 16, 2019 at 07:24:24PM +0530, Aneesh Kumar K.V wrote:
>>>
>>>> So __p*_free_tlb() routines on ppc64 just mark that we need a page walk
>>>> cache flush and the actual flush in done in tlb_flush_mmu.
>>>
>>> Not quite, your __p*_free_tlb() goes to pgtable_free_tlb() which call
>>> tlb_remove_table().
>>>
>>>> As per
>>>>
>>>> d86564a2f085b79ec046a5cba90188e61235280 (mm/tlb, x86/mm: Support
>>>> invalidating TLB caches for RCU_TABLE_FREE ) that is not sufficient?
>>>
>>> 96bc9567cbe1 ("asm-generic/tlb, arch: Invert CONFIG_HAVE_RCU_TABLE_INVALIDATE")
>>>
>>> And no. Since you have TABLE_NO_INVALIDATE set, tlb_remove_table() will
>>> not TLBI when it fails to allocate a batch page, which is an error for
>>> PPC-Radix.
>>>
>>> There is also no TLBI when the batch page is full and the RCU callback
>>> happens, which is also a bug on PPC-Radix.
>>
>> It seems to me you need something like this here patch, all you need to
>> add is a suitable definition of tlb_needs_table_invalidate() for Power.
> 
> I'm thinking this:
> 
> #define tlb_needs_table_invalidate()	radix_enabled()
> 
> should work for you. When you have Radix you need that TLBI, if you have
> Hash you don't.
> 

yes. I was doing something similar with #ifdef around 
tlb_table_invalidate(). I will take your approach rather than an arch 
override of tlb_table_invalidate()

-aneesh
Peter Zijlstra Dec. 17, 2019, 8:51 a.m. UTC | #12
On Mon, Dec 16, 2019 at 04:14:19PM +0100, Peter Zijlstra wrote:
> It seems to me you need something like this here patch, all you need to
> add is a suitable definition of tlb_needs_table_invalidate() for Power.

FWIW, Paul (Burton), MIPS should be able to have
tlb_needs_table_invalidate() return false when it has pure software TLB
fill. I tried to have a quick look for P5600 and P6600 to see if I could
find the right state that indicates hardware TLB, but couldn't find
anything.

> ---
> 
> diff --git a/arch/Kconfig b/arch/Kconfig
> index c44ef15866a3..98de654b79b3 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -400,10 +400,6 @@ config MMU_GATHER_RCU_TABLE_FREE
>  	bool
>  	select MMU_GATHER_TABLE_FREE
>  
> -config MMU_GATHER_NO_TABLE_INVALIDATE
> -	bool
> -	depends on MMU_GATHER_RCU_TABLE_FREE
> -
>  config MMU_GATHER_PAGE_SIZE
>  	bool
>  
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 3dea4c8d39f2..2ddf24822d5b 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -223,7 +223,6 @@ config PPC
>  	select HAVE_PERF_REGS
>  	select HAVE_PERF_USER_STACK_DUMP
>  	select MMU_GATHER_RCU_TABLE_FREE		if SMP
> -	select MMU_GATHER_NO_TABLE_INVALIDATE	if MMU_GATHER_RCU_TABLE_FREE
>  	select MMU_GATHER_PAGE_SIZE
>  	select HAVE_REGS_AND_STACK_ACCESS_API
>  	select HAVE_RELIABLE_STACKTRACE		if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
> diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
> index a76e915ab207..acf20b6c0a54 100644
> --- a/arch/sparc/Kconfig
> +++ b/arch/sparc/Kconfig
> @@ -66,7 +66,6 @@ config SPARC64
>  	select HAVE_KRETPROBES
>  	select HAVE_KPROBES
>  	select MMU_GATHER_RCU_TABLE_FREE if SMP
> -	select MMU_GATHER_NO_TABLE_INVALIDATE if MMU_GATHER_RCU_TABLE_FREE
>  	select HAVE_MEMBLOCK_NODE_MAP
>  	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
>  	select HAVE_DYNAMIC_FTRACE
> diff --git a/arch/sparc/include/asm/tlb_64.h b/arch/sparc/include/asm/tlb_64.h
> index a2f3fa61ee36..ac8e74a96122 100644
> --- a/arch/sparc/include/asm/tlb_64.h
> +++ b/arch/sparc/include/asm/tlb_64.h
> @@ -28,6 +28,12 @@ void flush_tlb_pending(void);
>  #define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
>  #define tlb_flush(tlb)	flush_tlb_pending()
>  
> +/*
> + * SPARC64's hardware TLB fill does not use the Linux page-tables
> + * and therefore we don't need a TLBI when freeing page-table pages.
> + */
> +#define tlb_needs_table_invalidate()	(false)
> +
>  #include <asm-generic/tlb.h>
>  
>  #endif /* _SPARC64_TLB_H */
> diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
> index fe0ea6ff3636..4108d6d18ca5 100644
> --- a/include/asm-generic/tlb.h
> +++ b/include/asm-generic/tlb.h
> @@ -156,13 +156,6 @@
>   *  Useful if your architecture doesn't use IPIs for remote TLB invalidates
>   *  and therefore doesn't naturally serialize with software page-table walkers.
>   *
> - *  MMU_GATHER_NO_TABLE_INVALIDATE
> - *
> - *  This makes MMU_GATHER_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly()
> - *  before freeing the page-table pages. This can be avoided if you use
> - *  MMU_GATHER_RCU_TABLE_FREE and your architecture does _NOT_ use the Linux
> - *  page-tables natively.
> - *
>   *  MMU_GATHER_NO_RANGE
>   *
>   *  Use this if your architecture lacks an efficient flush_tlb_range().
> @@ -203,6 +196,24 @@ extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
>  
>  #endif /* CONFIG_MMU_GATHER_TABLE_FREE */
>  
> +#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE
> +
> +/*
> + * This allows an architecture that does not use the linux page-tables for
> + * hardware to skip the TLBI when freeing page tables.
> + */
> +#ifndef tlb_needs_table_invalidate
> +#define tlb_needs_table_invalidate() (true)
> +#endif
> +
> +#else
> +
> +#ifdef tlb_needs_table_invalidate
> +#error tlb_needs_table_invalidate() requires MMU_GATHER_RCU_TABLE_FREE
> +#endif
> +
> +#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */
> +
>  #ifndef CONFIG_MMU_GATHER_NO_GATHER
>  /*
>   * If we can't allocate a page to make a big batch of page pointers
> diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
> index 9d103031568d..a3538cb2bcbe 100644
> --- a/mm/mmu_gather.c
> +++ b/mm/mmu_gather.c
> @@ -177,14 +177,14 @@ static void tlb_remove_table_free(struct mmu_table_batch *batch)
>   */
>  static inline void tlb_table_invalidate(struct mmu_gather *tlb)
>  {
> -#ifndef CONFIG_MMU_GATHER_NO_TABLE_INVALIDATE
> -	/*
> -	 * Invalidate page-table caches used by hardware walkers. Then we still
> -	 * need to RCU-sched wait while freeing the pages because software
> -	 * walkers can still be in-flight.
> -	 */
> -	tlb_flush_mmu_tlbonly(tlb);
> -#endif
> +	if (tlb_needs_table_invalidate()) {
> +		/*
> +		 * Invalidate page-table caches used by hardware walkers. Then
> +		 * we still need to RCU-sched wait while freeing the pages
> +		 * because software walkers can still be in-flight.
> +		 */
> +		tlb_flush_mmu_tlbonly(tlb);
> +	}
>  }
>  
>  static void tlb_remove_table_one(void *table)
diff mbox series

Patch

--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -396,8 +396,9 @@  config HAVE_ARCH_JUMP_LABEL_RELATIVE
 config MMU_GATHER_RCU_TABLE_FREE
 	bool
 
-config HAVE_RCU_TABLE_NO_INVALIDATE
+config MMU_GATHER_NO_TABLE_INVALIDATE
 	bool
+	depends on MMU_GATHER_RCU_TABLE_FREE
 
 config HAVE_MMU_GATHER_PAGE_SIZE
 	bool
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -223,7 +223,7 @@  config PPC
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
 	select MMU_GATHER_RCU_TABLE_FREE		if SMP
-	select HAVE_RCU_TABLE_NO_INVALIDATE	if MMU_GATHER_RCU_TABLE_FREE
+	select MMU_GATHER_NO_TABLE_INVALIDATE	if MMU_GATHER_RCU_TABLE_FREE
 	select HAVE_MMU_GATHER_PAGE_SIZE
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RELIABLE_STACKTRACE		if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -65,7 +65,7 @@  config SPARC64
 	select HAVE_KRETPROBES
 	select HAVE_KPROBES
 	select MMU_GATHER_RCU_TABLE_FREE if SMP
-	select HAVE_RCU_TABLE_NO_INVALIDATE if MMU_GATHER_RCU_TABLE_FREE
+	select MMU_GATHER_NO_TABLE_INVALIDATE if MMU_GATHER_RCU_TABLE_FREE
 	select HAVE_MEMBLOCK_NODE_MAP
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_DYNAMIC_FTRACE
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -137,7 +137,7 @@ 
  *  When used, an architecture is expected to provide __tlb_remove_table()
  *  which does the actual freeing of these pages.
  *
- *  HAVE_RCU_TABLE_NO_INVALIDATE
+ *  MMU_GATHER_NO_TABLE_INVALIDATE
  *
  *  This makes MMU_GATHER_RCU_TABLE_FREE avoid calling tlb_flush_mmu_tlbonly() before
  *  freeing the page-table pages. This can be avoided if you use
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -102,7 +102,7 @@  bool __tlb_remove_page_size(struct mmu_g
  */
 static inline void tlb_table_invalidate(struct mmu_gather *tlb)
 {
-#ifndef CONFIG_HAVE_RCU_TABLE_NO_INVALIDATE
+#ifndef CONFIG_MMU_GATHER_NO_TABLE_INVALIDATE
 	/*
 	 * Invalidate page-table caches used by hardware walkers. Then we still
 	 * need to RCU-sched wait while freeing the pages because software