diff mbox series

[v3,3/3] arm64: mm: enable per pmd page table lock

Message ID 20190310011906.254635-3-yuzhao@google.com (mailing list archive)
State New, archived
Headers show
Series [v3,1/3] arm64: mm: use appropriate ctors for page tables | expand

Commit Message

Yu Zhao March 10, 2019, 1:19 a.m. UTC
Switch from per mm_struct to per pmd page table lock by enabling
ARCH_ENABLE_SPLIT_PMD_PTLOCK. This provides better granularity for
large system.

I'm not sure if there is contention on mm->page_table_lock. Given
the option comes at no cost (apart from initializing more spin
locks), why not enable it now.

We only do so when pmd is not folded, so we don't mistakenly call
pgtable_pmd_page_ctor() on pud or p4d in pgd_pgtable_alloc(). (We
check shift against PMD_SHIFT, which is same as PUD_SHIFT when pmd
is folded).

Signed-off-by: Yu Zhao <yuzhao@google.com>
---
 arch/arm64/Kconfig               |  3 +++
 arch/arm64/include/asm/pgalloc.h | 12 +++++++++++-
 arch/arm64/include/asm/tlb.h     |  5 ++++-
 3 files changed, 18 insertions(+), 2 deletions(-)

Comments

Anshuman Khandual March 11, 2019, 8:28 a.m. UTC | #1
On 03/10/2019 06:49 AM, Yu Zhao wrote:
> Switch from per mm_struct to per pmd page table lock by enabling
> ARCH_ENABLE_SPLIT_PMD_PTLOCK. This provides better granularity for
> large system.
> 
> I'm not sure if there is contention on mm->page_table_lock. Given
> the option comes at no cost (apart from initializing more spin
> locks), why not enable it now.
> 
> We only do so when pmd is not folded, so we don't mistakenly call
> pgtable_pmd_page_ctor() on pud or p4d in pgd_pgtable_alloc(). (We
> check shift against PMD_SHIFT, which is same as PUD_SHIFT when pmd
> is folded).
> 
> Signed-off-by: Yu Zhao <yuzhao@google.com>
> ---
>  arch/arm64/Kconfig               |  3 +++
>  arch/arm64/include/asm/pgalloc.h | 12 +++++++++++-
>  arch/arm64/include/asm/tlb.h     |  5 ++++-
>  3 files changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index cfbf307d6dc4..a3b1b789f766 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -872,6 +872,9 @@ config ARCH_WANT_HUGE_PMD_SHARE
>  config ARCH_HAS_CACHE_LINE_SIZE
>  	def_bool y
>  
> +config ARCH_ENABLE_SPLIT_PMD_PTLOCK
> +	def_bool y if PGTABLE_LEVELS > 2
> +
>  config SECCOMP
>  	bool "Enable seccomp to safely compute untrusted bytecode"
>  	---help---
> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> index 52fa47c73bf0..dabba4b2c61f 100644
> --- a/arch/arm64/include/asm/pgalloc.h
> +++ b/arch/arm64/include/asm/pgalloc.h
> @@ -33,12 +33,22 @@
>  
>  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
>  {
> -	return (pmd_t *)__get_free_page(PGALLOC_GFP);
> +	struct page *page;
> +
> +	page = alloc_page(PGALLOC_GFP);
> +	if (!page)
> +		return NULL;
> +	if (!pgtable_pmd_page_ctor(page)) {
> +		__free_page(page);
> +		return NULL;
> +	}
> +	return page_address(page);
>  }
>  
>  static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
>  {
>  	BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
> +	pgtable_pmd_page_dtor(virt_to_page(pmdp));
>  	free_page((unsigned long)pmdp);
>  }

There is just one problem here. ARM KVM's stage2_pmd_free() calls into pmd_free() on a page
originally allocated with __get_free_page() and never went through pgtable_pmd_page_ctor().
So when ARCH_ENABLE_SPLIT_PMD_PTLOCK is enabled

stage2_pmd_free()
	pgtable_pmd_page_dtor()
		ptlock_free()
			kmem_cache_free(page_ptl_cachep, page->ptl)

Though SLUB implementation for kmem_cache_free() seems to be handling NULL page->ptl (as the
page never got it's lock allocated or initialized) correctly I am not sure if it is a right
thing to do.
Mark Rutland March 11, 2019, 12:12 p.m. UTC | #2
Hi,

On Sat, Mar 09, 2019 at 06:19:06PM -0700, Yu Zhao wrote:
> Switch from per mm_struct to per pmd page table lock by enabling
> ARCH_ENABLE_SPLIT_PMD_PTLOCK. This provides better granularity for
> large system.
> 
> I'm not sure if there is contention on mm->page_table_lock. Given
> the option comes at no cost (apart from initializing more spin
> locks), why not enable it now.
> 
> We only do so when pmd is not folded, so we don't mistakenly call
> pgtable_pmd_page_ctor() on pud or p4d in pgd_pgtable_alloc(). (We
> check shift against PMD_SHIFT, which is same as PUD_SHIFT when pmd
> is folded).

Just to check, I take it pgtable_pmd_page_ctor() is now a NOP when the
PMD is folded, and this last paragraph is stale?

> Signed-off-by: Yu Zhao <yuzhao@google.com>
> ---
>  arch/arm64/Kconfig               |  3 +++
>  arch/arm64/include/asm/pgalloc.h | 12 +++++++++++-
>  arch/arm64/include/asm/tlb.h     |  5 ++++-
>  3 files changed, 18 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index cfbf307d6dc4..a3b1b789f766 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -872,6 +872,9 @@ config ARCH_WANT_HUGE_PMD_SHARE
>  config ARCH_HAS_CACHE_LINE_SIZE
>  	def_bool y
>  
> +config ARCH_ENABLE_SPLIT_PMD_PTLOCK
> +	def_bool y if PGTABLE_LEVELS > 2
> +
>  config SECCOMP
>  	bool "Enable seccomp to safely compute untrusted bytecode"
>  	---help---
> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> index 52fa47c73bf0..dabba4b2c61f 100644
> --- a/arch/arm64/include/asm/pgalloc.h
> +++ b/arch/arm64/include/asm/pgalloc.h
> @@ -33,12 +33,22 @@
>  
>  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
>  {
> -	return (pmd_t *)__get_free_page(PGALLOC_GFP);
> +	struct page *page;
> +
> +	page = alloc_page(PGALLOC_GFP);
> +	if (!page)
> +		return NULL;
> +	if (!pgtable_pmd_page_ctor(page)) {
> +		__free_page(page);
> +		return NULL;
> +	}
> +	return page_address(page);
>  }
>  
>  static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
>  {
>  	BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
> +	pgtable_pmd_page_dtor(virt_to_page(pmdp));
>  	free_page((unsigned long)pmdp);
>  }

It looks like arm64's existing stage-2 code is inconsistent across
alloc/free, and IIUC this change might turn that into a real problem.
Currently we allocate all levels of stage-2 table with
__get_free_page(), but free them with p?d_free(). We always miss the
ctor and always use the dtor.

Other than that, this patch looks fine to me, but I'd feel more
comfortable if we could first fix the stage-2 code to free those stage-2
tables without invoking the dtor.

Anshuman, IIRC you had a patch to fix the stage-2 code to not invoke the
dtors. If so, could you please post that so that we could take it as a
preparatory patch for this series?

Thanks,
Mark.

> diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
> index 106fdc951b6e..4e3becfed387 100644
> --- a/arch/arm64/include/asm/tlb.h
> +++ b/arch/arm64/include/asm/tlb.h
> @@ -62,7 +62,10 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
>  static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
>  				  unsigned long addr)
>  {
> -	tlb_remove_table(tlb, virt_to_page(pmdp));
> +	struct page *page = virt_to_page(pmdp);
> +
> +	pgtable_pmd_page_dtor(page);
> +	tlb_remove_table(tlb, page);
>  }
>  #endif
>  
> -- 
> 2.21.0.360.g471c308f928-goog
>
Anshuman Khandual March 11, 2019, 12:57 p.m. UTC | #3
On 03/11/2019 05:42 PM, Mark Rutland wrote:
> Hi,
> 
> On Sat, Mar 09, 2019 at 06:19:06PM -0700, Yu Zhao wrote:
>> Switch from per mm_struct to per pmd page table lock by enabling
>> ARCH_ENABLE_SPLIT_PMD_PTLOCK. This provides better granularity for
>> large system.
>>
>> I'm not sure if there is contention on mm->page_table_lock. Given
>> the option comes at no cost (apart from initializing more spin
>> locks), why not enable it now.
>>
>> We only do so when pmd is not folded, so we don't mistakenly call
>> pgtable_pmd_page_ctor() on pud or p4d in pgd_pgtable_alloc(). (We
>> check shift against PMD_SHIFT, which is same as PUD_SHIFT when pmd
>> is folded).
> 
> Just to check, I take it pgtable_pmd_page_ctor() is now a NOP when the
> PMD is folded, and this last paragraph is stale?
> 
>> Signed-off-by: Yu Zhao <yuzhao@google.com>
>> ---
>>  arch/arm64/Kconfig               |  3 +++
>>  arch/arm64/include/asm/pgalloc.h | 12 +++++++++++-
>>  arch/arm64/include/asm/tlb.h     |  5 ++++-
>>  3 files changed, 18 insertions(+), 2 deletions(-)
>>
>> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
>> index cfbf307d6dc4..a3b1b789f766 100644
>> --- a/arch/arm64/Kconfig
>> +++ b/arch/arm64/Kconfig
>> @@ -872,6 +872,9 @@ config ARCH_WANT_HUGE_PMD_SHARE
>>  config ARCH_HAS_CACHE_LINE_SIZE
>>  	def_bool y
>>  
>> +config ARCH_ENABLE_SPLIT_PMD_PTLOCK
>> +	def_bool y if PGTABLE_LEVELS > 2
>> +
>>  config SECCOMP
>>  	bool "Enable seccomp to safely compute untrusted bytecode"
>>  	---help---
>> diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
>> index 52fa47c73bf0..dabba4b2c61f 100644
>> --- a/arch/arm64/include/asm/pgalloc.h
>> +++ b/arch/arm64/include/asm/pgalloc.h
>> @@ -33,12 +33,22 @@
>>  
>>  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
>>  {
>> -	return (pmd_t *)__get_free_page(PGALLOC_GFP);
>> +	struct page *page;
>> +
>> +	page = alloc_page(PGALLOC_GFP);
>> +	if (!page)
>> +		return NULL;
>> +	if (!pgtable_pmd_page_ctor(page)) {
>> +		__free_page(page);
>> +		return NULL;
>> +	}
>> +	return page_address(page);
>>  }
>>  
>>  static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
>>  {
>>  	BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
>> +	pgtable_pmd_page_dtor(virt_to_page(pmdp));
>>  	free_page((unsigned long)pmdp);
>>  }
> 
> It looks like arm64's existing stage-2 code is inconsistent across
> alloc/free, and IIUC this change might turn that into a real problem.
> Currently we allocate all levels of stage-2 table with
> __get_free_page(), but free them with p?d_free(). We always miss the
> ctor and always use the dtor.
> 
> Other than that, this patch looks fine to me, but I'd feel more
> comfortable if we could first fix the stage-2 code to free those stage-2
> tables without invoking the dtor.

Thats right. I have already highlighted this problem.
 
> 
> Anshuman, IIRC you had a patch to fix the stage-2 code to not invoke the
> dtors. If so, could you please post that so that we could take it as a
> preparatory patch for this series?

Sure I can after fixing PTE level pte_free_kernel/__free_page which I had
missed in V2.

https://www.spinics.net/lists/arm-kernel/msg710118.html
Yu Zhao March 11, 2019, 11:10 p.m. UTC | #4
On Mon, Mar 11, 2019 at 01:58:27PM +0530, Anshuman Khandual wrote:
> On 03/10/2019 06:49 AM, Yu Zhao wrote:
> > Switch from per mm_struct to per pmd page table lock by enabling
> > ARCH_ENABLE_SPLIT_PMD_PTLOCK. This provides better granularity for
> > large system.
> > 
> > I'm not sure if there is contention on mm->page_table_lock. Given
> > the option comes at no cost (apart from initializing more spin
> > locks), why not enable it now.
> > 
> > We only do so when pmd is not folded, so we don't mistakenly call
> > pgtable_pmd_page_ctor() on pud or p4d in pgd_pgtable_alloc(). (We
> > check shift against PMD_SHIFT, which is same as PUD_SHIFT when pmd
> > is folded).
> > 
> > Signed-off-by: Yu Zhao <yuzhao@google.com>
> > ---
> >  arch/arm64/Kconfig               |  3 +++
> >  arch/arm64/include/asm/pgalloc.h | 12 +++++++++++-
> >  arch/arm64/include/asm/tlb.h     |  5 ++++-
> >  3 files changed, 18 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index cfbf307d6dc4..a3b1b789f766 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -872,6 +872,9 @@ config ARCH_WANT_HUGE_PMD_SHARE
> >  config ARCH_HAS_CACHE_LINE_SIZE
> >  	def_bool y
> >  
> > +config ARCH_ENABLE_SPLIT_PMD_PTLOCK
> > +	def_bool y if PGTABLE_LEVELS > 2
> > +
> >  config SECCOMP
> >  	bool "Enable seccomp to safely compute untrusted bytecode"
> >  	---help---
> > diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> > index 52fa47c73bf0..dabba4b2c61f 100644
> > --- a/arch/arm64/include/asm/pgalloc.h
> > +++ b/arch/arm64/include/asm/pgalloc.h
> > @@ -33,12 +33,22 @@
> >  
> >  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
> >  {
> > -	return (pmd_t *)__get_free_page(PGALLOC_GFP);
> > +	struct page *page;
> > +
> > +	page = alloc_page(PGALLOC_GFP);
> > +	if (!page)
> > +		return NULL;
> > +	if (!pgtable_pmd_page_ctor(page)) {
> > +		__free_page(page);
> > +		return NULL;
> > +	}
> > +	return page_address(page);
> >  }
> >  
> >  static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
> >  {
> >  	BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
> > +	pgtable_pmd_page_dtor(virt_to_page(pmdp));
> >  	free_page((unsigned long)pmdp);
> >  }
> 
> There is just one problem here. ARM KVM's stage2_pmd_free() calls into pmd_free() on a page
> originally allocated with __get_free_page() and never went through pgtable_pmd_page_ctor().
> So when ARCH_ENABLE_SPLIT_PMD_PTLOCK is enabled
> 
> stage2_pmd_free()
> 	pgtable_pmd_page_dtor()
> 		ptlock_free()
> 			kmem_cache_free(page_ptl_cachep, page->ptl)
> 
> Though SLUB implementation for kmem_cache_free() seems to be handling NULL page->ptl (as the
> page never got it's lock allocated or initialized) correctly I am not sure if it is a right
> thing to do.

Thanks for reminding me. This should be fixed as well. Will do it
in a separate patch.
Yu Zhao March 11, 2019, 11:11 p.m. UTC | #5
On Mon, Mar 11, 2019 at 12:12:28PM +0000, Mark Rutland wrote:
> Hi,
> 
> On Sat, Mar 09, 2019 at 06:19:06PM -0700, Yu Zhao wrote:
> > Switch from per mm_struct to per pmd page table lock by enabling
> > ARCH_ENABLE_SPLIT_PMD_PTLOCK. This provides better granularity for
> > large system.
> > 
> > I'm not sure if there is contention on mm->page_table_lock. Given
> > the option comes at no cost (apart from initializing more spin
> > locks), why not enable it now.
> > 
> > We only do so when pmd is not folded, so we don't mistakenly call
> > pgtable_pmd_page_ctor() on pud or p4d in pgd_pgtable_alloc(). (We
> > check shift against PMD_SHIFT, which is same as PUD_SHIFT when pmd
> > is folded).
> 
> Just to check, I take it pgtable_pmd_page_ctor() is now a NOP when the
> PMD is folded, and this last paragraph is stale?

Yes, and will remove it.

> > Signed-off-by: Yu Zhao <yuzhao@google.com>
> > ---
> >  arch/arm64/Kconfig               |  3 +++
> >  arch/arm64/include/asm/pgalloc.h | 12 +++++++++++-
> >  arch/arm64/include/asm/tlb.h     |  5 ++++-
> >  3 files changed, 18 insertions(+), 2 deletions(-)
> > 
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index cfbf307d6dc4..a3b1b789f766 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -872,6 +872,9 @@ config ARCH_WANT_HUGE_PMD_SHARE
> >  config ARCH_HAS_CACHE_LINE_SIZE
> >  	def_bool y
> >  
> > +config ARCH_ENABLE_SPLIT_PMD_PTLOCK
> > +	def_bool y if PGTABLE_LEVELS > 2
> > +
> >  config SECCOMP
> >  	bool "Enable seccomp to safely compute untrusted bytecode"
> >  	---help---
> > diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
> > index 52fa47c73bf0..dabba4b2c61f 100644
> > --- a/arch/arm64/include/asm/pgalloc.h
> > +++ b/arch/arm64/include/asm/pgalloc.h
> > @@ -33,12 +33,22 @@
> >  
> >  static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
> >  {
> > -	return (pmd_t *)__get_free_page(PGALLOC_GFP);
> > +	struct page *page;
> > +
> > +	page = alloc_page(PGALLOC_GFP);
> > +	if (!page)
> > +		return NULL;
> > +	if (!pgtable_pmd_page_ctor(page)) {
> > +		__free_page(page);
> > +		return NULL;
> > +	}
> > +	return page_address(page);
> >  }
> >  
> >  static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
> >  {
> >  	BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
> > +	pgtable_pmd_page_dtor(virt_to_page(pmdp));
> >  	free_page((unsigned long)pmdp);
> >  }
> 
> It looks like arm64's existing stage-2 code is inconsistent across
> alloc/free, and IIUC this change might turn that into a real problem.
> Currently we allocate all levels of stage-2 table with
> __get_free_page(), but free them with p?d_free(). We always miss the
> ctor and always use the dtor.
> 
> Other than that, this patch looks fine to me, but I'd feel more
> comfortable if we could first fix the stage-2 code to free those stage-2
> tables without invoking the dtor.
> 
> Anshuman, IIRC you had a patch to fix the stage-2 code to not invoke the
> dtors. If so, could you please post that so that we could take it as a
> preparatory patch for this series?

Will do.

> Thanks,
> Mark.
> 
> > diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
> > index 106fdc951b6e..4e3becfed387 100644
> > --- a/arch/arm64/include/asm/tlb.h
> > +++ b/arch/arm64/include/asm/tlb.h
> > @@ -62,7 +62,10 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
> >  static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
> >  				  unsigned long addr)
> >  {
> > -	tlb_remove_table(tlb, virt_to_page(pmdp));
> > +	struct page *page = virt_to_page(pmdp);
> > +
> > +	pgtable_pmd_page_dtor(page);
> > +	tlb_remove_table(tlb, page);
> >  }
> >  #endif
> >  
> > -- 
> > 2.21.0.360.g471c308f928-goog
> >
diff mbox series

Patch

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index cfbf307d6dc4..a3b1b789f766 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -872,6 +872,9 @@  config ARCH_WANT_HUGE_PMD_SHARE
 config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y
 
+config ARCH_ENABLE_SPLIT_PMD_PTLOCK
+	def_bool y if PGTABLE_LEVELS > 2
+
 config SECCOMP
 	bool "Enable seccomp to safely compute untrusted bytecode"
 	---help---
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 52fa47c73bf0..dabba4b2c61f 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -33,12 +33,22 @@ 
 
 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
 {
-	return (pmd_t *)__get_free_page(PGALLOC_GFP);
+	struct page *page;
+
+	page = alloc_page(PGALLOC_GFP);
+	if (!page)
+		return NULL;
+	if (!pgtable_pmd_page_ctor(page)) {
+		__free_page(page);
+		return NULL;
+	}
+	return page_address(page);
 }
 
 static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
 {
 	BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
+	pgtable_pmd_page_dtor(virt_to_page(pmdp));
 	free_page((unsigned long)pmdp);
 }
 
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 106fdc951b6e..4e3becfed387 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -62,7 +62,10 @@  static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
 				  unsigned long addr)
 {
-	tlb_remove_table(tlb, virt_to_page(pmdp));
+	struct page *page = virt_to_page(pmdp);
+
+	pgtable_pmd_page_dtor(page);
+	tlb_remove_table(tlb, page);
 }
 #endif