diff mbox series

[RFC,v3,31/35] khugepaged: arm64: Don't collapse MTE enabled VMAs

Message ID 20240125164256.4147-32-alexandru.elisei@arm.com (mailing list archive)
State New, archived
Headers show
Series Add support for arm64 MTE dynamic tag storage reuse | expand

Commit Message

Alexandru Elisei Jan. 25, 2024, 4:42 p.m. UTC
copy_user_highpage() will do memory allocation if there are saved tags for
the destination page, and the page is missing tag storage.

After commit a349d72fd9ef ("mm/pgtable: add rcu_read_lock() and
rcu_read_unlock()s"), collapse_huge_page() calls
__collapse_huge_page_copy() -> .. -> copy_user_highpage() with the RCU lock
held, which means that copy_user_highpage() can only allocate memory using
GFP_ATOMIC or equivalent.

Get around this by refusing to collapse pages into a transparent huge page
if the VMA is MTE-enabled.

Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
---

Changes since rfc v2:

* New patch. I think an agreement on whether copy*_user_highpage() should be
always allowed to sleep, or should not be allowed, would be useful.

 arch/arm64/include/asm/pgtable.h    | 3 +++
 arch/arm64/kernel/mte_tag_storage.c | 5 +++++
 include/linux/khugepaged.h          | 5 +++++
 mm/khugepaged.c                     | 4 ++++
 4 files changed, 17 insertions(+)

Comments

Anshuman Khandual Feb. 1, 2024, 8:12 a.m. UTC | #1
On 1/25/24 22:12, Alexandru Elisei wrote:
> copy_user_highpage() will do memory allocation if there are saved tags for
> the destination page, and the page is missing tag storage.
> 
> After commit a349d72fd9ef ("mm/pgtable: add rcu_read_lock() and
> rcu_read_unlock()s"), collapse_huge_page() calls
> __collapse_huge_page_copy() -> .. -> copy_user_highpage() with the RCU lock
> held, which means that copy_user_highpage() can only allocate memory using
> GFP_ATOMIC or equivalent.
> 
> Get around this by refusing to collapse pages into a transparent huge page
> if the VMA is MTE-enabled.

Makes sense when copy_user_highpage() will allocate memory for tag storage.

> 
> Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> ---
> 
> Changes since rfc v2:
> 
> * New patch. I think an agreement on whether copy*_user_highpage() should be
> always allowed to sleep, or should not be allowed, would be useful.

This is a good question ! Even after preventing the collapse of MTE VMA here,
there still might be more paths where a sleeping (i.e memory allocating)
copy*_user_highpage() becomes problematic ?

> 
>  arch/arm64/include/asm/pgtable.h    | 3 +++
>  arch/arm64/kernel/mte_tag_storage.c | 5 +++++
>  include/linux/khugepaged.h          | 5 +++++
>  mm/khugepaged.c                     | 4 ++++
>  4 files changed, 17 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> index 87ae59436162..d0473538c926 100644
> --- a/arch/arm64/include/asm/pgtable.h
> +++ b/arch/arm64/include/asm/pgtable.h
> @@ -1120,6 +1120,9 @@ static inline bool arch_alloc_cma(gfp_t gfp_mask)
>  	return true;
>  }
>  
> +bool arch_hugepage_vma_revalidate(struct vm_area_struct *vma, unsigned long address);
> +#define arch_hugepage_vma_revalidate arch_hugepage_vma_revalidate
> +
>  #endif /* CONFIG_ARM64_MTE_TAG_STORAGE */
>  #endif /* CONFIG_ARM64_MTE */
>  
> diff --git a/arch/arm64/kernel/mte_tag_storage.c b/arch/arm64/kernel/mte_tag_storage.c
> index ac7b9c9c585c..a99959b70573 100644
> --- a/arch/arm64/kernel/mte_tag_storage.c
> +++ b/arch/arm64/kernel/mte_tag_storage.c
> @@ -636,3 +636,8 @@ void arch_alloc_page(struct page *page, int order, gfp_t gfp)
>  	if (tag_storage_enabled() && alloc_requires_tag_storage(gfp))
>  		reserve_tag_storage(page, order, gfp);
>  }
> +
> +bool arch_hugepage_vma_revalidate(struct vm_area_struct *vma, unsigned long address)
> +{
> +	return !(vma->vm_flags & VM_MTE);
> +}
> diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
> index f68865e19b0b..461e4322dff2 100644
> --- a/include/linux/khugepaged.h
> +++ b/include/linux/khugepaged.h
> @@ -38,6 +38,11 @@ static inline void khugepaged_exit(struct mm_struct *mm)
>  	if (test_bit(MMF_VM_HUGEPAGE, &mm->flags))
>  		__khugepaged_exit(mm);
>  }
> +
> +#ifndef arch_hugepage_vma_revalidate
> +#define arch_hugepage_vma_revalidate(vma, address) 1

Please replace s/1/true as arch_hugepage_vma_revalidate() returns bool ?

> +#endif

Right, above construct is much better than __HAVE_ARCH_XXXX based one.

> +
>  #else /* CONFIG_TRANSPARENT_HUGEPAGE */
>  static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
>  {
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 2b219acb528e..cb9a9ddb4d86 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -935,6 +935,10 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
>  	 */
>  	if (expect_anon && (!(*vmap)->anon_vma || !vma_is_anonymous(*vmap)))
>  		return SCAN_PAGE_ANON;
> +
> +	if (!arch_hugepage_vma_revalidate(vma, address))
> +		return SCAN_VMA_CHECK;
> +
>  	return SCAN_SUCCEED;
>  }
>  

Otherwise this LGTM.
Alexandru Elisei Feb. 1, 2024, 5:38 p.m. UTC | #2
On Thu, Feb 01, 2024 at 01:42:08PM +0530, Anshuman Khandual wrote:
> 
> 
> On 1/25/24 22:12, Alexandru Elisei wrote:
> > copy_user_highpage() will do memory allocation if there are saved tags for
> > the destination page, and the page is missing tag storage.
> > 
> > After commit a349d72fd9ef ("mm/pgtable: add rcu_read_lock() and
> > rcu_read_unlock()s"), collapse_huge_page() calls
> > __collapse_huge_page_copy() -> .. -> copy_user_highpage() with the RCU lock
> > held, which means that copy_user_highpage() can only allocate memory using
> > GFP_ATOMIC or equivalent.
> > 
> > Get around this by refusing to collapse pages into a transparent huge page
> > if the VMA is MTE-enabled.
> 
> Makes sense when copy_user_highpage() will allocate memory for tag storage.
> 
> > 
> > Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
> > ---
> > 
> > Changes since rfc v2:
> > 
> > * New patch. I think an agreement on whether copy*_user_highpage() should be
> > always allowed to sleep, or should not be allowed, would be useful.
> 
> This is a good question ! Even after preventing the collapse of MTE VMA here,
> there still might be more paths where a sleeping (i.e memory allocating)
> copy*_user_highpage() becomes problematic ?

Exactly!

> 
> > 
> >  arch/arm64/include/asm/pgtable.h    | 3 +++
> >  arch/arm64/kernel/mte_tag_storage.c | 5 +++++
> >  include/linux/khugepaged.h          | 5 +++++
> >  mm/khugepaged.c                     | 4 ++++
> >  4 files changed, 17 insertions(+)
> > 
> > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
> > index 87ae59436162..d0473538c926 100644
> > --- a/arch/arm64/include/asm/pgtable.h
> > +++ b/arch/arm64/include/asm/pgtable.h
> > @@ -1120,6 +1120,9 @@ static inline bool arch_alloc_cma(gfp_t gfp_mask)
> >  	return true;
> >  }
> >  
> > +bool arch_hugepage_vma_revalidate(struct vm_area_struct *vma, unsigned long address);
> > +#define arch_hugepage_vma_revalidate arch_hugepage_vma_revalidate
> > +
> >  #endif /* CONFIG_ARM64_MTE_TAG_STORAGE */
> >  #endif /* CONFIG_ARM64_MTE */
> >  
> > diff --git a/arch/arm64/kernel/mte_tag_storage.c b/arch/arm64/kernel/mte_tag_storage.c
> > index ac7b9c9c585c..a99959b70573 100644
> > --- a/arch/arm64/kernel/mte_tag_storage.c
> > +++ b/arch/arm64/kernel/mte_tag_storage.c
> > @@ -636,3 +636,8 @@ void arch_alloc_page(struct page *page, int order, gfp_t gfp)
> >  	if (tag_storage_enabled() && alloc_requires_tag_storage(gfp))
> >  		reserve_tag_storage(page, order, gfp);
> >  }
> > +
> > +bool arch_hugepage_vma_revalidate(struct vm_area_struct *vma, unsigned long address)
> > +{
> > +	return !(vma->vm_flags & VM_MTE);
> > +}
> > diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
> > index f68865e19b0b..461e4322dff2 100644
> > --- a/include/linux/khugepaged.h
> > +++ b/include/linux/khugepaged.h
> > @@ -38,6 +38,11 @@ static inline void khugepaged_exit(struct mm_struct *mm)
> >  	if (test_bit(MMF_VM_HUGEPAGE, &mm->flags))
> >  		__khugepaged_exit(mm);
> >  }
> > +
> > +#ifndef arch_hugepage_vma_revalidate
> > +#define arch_hugepage_vma_revalidate(vma, address) 1
> 
> Please replace s/1/true as arch_hugepage_vma_revalidate() returns bool ?

Yeah, that's strange, I don't know why I used 1 there. Will change it to true,
thanks for spotting it.

> 
> > +#endif
> 
> Right, above construct is much better than __HAVE_ARCH_XXXX based one.

Thanks!

Alex

> 
> > +
> >  #else /* CONFIG_TRANSPARENT_HUGEPAGE */
> >  static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
> >  {
> > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > index 2b219acb528e..cb9a9ddb4d86 100644
> > --- a/mm/khugepaged.c
> > +++ b/mm/khugepaged.c
> > @@ -935,6 +935,10 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
> >  	 */
> >  	if (expect_anon && (!(*vmap)->anon_vma || !vma_is_anonymous(*vmap)))
> >  		return SCAN_PAGE_ANON;
> > +
> > +	if (!arch_hugepage_vma_revalidate(vma, address))
> > +		return SCAN_VMA_CHECK;
> > +
> >  	return SCAN_SUCCEED;
> >  }
> >  
> 
> Otherwise this LGTM.
diff mbox series

Patch

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 87ae59436162..d0473538c926 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1120,6 +1120,9 @@  static inline bool arch_alloc_cma(gfp_t gfp_mask)
 	return true;
 }
 
+bool arch_hugepage_vma_revalidate(struct vm_area_struct *vma, unsigned long address);
+#define arch_hugepage_vma_revalidate arch_hugepage_vma_revalidate
+
 #endif /* CONFIG_ARM64_MTE_TAG_STORAGE */
 #endif /* CONFIG_ARM64_MTE */
 
diff --git a/arch/arm64/kernel/mte_tag_storage.c b/arch/arm64/kernel/mte_tag_storage.c
index ac7b9c9c585c..a99959b70573 100644
--- a/arch/arm64/kernel/mte_tag_storage.c
+++ b/arch/arm64/kernel/mte_tag_storage.c
@@ -636,3 +636,8 @@  void arch_alloc_page(struct page *page, int order, gfp_t gfp)
 	if (tag_storage_enabled() && alloc_requires_tag_storage(gfp))
 		reserve_tag_storage(page, order, gfp);
 }
+
+bool arch_hugepage_vma_revalidate(struct vm_area_struct *vma, unsigned long address)
+{
+	return !(vma->vm_flags & VM_MTE);
+}
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index f68865e19b0b..461e4322dff2 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -38,6 +38,11 @@  static inline void khugepaged_exit(struct mm_struct *mm)
 	if (test_bit(MMF_VM_HUGEPAGE, &mm->flags))
 		__khugepaged_exit(mm);
 }
+
+#ifndef arch_hugepage_vma_revalidate
+#define arch_hugepage_vma_revalidate(vma, address) 1
+#endif
+
 #else /* CONFIG_TRANSPARENT_HUGEPAGE */
 static inline void khugepaged_fork(struct mm_struct *mm, struct mm_struct *oldmm)
 {
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 2b219acb528e..cb9a9ddb4d86 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -935,6 +935,10 @@  static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
 	 */
 	if (expect_anon && (!(*vmap)->anon_vma || !vma_is_anonymous(*vmap)))
 		return SCAN_PAGE_ANON;
+
+	if (!arch_hugepage_vma_revalidate(vma, address))
+		return SCAN_VMA_CHECK;
+
 	return SCAN_SUCCEED;
 }