diff mbox series

[RFC,09/14] mm/khugepaged: record SCAN_PAGE_COMPOUND when scan_pmd() finds THP

Message ID 20220308213417.1407042-10-zokeefe@google.com (mailing list archive)
State New
Headers show
Series mm: userspace hugepage collapse | expand

Commit Message

Zach O'Keefe March 8, 2022, 9:34 p.m. UTC
When scanning an anon pmd to see if it's eligible for collapse, return
SCAN_PAGE_COMPOUND if the pmd already maps a thp. This is consistent
with handling when scanning file-backed memory.

Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---
 mm/khugepaged.c | 41 +++++++++++++++++++++++++++++++++++------
 1 file changed, 35 insertions(+), 6 deletions(-)

Comments

Yang Shi March 9, 2022, 11:40 p.m. UTC | #1
On Tue, Mar 8, 2022 at 1:35 PM Zach O'Keefe <zokeefe@google.com> wrote:
>
> When scanning an anon pmd to see if it's eligible for collapse, return
> SCAN_PAGE_COMPOUND if the pmd already maps a thp. This is consistent
> with handling when scanning file-backed memory.

I'm not quite keen that we have to keep anon consistent with file for
this case. SCAN_PAGE_COMPOUND typically means the page is compound
page, but PTE mapped.

And even SCAN_PMD_NULL is not returned every time when mm_find_pmd()
returns NULL. In addition, SCAN_PMD_NULL seems ambiguous to me. The
khugepaged actually sees non-present (migration) entry or trans huge
entry, so may rename it to SCAN_PMD_NOT_SUITABLE?

>
> Signed-off-by: Zach O'Keefe <zokeefe@google.com>
> ---
>  mm/khugepaged.c | 41 +++++++++++++++++++++++++++++++++++------
>  1 file changed, 35 insertions(+), 6 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index ecbd3fc41c80..403578161a3b 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -1011,6 +1011,38 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
>                         vm_flags_ignore, vmap);
>  }
>
> +/*
> + * If returning NULL (meaning the pmd isn't mapped, isn't present, or thp),
> + * write the reason to *result.
> + */
> +static pmd_t *find_pmd_or_thp_or_none(struct mm_struct *mm,
> +                                     unsigned long address,
> +                                     int *result)
> +{
> +       pmd_t *pmd = mm_find_pmd_raw(mm, address);
> +       pmd_t pmde;
> +
> +       if (!pmd) {
> +               *result = SCAN_PMD_NULL;
> +               return NULL;
> +       }
> +
> +       pmde = pmd_read_atomic(pmd);
> +
> +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> +       /* See comments in pmd_none_or_trans_huge_or_clear_bad() */
> +       barrier();
> +#endif
> +       if (!pmd_present(pmde) || pmd_none(pmde)) {
> +               *result = SCAN_PMD_NULL;
> +               return NULL;
> +       } else if (pmd_trans_huge(pmde)) {
> +               *result = SCAN_PAGE_COMPOUND;
> +               return NULL;
> +       }
> +       return pmd;
> +}
> +
>  /*
>   * Bring missing pages in from swap, to complete THP collapse.
>   * Only done if khugepaged_scan_pmd believes it is worthwhile.
> @@ -1212,9 +1244,8 @@ static void collapse_huge_page(struct mm_struct *mm,
>                 goto out_nolock;
>         }
>
> -       pmd = mm_find_pmd(mm, address);
> +       pmd = find_pmd_or_thp_or_none(mm, address, &result);
>         if (!pmd) {
> -               result = SCAN_PMD_NULL;
>                 mmap_read_unlock(mm);
>                 goto out_nolock;
>         }
> @@ -1287,11 +1318,9 @@ static void scan_pmd(struct mm_struct *mm,
>         mmap_assert_locked(mm);
>         VM_BUG_ON(address & ~HPAGE_PMD_MASK);
>
> -       pmd = mm_find_pmd(mm, address);
> -       if (!pmd) {
> -               scan_result->result = SCAN_PMD_NULL;
> +       pmd = find_pmd_or_thp_or_none(mm, address, &scan_result->result);
> +       if (!pmd)
>                 goto out;
> -       }
>
>         memset(cc->node_load, 0, sizeof(cc->node_load));
>         pte = pte_offset_map_lock(mm, pmd, address, &ptl);
> --
> 2.35.1.616.g0bdcbb4464-goog
>
Zach O'Keefe March 10, 2022, 12:46 a.m. UTC | #2
On Wed, Mar 9, 2022 at 3:40 PM Yang Shi <shy828301@gmail.com> wrote:
>
> On Tue, Mar 8, 2022 at 1:35 PM Zach O'Keefe <zokeefe@google.com> wrote:
> >
> > When scanning an anon pmd to see if it's eligible for collapse, return
> > SCAN_PAGE_COMPOUND if the pmd already maps a thp. This is consistent
> > with handling when scanning file-backed memory.
>
> I'm not quite keen that we have to keep anon consistent with file for
> this case. SCAN_PAGE_COMPOUND typically means the page is compound
> page, but PTE mapped.
>

Good point.

> And even SCAN_PMD_NULL is not returned every time when mm_find_pmd()
> returns NULL. In addition, SCAN_PMD_NULL seems ambiguous to me. The
> khugepaged actually sees non-present (migration) entry or trans huge
> entry, so may rename it to SCAN_PMD_NOT_SUITABLE?
>

Sorry, I'm not sure I understand the suggestion here. What this patch
would like to do, is to identify what pmds map thps. This will be
important later, since if a user requests a collapse of an
already-collapsed region, we want to return successfully (even if no
work to be done).

Maybe there should be a SCAN_PMD_MAPPED used here instead? Just to not
overload SCAN_PAGE_COMPOUND?

Though, note that when MADV_COLLAPSE supports file-backed memory, a
similar check for pmd-mapping will need to be made on the file-side of
things.

> >
> > Signed-off-by: Zach O'Keefe <zokeefe@google.com>
> > ---
> >  mm/khugepaged.c | 41 +++++++++++++++++++++++++++++++++++------
> >  1 file changed, 35 insertions(+), 6 deletions(-)
> >
> > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > index ecbd3fc41c80..403578161a3b 100644
> > --- a/mm/khugepaged.c
> > +++ b/mm/khugepaged.c
> > @@ -1011,6 +1011,38 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
> >                         vm_flags_ignore, vmap);
> >  }
> >
> > +/*
> > + * If returning NULL (meaning the pmd isn't mapped, isn't present, or thp),
> > + * write the reason to *result.
> > + */
> > +static pmd_t *find_pmd_or_thp_or_none(struct mm_struct *mm,
> > +                                     unsigned long address,
> > +                                     int *result)
> > +{
> > +       pmd_t *pmd = mm_find_pmd_raw(mm, address);
> > +       pmd_t pmde;
> > +
> > +       if (!pmd) {
> > +               *result = SCAN_PMD_NULL;
> > +               return NULL;
> > +       }
> > +
> > +       pmde = pmd_read_atomic(pmd);
> > +
> > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > +       /* See comments in pmd_none_or_trans_huge_or_clear_bad() */
> > +       barrier();
> > +#endif
> > +       if (!pmd_present(pmde) || pmd_none(pmde)) {
> > +               *result = SCAN_PMD_NULL;
> > +               return NULL;
> > +       } else if (pmd_trans_huge(pmde)) {
> > +               *result = SCAN_PAGE_COMPOUND;
> > +               return NULL;
> > +       }
> > +       return pmd;
> > +}
> > +
> >  /*
> >   * Bring missing pages in from swap, to complete THP collapse.
> >   * Only done if khugepaged_scan_pmd believes it is worthwhile.
> > @@ -1212,9 +1244,8 @@ static void collapse_huge_page(struct mm_struct *mm,
> >                 goto out_nolock;
> >         }
> >
> > -       pmd = mm_find_pmd(mm, address);
> > +       pmd = find_pmd_or_thp_or_none(mm, address, &result);
> >         if (!pmd) {
> > -               result = SCAN_PMD_NULL;
> >                 mmap_read_unlock(mm);
> >                 goto out_nolock;
> >         }
> > @@ -1287,11 +1318,9 @@ static void scan_pmd(struct mm_struct *mm,
> >         mmap_assert_locked(mm);
> >         VM_BUG_ON(address & ~HPAGE_PMD_MASK);
> >
> > -       pmd = mm_find_pmd(mm, address);
> > -       if (!pmd) {
> > -               scan_result->result = SCAN_PMD_NULL;
> > +       pmd = find_pmd_or_thp_or_none(mm, address, &scan_result->result);
> > +       if (!pmd)
> >                 goto out;
> > -       }
> >
> >         memset(cc->node_load, 0, sizeof(cc->node_load));
> >         pte = pte_offset_map_lock(mm, pmd, address, &ptl);
> > --
> > 2.35.1.616.g0bdcbb4464-goog
> >
Yang Shi March 10, 2022, 2:05 a.m. UTC | #3
On Wed, Mar 9, 2022 at 4:46 PM Zach O'Keefe <zokeefe@google.com> wrote:
>
> On Wed, Mar 9, 2022 at 3:40 PM Yang Shi <shy828301@gmail.com> wrote:
> >
> > On Tue, Mar 8, 2022 at 1:35 PM Zach O'Keefe <zokeefe@google.com> wrote:
> > >
> > > When scanning an anon pmd to see if it's eligible for collapse, return
> > > SCAN_PAGE_COMPOUND if the pmd already maps a thp. This is consistent
> > > with handling when scanning file-backed memory.
> >
> > I'm not quite keen that we have to keep anon consistent with file for
> > this case. SCAN_PAGE_COMPOUND typically means the page is compound
> > page, but PTE mapped.
> >
>
> Good point.
>
> > And even SCAN_PMD_NULL is not returned every time when mm_find_pmd()
> > returns NULL. In addition, SCAN_PMD_NULL seems ambiguous to me. The
> > khugepaged actually sees non-present (migration) entry or trans huge
> > entry, so may rename it to SCAN_PMD_NOT_SUITABLE?
> >
>
> Sorry, I'm not sure I understand the suggestion here. What this patch
> would like to do, is to identify what pmds map thps. This will be
> important later, since if a user requests a collapse of an
> already-collapsed region, we want to return successfully (even if no
> work to be done).

Makes sense.

>
> Maybe there should be a SCAN_PMD_MAPPED used here instead? Just to not
> overload SCAN_PAGE_COMPOUND?

I see. SCAN_PMD_MAPPED sounds more self-explained and suitable IMHO.



>
> Though, note that when MADV_COLLAPSE supports file-backed memory, a
> similar check for pmd-mapping will need to be made on the file-side of
> things.
>
> > >
> > > Signed-off-by: Zach O'Keefe <zokeefe@google.com>
> > > ---
> > >  mm/khugepaged.c | 41 +++++++++++++++++++++++++++++++++++------
> > >  1 file changed, 35 insertions(+), 6 deletions(-)
> > >
> > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > > index ecbd3fc41c80..403578161a3b 100644
> > > --- a/mm/khugepaged.c
> > > +++ b/mm/khugepaged.c
> > > @@ -1011,6 +1011,38 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
> > >                         vm_flags_ignore, vmap);
> > >  }
> > >
> > > +/*
> > > + * If returning NULL (meaning the pmd isn't mapped, isn't present, or thp),
> > > + * write the reason to *result.
> > > + */
> > > +static pmd_t *find_pmd_or_thp_or_none(struct mm_struct *mm,
> > > +                                     unsigned long address,
> > > +                                     int *result)
> > > +{
> > > +       pmd_t *pmd = mm_find_pmd_raw(mm, address);
> > > +       pmd_t pmde;
> > > +
> > > +       if (!pmd) {
> > > +               *result = SCAN_PMD_NULL;
> > > +               return NULL;
> > > +       }
> > > +
> > > +       pmde = pmd_read_atomic(pmd);
> > > +
> > > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > > +       /* See comments in pmd_none_or_trans_huge_or_clear_bad() */
> > > +       barrier();
> > > +#endif
> > > +       if (!pmd_present(pmde) || pmd_none(pmde)) {
> > > +               *result = SCAN_PMD_NULL;
> > > +               return NULL;
> > > +       } else if (pmd_trans_huge(pmde)) {
> > > +               *result = SCAN_PAGE_COMPOUND;
> > > +               return NULL;
> > > +       }
> > > +       return pmd;
> > > +}
> > > +
> > >  /*
> > >   * Bring missing pages in from swap, to complete THP collapse.
> > >   * Only done if khugepaged_scan_pmd believes it is worthwhile.
> > > @@ -1212,9 +1244,8 @@ static void collapse_huge_page(struct mm_struct *mm,
> > >                 goto out_nolock;
> > >         }
> > >
> > > -       pmd = mm_find_pmd(mm, address);
> > > +       pmd = find_pmd_or_thp_or_none(mm, address, &result);
> > >         if (!pmd) {
> > > -               result = SCAN_PMD_NULL;
> > >                 mmap_read_unlock(mm);
> > >                 goto out_nolock;
> > >         }
> > > @@ -1287,11 +1318,9 @@ static void scan_pmd(struct mm_struct *mm,
> > >         mmap_assert_locked(mm);
> > >         VM_BUG_ON(address & ~HPAGE_PMD_MASK);
> > >
> > > -       pmd = mm_find_pmd(mm, address);
> > > -       if (!pmd) {
> > > -               scan_result->result = SCAN_PMD_NULL;
> > > +       pmd = find_pmd_or_thp_or_none(mm, address, &scan_result->result);
> > > +       if (!pmd)
> > >                 goto out;
> > > -       }
> > >
> > >         memset(cc->node_load, 0, sizeof(cc->node_load));
> > >         pte = pte_offset_map_lock(mm, pmd, address, &ptl);
> > > --
> > > 2.35.1.616.g0bdcbb4464-goog
> > >
Zach O'Keefe March 10, 2022, 8:37 a.m. UTC | #4
On Wed, Mar 9, 2022 at 6:06 PM Yang Shi <shy828301@gmail.com> wrote:
>
> On Wed, Mar 9, 2022 at 4:46 PM Zach O'Keefe <zokeefe@google.com> wrote:
> >
> > On Wed, Mar 9, 2022 at 3:40 PM Yang Shi <shy828301@gmail.com> wrote:
> > >
> > > On Tue, Mar 8, 2022 at 1:35 PM Zach O'Keefe <zokeefe@google.com> wrote:
> > > >
> > > > When scanning an anon pmd to see if it's eligible for collapse, return
> > > > SCAN_PAGE_COMPOUND if the pmd already maps a thp. This is consistent
> > > > with handling when scanning file-backed memory.
> > >
> > > I'm not quite keen that we have to keep anon consistent with file for
> > > this case. SCAN_PAGE_COMPOUND typically means the page is compound
> > > page, but PTE mapped.
> > >
> >
> > Good point.
> >
> > > And even SCAN_PMD_NULL is not returned every time when mm_find_pmd()
> > > returns NULL. In addition, SCAN_PMD_NULL seems ambiguous to me. The
> > > khugepaged actually sees non-present (migration) entry or trans huge
> > > entry, so may rename it to SCAN_PMD_NOT_SUITABLE?
> > >
> >
> > Sorry, I'm not sure I understand the suggestion here. What this patch
> > would like to do, is to identify what pmds map thps. This will be
> > important later, since if a user requests a collapse of an
> > already-collapsed region, we want to return successfully (even if no
> > work to be done).
>
> Makes sense.
>
> >
> > Maybe there should be a SCAN_PMD_MAPPED used here instead? Just to not
> > overload SCAN_PAGE_COMPOUND?
>
> I see. SCAN_PMD_MAPPED sounds more self-explained and suitable IMHO.
>
>

Makes sense not to conflate the two. Thanks for the feedback!

>
> >
> > Though, note that when MADV_COLLAPSE supports file-backed memory, a
> > similar check for pmd-mapping will need to be made on the file-side of
> > things.
> >
> > > >
> > > > Signed-off-by: Zach O'Keefe <zokeefe@google.com>
> > > > ---
> > > >  mm/khugepaged.c | 41 +++++++++++++++++++++++++++++++++++------
> > > >  1 file changed, 35 insertions(+), 6 deletions(-)
> > > >
> > > > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > > > index ecbd3fc41c80..403578161a3b 100644
> > > > --- a/mm/khugepaged.c
> > > > +++ b/mm/khugepaged.c
> > > > @@ -1011,6 +1011,38 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
> > > >                         vm_flags_ignore, vmap);
> > > >  }
> > > >
> > > > +/*
> > > > + * If returning NULL (meaning the pmd isn't mapped, isn't present, or thp),
> > > > + * write the reason to *result.
> > > > + */
> > > > +static pmd_t *find_pmd_or_thp_or_none(struct mm_struct *mm,
> > > > +                                     unsigned long address,
> > > > +                                     int *result)
> > > > +{
> > > > +       pmd_t *pmd = mm_find_pmd_raw(mm, address);
> > > > +       pmd_t pmde;
> > > > +
> > > > +       if (!pmd) {
> > > > +               *result = SCAN_PMD_NULL;
> > > > +               return NULL;
> > > > +       }
> > > > +
> > > > +       pmde = pmd_read_atomic(pmd);
> > > > +
> > > > +#ifdef CONFIG_TRANSPARENT_HUGEPAGE
> > > > +       /* See comments in pmd_none_or_trans_huge_or_clear_bad() */
> > > > +       barrier();
> > > > +#endif
> > > > +       if (!pmd_present(pmde) || pmd_none(pmde)) {
> > > > +               *result = SCAN_PMD_NULL;
> > > > +               return NULL;
> > > > +       } else if (pmd_trans_huge(pmde)) {
> > > > +               *result = SCAN_PAGE_COMPOUND;
> > > > +               return NULL;
> > > > +       }
> > > > +       return pmd;
> > > > +}
> > > > +
> > > >  /*
> > > >   * Bring missing pages in from swap, to complete THP collapse.
> > > >   * Only done if khugepaged_scan_pmd believes it is worthwhile.
> > > > @@ -1212,9 +1244,8 @@ static void collapse_huge_page(struct mm_struct *mm,
> > > >                 goto out_nolock;
> > > >         }
> > > >
> > > > -       pmd = mm_find_pmd(mm, address);
> > > > +       pmd = find_pmd_or_thp_or_none(mm, address, &result);
> > > >         if (!pmd) {
> > > > -               result = SCAN_PMD_NULL;
> > > >                 mmap_read_unlock(mm);
> > > >                 goto out_nolock;
> > > >         }
> > > > @@ -1287,11 +1318,9 @@ static void scan_pmd(struct mm_struct *mm,
> > > >         mmap_assert_locked(mm);
> > > >         VM_BUG_ON(address & ~HPAGE_PMD_MASK);
> > > >
> > > > -       pmd = mm_find_pmd(mm, address);
> > > > -       if (!pmd) {
> > > > -               scan_result->result = SCAN_PMD_NULL;
> > > > +       pmd = find_pmd_or_thp_or_none(mm, address, &scan_result->result);
> > > > +       if (!pmd)
> > > >                 goto out;
> > > > -       }
> > > >
> > > >         memset(cc->node_load, 0, sizeof(cc->node_load));
> > > >         pte = pte_offset_map_lock(mm, pmd, address, &ptl);
> > > > --
> > > > 2.35.1.616.g0bdcbb4464-goog
> > > >
diff mbox series

Patch

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index ecbd3fc41c80..403578161a3b 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1011,6 +1011,38 @@  static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address,
 			vm_flags_ignore, vmap);
 }
 
+/*
+ * If returning NULL (meaning the pmd isn't mapped, isn't present, or thp),
+ * write the reason to *result.
+ */
+static pmd_t *find_pmd_or_thp_or_none(struct mm_struct *mm,
+				      unsigned long address,
+				      int *result)
+{
+	pmd_t *pmd = mm_find_pmd_raw(mm, address);
+	pmd_t pmde;
+
+	if (!pmd) {
+		*result = SCAN_PMD_NULL;
+		return NULL;
+	}
+
+	pmde = pmd_read_atomic(pmd);
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	/* See comments in pmd_none_or_trans_huge_or_clear_bad() */
+	barrier();
+#endif
+	if (!pmd_present(pmde) || pmd_none(pmde)) {
+		*result = SCAN_PMD_NULL;
+		return NULL;
+	} else if (pmd_trans_huge(pmde)) {
+		*result = SCAN_PAGE_COMPOUND;
+		return NULL;
+	}
+	return pmd;
+}
+
 /*
  * Bring missing pages in from swap, to complete THP collapse.
  * Only done if khugepaged_scan_pmd believes it is worthwhile.
@@ -1212,9 +1244,8 @@  static void collapse_huge_page(struct mm_struct *mm,
 		goto out_nolock;
 	}
 
-	pmd = mm_find_pmd(mm, address);
+	pmd = find_pmd_or_thp_or_none(mm, address, &result);
 	if (!pmd) {
-		result = SCAN_PMD_NULL;
 		mmap_read_unlock(mm);
 		goto out_nolock;
 	}
@@ -1287,11 +1318,9 @@  static void scan_pmd(struct mm_struct *mm,
 	mmap_assert_locked(mm);
 	VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
-	pmd = mm_find_pmd(mm, address);
-	if (!pmd) {
-		scan_result->result = SCAN_PMD_NULL;
+	pmd = find_pmd_or_thp_or_none(mm, address, &scan_result->result);
+	if (!pmd)
 		goto out;
-	}
 
 	memset(cc->node_load, 0, sizeof(cc->node_load));
 	pte = pte_offset_map_lock(mm, pmd, address, &ptl);