diff mbox series

[v6,05/15] mm/khugepaged: make allocation semantics context-specific

Message ID 20220604004004.954674-6-zokeefe@google.com (mailing list archive)
State New
Headers show
Series mm: userspace hugepage collapse | expand

Commit Message

Zach O'Keefe June 4, 2022, 12:39 a.m. UTC
Add a gfp_t flags member to struct collapse_control that allows contexts
to specify their own allocation semantics.  This decouples the
allocation semantics from
/sys/kernel/mm/transparent_hugepage/khugepaged/defrag.

khugepaged updates this member for every hugepage processed, since the
sysfs setting might change at any time.

Signed-off-by: Zach O'Keefe <zokeefe@google.com>
---
 mm/khugepaged.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

Comments

Yang Shi June 6, 2022, 8:58 p.m. UTC | #1
On Fri, Jun 3, 2022 at 5:40 PM Zach O'Keefe <zokeefe@google.com> wrote:
>
> Add a gfp_t flags member to struct collapse_control that allows contexts
> to specify their own allocation semantics.  This decouples the
> allocation semantics from
> /sys/kernel/mm/transparent_hugepage/khugepaged/defrag.
>
> khugepaged updates this member for every hugepage processed, since the
> sysfs setting might change at any time.
>
> Signed-off-by: Zach O'Keefe <zokeefe@google.com>
> ---
>  mm/khugepaged.c | 21 +++++++++++++--------
>  1 file changed, 13 insertions(+), 8 deletions(-)
>
> diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> index 38488d114073..ba722347bebd 100644
> --- a/mm/khugepaged.c
> +++ b/mm/khugepaged.c
> @@ -92,6 +92,9 @@ struct collapse_control {
>
>         /* Last target selected in khugepaged_find_target_node() */
>         int last_target_node;
> +
> +       /* gfp used for allocation and memcg charging */
> +       gfp_t gfp;
>  };
>
>  /**
> @@ -994,15 +997,14 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
>         return true;
>  }
>
> -static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
> +static int alloc_charge_hpage(struct mm_struct *mm, struct page **hpage,

Why did you have to reverse the order of mm and hpage? It seems
pointless and you could save a couple of changed lines.

>                               struct collapse_control *cc)
>  {
> -       gfp_t gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
>         int node = khugepaged_find_target_node(cc);
>
> -       if (!khugepaged_alloc_page(hpage, gfp, node))
> +       if (!khugepaged_alloc_page(hpage, cc->gfp, node))
>                 return SCAN_ALLOC_HUGE_PAGE_FAIL;
> -       if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, gfp)))
> +       if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, cc->gfp)))
>                 return SCAN_CGROUP_CHARGE_FAIL;
>         count_memcg_page_event(*hpage, THP_COLLAPSE_ALLOC);
>         return SCAN_SUCCEED;
> @@ -1032,7 +1034,7 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
>          */
>         mmap_read_unlock(mm);
>
> -       result = alloc_charge_hpage(hpage, mm, cc);
> +       result = alloc_charge_hpage(mm, hpage, cc);
>         if (result != SCAN_SUCCEED)
>                 goto out_nolock;
>
> @@ -1613,7 +1615,7 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
>         VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
>         VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
>
> -       result = alloc_charge_hpage(hpage, mm, cc);
> +       result = alloc_charge_hpage(mm, hpage, cc);
>         if (result != SCAN_SUCCEED)
>                 goto out;
>
> @@ -2037,8 +2039,7 @@ static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
>  }
>  #else
>  static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
> -                                pgoff_t start, struct page **hpage,
> -                                struct collapse_control *cc)
> +                                pgoff_t start, struct collapse_control *cc)

Why was the !CONFIG_SHMEM version definition changed, but CONFIG_SHMEM
version was not?

>  {
>         BUILD_BUG();
>  }
> @@ -2121,6 +2122,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
>                         if (unlikely(khugepaged_test_exit(mm)))
>                                 goto breakouterloop;
>
> +                       /* reset gfp flags since sysfs settings might change */
> +                       cc->gfp = alloc_hugepage_khugepaged_gfpmask() |
> +                                       __GFP_THISNODE;
>                         VM_BUG_ON(khugepaged_scan.address < hstart ||
>                                   khugepaged_scan.address + HPAGE_PMD_SIZE >
>                                   hend);
> @@ -2255,6 +2259,7 @@ static int khugepaged(void *none)
>         struct mm_slot *mm_slot;
>         struct collapse_control cc = {
>                 .last_target_node = NUMA_NO_NODE,
> +               /* .gfp set later  */

Seems pointless to me.

>         };
>
>         set_freezable();
> --
> 2.36.1.255.ge46751e96f-goog
>
Zach O'Keefe June 7, 2022, 7:56 p.m. UTC | #2
On Mon, Jun 6, 2022 at 1:59 PM Yang Shi <shy828301@gmail.com> wrote:
>
> On Fri, Jun 3, 2022 at 5:40 PM Zach O'Keefe <zokeefe@google.com> wrote:
> >
> > Add a gfp_t flags member to struct collapse_control that allows contexts
> > to specify their own allocation semantics.  This decouples the
> > allocation semantics from
> > /sys/kernel/mm/transparent_hugepage/khugepaged/defrag.
> >
> > khugepaged updates this member for every hugepage processed, since the
> > sysfs setting might change at any time.
> >
> > Signed-off-by: Zach O'Keefe <zokeefe@google.com>
> > ---
> >  mm/khugepaged.c | 21 +++++++++++++--------
> >  1 file changed, 13 insertions(+), 8 deletions(-)
> >
> > diff --git a/mm/khugepaged.c b/mm/khugepaged.c
> > index 38488d114073..ba722347bebd 100644
> > --- a/mm/khugepaged.c
> > +++ b/mm/khugepaged.c
> > @@ -92,6 +92,9 @@ struct collapse_control {
> >
> >         /* Last target selected in khugepaged_find_target_node() */
> >         int last_target_node;
> > +
> > +       /* gfp used for allocation and memcg charging */
> > +       gfp_t gfp;
> >  };
> >
> >  /**
> > @@ -994,15 +997,14 @@ static bool __collapse_huge_page_swapin(struct mm_struct *mm,
> >         return true;
> >  }
> >
> > -static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
> > +static int alloc_charge_hpage(struct mm_struct *mm, struct page **hpage,
>
> Why did you have to reverse the order of mm and hpage? It seems
> pointless and you could save a couple of changed lines.

At some point many versions ago I must have thought it was cleaner to
be consistent with struct *mm as 1st arg, but agree with unnecessary
change and have removed.

> >                               struct collapse_control *cc)
> >  {
> > -       gfp_t gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
> >         int node = khugepaged_find_target_node(cc);
> >
> > -       if (!khugepaged_alloc_page(hpage, gfp, node))
> > +       if (!khugepaged_alloc_page(hpage, cc->gfp, node))
> >                 return SCAN_ALLOC_HUGE_PAGE_FAIL;
> > -       if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, gfp)))
> > +       if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, cc->gfp)))
> >                 return SCAN_CGROUP_CHARGE_FAIL;
> >         count_memcg_page_event(*hpage, THP_COLLAPSE_ALLOC);
> >         return SCAN_SUCCEED;
> > @@ -1032,7 +1034,7 @@ static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
> >          */
> >         mmap_read_unlock(mm);
> >
> > -       result = alloc_charge_hpage(hpage, mm, cc);
> > +       result = alloc_charge_hpage(mm, hpage, cc);
> >         if (result != SCAN_SUCCEED)
> >                 goto out_nolock;
> >
> > @@ -1613,7 +1615,7 @@ static void collapse_file(struct mm_struct *mm, struct file *file,
> >         VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
> >         VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
> >
> > -       result = alloc_charge_hpage(hpage, mm, cc);
> > +       result = alloc_charge_hpage(mm, hpage, cc);
> >         if (result != SCAN_SUCCEED)
> >                 goto out;
> >
> > @@ -2037,8 +2039,7 @@ static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
> >  }
> >  #else
> >  static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
> > -                                pgoff_t start, struct page **hpage,
> > -                                struct collapse_control *cc)
> > +                                pgoff_t start, struct collapse_control *cc)
>
> Why was the !CONFIG_SHMEM version definition changed, but CONFIG_SHMEM
> version was not?

Definitely an error - sorry about that. Surprised I didn't get caught
by this earlier - must be because the signatures are eventually made
equal when struct **hpage is dropped. Have fixed it.

> >  {
> >         BUILD_BUG();
> >  }
> > @@ -2121,6 +2122,9 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
> >                         if (unlikely(khugepaged_test_exit(mm)))
> >                                 goto breakouterloop;
> >
> > +                       /* reset gfp flags since sysfs settings might change */
> > +                       cc->gfp = alloc_hugepage_khugepaged_gfpmask() |
> > +                                       __GFP_THISNODE;
> >                         VM_BUG_ON(khugepaged_scan.address < hstart ||
> >                                   khugepaged_scan.address + HPAGE_PMD_SIZE >
> >                                   hend);
> > @@ -2255,6 +2259,7 @@ static int khugepaged(void *none)
> >         struct mm_slot *mm_slot;
> >         struct collapse_control cc = {
> >                 .last_target_node = NUMA_NO_NODE,
> > +               /* .gfp set later  */
>
> Seems pointless to me.

Ack. Removed.

> >         };
> >
> >         set_freezable();
> > --
> > 2.36.1.255.ge46751e96f-goog
> >
diff mbox series

Patch

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 38488d114073..ba722347bebd 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -92,6 +92,9 @@  struct collapse_control {
 
 	/* Last target selected in khugepaged_find_target_node() */
 	int last_target_node;
+
+	/* gfp used for allocation and memcg charging */
+	gfp_t gfp;
 };
 
 /**
@@ -994,15 +997,14 @@  static bool __collapse_huge_page_swapin(struct mm_struct *mm,
 	return true;
 }
 
-static int alloc_charge_hpage(struct page **hpage, struct mm_struct *mm,
+static int alloc_charge_hpage(struct mm_struct *mm, struct page **hpage,
 			      struct collapse_control *cc)
 {
-	gfp_t gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_THISNODE;
 	int node = khugepaged_find_target_node(cc);
 
-	if (!khugepaged_alloc_page(hpage, gfp, node))
+	if (!khugepaged_alloc_page(hpage, cc->gfp, node))
 		return SCAN_ALLOC_HUGE_PAGE_FAIL;
-	if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, gfp)))
+	if (unlikely(mem_cgroup_charge(page_folio(*hpage), mm, cc->gfp)))
 		return SCAN_CGROUP_CHARGE_FAIL;
 	count_memcg_page_event(*hpage, THP_COLLAPSE_ALLOC);
 	return SCAN_SUCCEED;
@@ -1032,7 +1034,7 @@  static void collapse_huge_page(struct mm_struct *mm, unsigned long address,
 	 */
 	mmap_read_unlock(mm);
 
-	result = alloc_charge_hpage(hpage, mm, cc);
+	result = alloc_charge_hpage(mm, hpage, cc);
 	if (result != SCAN_SUCCEED)
 		goto out_nolock;
 
@@ -1613,7 +1615,7 @@  static void collapse_file(struct mm_struct *mm, struct file *file,
 	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
 	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
 
-	result = alloc_charge_hpage(hpage, mm, cc);
+	result = alloc_charge_hpage(mm, hpage, cc);
 	if (result != SCAN_SUCCEED)
 		goto out;
 
@@ -2037,8 +2039,7 @@  static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
 }
 #else
 static void khugepaged_scan_file(struct mm_struct *mm, struct file *file,
-				 pgoff_t start, struct page **hpage,
-				 struct collapse_control *cc)
+				 pgoff_t start, struct collapse_control *cc)
 {
 	BUILD_BUG();
 }
@@ -2121,6 +2122,9 @@  static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 			if (unlikely(khugepaged_test_exit(mm)))
 				goto breakouterloop;
 
+			/* reset gfp flags since sysfs settings might change */
+			cc->gfp = alloc_hugepage_khugepaged_gfpmask() |
+					__GFP_THISNODE;
 			VM_BUG_ON(khugepaged_scan.address < hstart ||
 				  khugepaged_scan.address + HPAGE_PMD_SIZE >
 				  hend);
@@ -2255,6 +2259,7 @@  static int khugepaged(void *none)
 	struct mm_slot *mm_slot;
 	struct collapse_control cc = {
 		.last_target_node = NUMA_NO_NODE,
+		/* .gfp set later  */
 	};
 
 	set_freezable();