[2/4] mm/gup: restrict CMA region by using allocation scope API
diff mbox series

Message ID 1594789529-6206-2-git-send-email-iamjoonsoo.kim@lge.com
State New
Headers show
Series
  • [1/4] mm/page_alloc: fix non cma alloc context
Related show

Commit Message

Joonsoo Kim July 15, 2020, 5:05 a.m. UTC
From: Joonsoo Kim <iamjoonsoo.kim@lge.com>

We have well defined scope API to exclude CMA region.
Use it rather than manipulating gfp_mask manually. With this change,
we can now use __GFP_MOVABLE for gfp_mask and the ZONE_MOVABLE is also
searched by page allocator. For hugetlb, gfp_mask is redefined since
it has a regular allocation mask filter for migration target.

Note that this can be considered as a fix for the commit 9a4e9f3b2d73
("mm: update get_user_pages_longterm to migrate pages allocated from
CMA region"). However, "Fixes" tag isn't added here since it is just
suboptimal but it doesn't cause any problem.

Suggested-by: Michal Hocko <mhocko@suse.com>
Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
---
 include/linux/hugetlb.h |  2 ++
 mm/gup.c                | 17 ++++++++---------
 2 files changed, 10 insertions(+), 9 deletions(-)

Comments

Michal Hocko July 15, 2020, 8:24 a.m. UTC | #1
On Wed 15-07-20 14:05:27, Joonsoo Kim wrote:
> From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> 
> We have well defined scope API to exclude CMA region.
> Use it rather than manipulating gfp_mask manually. With this change,
> we can now use __GFP_MOVABLE for gfp_mask and the ZONE_MOVABLE is also
> searched by page allocator. For hugetlb, gfp_mask is redefined since
> it has a regular allocation mask filter for migration target.
> 
> Note that this can be considered as a fix for the commit 9a4e9f3b2d73
> ("mm: update get_user_pages_longterm to migrate pages allocated from
> CMA region"). However, "Fixes" tag isn't added here since it is just
> suboptimal but it doesn't cause any problem.

But it is breaking the contract that the longterm pins never end up in a
cma managed memory. So I think Fixes tag is really due. I am not sure
about stable backport. If the patch was the trivial move of
memalloc_nocma_restore then it would be probably worth it because it is
trivial to review and backport. I suspect that longterm pins in CMA
regions would cause hard to debug issues where CMA memory will not be
available. But I am not really sure this is a real problem considering
how many long term pin users we have and I have also no idea whether
those are usually used along with CMA users.

Anyway I think it would really be much better to isolate the
memalloc_nocma_restore and have it first in the series. The reword of
the __GFP_MOVABLE functionality is orthogonal.

Btw __GFP_NOWARN change is not documented.

> Suggested-by: Michal Hocko <mhocko@suse.com>
> Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> ---
>  include/linux/hugetlb.h |  2 ++
>  mm/gup.c                | 17 ++++++++---------
>  2 files changed, 10 insertions(+), 9 deletions(-)
> 
> diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
> index 6b9508d..2660b04 100644
> --- a/include/linux/hugetlb.h
> +++ b/include/linux/hugetlb.h
> @@ -708,6 +708,8 @@ static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
>  	/* Some callers might want to enfoce node */
>  	modified_mask |= (gfp_mask & __GFP_THISNODE);
>  
> +	modified_mask |= (gfp_mask & __GFP_NOWARN);
> +
>  	return modified_mask;
>  }
>  
> diff --git a/mm/gup.c b/mm/gup.c
> index 5daadae..bbd36a1 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -1619,10 +1619,12 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
>  	 * Trying to allocate a page for migration. Ignore allocation
>  	 * failure warnings. We don't force __GFP_THISNODE here because
>  	 * this node here is the node where we have CMA reservation and
> -	 * in some case these nodes will have really less non movable
> +	 * in some case these nodes will have really less non CMA
>  	 * allocation memory.
> +	 *
> +	 * Note that CMA region is prohibited by allocation scope.
>  	 */
> -	gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
> +	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN;
>  
>  	if (PageHighMem(page))
>  		gfp_mask |= __GFP_HIGHMEM;
> @@ -1630,6 +1632,8 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
>  #ifdef CONFIG_HUGETLB_PAGE
>  	if (PageHuge(page)) {
>  		struct hstate *h = page_hstate(page);
> +
> +		gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
>  		/*
>  		 * We don't want to dequeue from the pool because pool pages will
>  		 * mostly be from the CMA region.
> @@ -1644,11 +1648,6 @@ static struct page *new_non_cma_page(struct page *page, unsigned long private)
>  		 */
>  		gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
>  
> -		/*
> -		 * Remove the movable mask so that we don't allocate from
> -		 * CMA area again.
> -		 */
> -		thp_gfpmask &= ~__GFP_MOVABLE;
>  		thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
>  		if (!thp)
>  			return NULL;
> @@ -1794,7 +1793,6 @@ static long __gup_longterm_locked(struct task_struct *tsk,
>  				     vmas_tmp, NULL, gup_flags);
>  
>  	if (gup_flags & FOLL_LONGTERM) {
> -		memalloc_nocma_restore(flags);
>  		if (rc < 0)
>  			goto out;
>  
> @@ -1807,9 +1805,10 @@ static long __gup_longterm_locked(struct task_struct *tsk,
>  
>  		rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages,
>  						 vmas_tmp, gup_flags);
> +out:
> +		memalloc_nocma_restore(flags);
>  	}
>  
> -out:
>  	if (vmas_tmp != vmas)
>  		kfree(vmas_tmp);
>  	return rc;
> -- 
> 2.7.4
Joonsoo Kim July 17, 2020, 7:46 a.m. UTC | #2
2020년 7월 15일 (수) 오후 5:24, Michal Hocko <mhocko@kernel.org>님이 작성:
>
> On Wed 15-07-20 14:05:27, Joonsoo Kim wrote:
> > From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> >
> > We have well defined scope API to exclude CMA region.
> > Use it rather than manipulating gfp_mask manually. With this change,
> > we can now use __GFP_MOVABLE for gfp_mask and the ZONE_MOVABLE is also
> > searched by page allocator. For hugetlb, gfp_mask is redefined since
> > it has a regular allocation mask filter for migration target.
> >
> > Note that this can be considered as a fix for the commit 9a4e9f3b2d73
> > ("mm: update get_user_pages_longterm to migrate pages allocated from
> > CMA region"). However, "Fixes" tag isn't added here since it is just
> > suboptimal but it doesn't cause any problem.
>
> But it is breaking the contract that the longterm pins never end up in a
> cma managed memory. So I think Fixes tag is really due. I am not sure
> about stable backport. If the patch was the trivial move of

Previous implementation is correct since longterm pins never end up in a CMA
managed memory with that implementation. It's just a different and suboptimal
implementation to exclude the CMA area. This is why I don't add the "Fixes"
tag on the patch.

> memalloc_nocma_restore then it would be probably worth it because it is
> trivial to review and backport. I suspect that longterm pins in CMA
> regions would cause hard to debug issues where CMA memory will not be
> available. But I am not really sure this is a real problem considering
> how many long term pin users we have and I have also no idea whether
> those are usually used along with CMA users.
>
> Anyway I think it would really be much better to isolate the
> memalloc_nocma_restore and have it first in the series. The reword of

Unfortunately, it's not possible to place it first in the series since
memalloc_nocma_XXX API has a bug that could return the CMA area even if
scope is set up. It is fixed on the first patch in this series.

> the __GFP_MOVABLE functionality is orthogonal.

My logic is that, we basically assume that using __GFP_MOVABLE is possible
in migration target allocation. And, it was necessarily cleared in
order to exclude
the CMA area. Now, we use the other method to exclude the CMA area so
__GFP_MOVABLE is added like usual. If you think that it deserves to be
a separate patch, I will do it on the next version.

> Btw __GFP_NOWARN change is not documented.

Will document it.

Thanks.
Michal Hocko July 17, 2020, 8:26 a.m. UTC | #3
On Fri 17-07-20 16:46:38, Joonsoo Kim wrote:
> 2020년 7월 15일 (수) 오후 5:24, Michal Hocko <mhocko@kernel.org>님이 작성:
> >
> > On Wed 15-07-20 14:05:27, Joonsoo Kim wrote:
> > > From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> > >
> > > We have well defined scope API to exclude CMA region.
> > > Use it rather than manipulating gfp_mask manually. With this change,
> > > we can now use __GFP_MOVABLE for gfp_mask and the ZONE_MOVABLE is also
> > > searched by page allocator. For hugetlb, gfp_mask is redefined since
> > > it has a regular allocation mask filter for migration target.
> > >
> > > Note that this can be considered as a fix for the commit 9a4e9f3b2d73
> > > ("mm: update get_user_pages_longterm to migrate pages allocated from
> > > CMA region"). However, "Fixes" tag isn't added here since it is just
> > > suboptimal but it doesn't cause any problem.
> >
> > But it is breaking the contract that the longterm pins never end up in a
> > cma managed memory. So I think Fixes tag is really due. I am not sure
> > about stable backport. If the patch was the trivial move of
> 
> Previous implementation is correct since longterm pins never end up in a CMA
> managed memory with that implementation. It's just a different and suboptimal
> implementation to exclude the CMA area. This is why I don't add the "Fixes"
> tag on the patch.

But the current implementation calls memalloc_nocma_restore too early so
__gup_longterm_locked will migrate pages possibly to CMA ranges as there
is no GFP_MOVABLE restriction in place. Or am I missing something?
Joonsoo Kim July 17, 2020, 9:28 a.m. UTC | #4
2020년 7월 17일 (금) 오후 5:26, Michal Hocko <mhocko@kernel.org>님이 작성:
>
> On Fri 17-07-20 16:46:38, Joonsoo Kim wrote:
> > 2020년 7월 15일 (수) 오후 5:24, Michal Hocko <mhocko@kernel.org>님이 작성:
> > >
> > > On Wed 15-07-20 14:05:27, Joonsoo Kim wrote:
> > > > From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> > > >
> > > > We have well defined scope API to exclude CMA region.
> > > > Use it rather than manipulating gfp_mask manually. With this change,
> > > > we can now use __GFP_MOVABLE for gfp_mask and the ZONE_MOVABLE is also
> > > > searched by page allocator. For hugetlb, gfp_mask is redefined since
> > > > it has a regular allocation mask filter for migration target.
> > > >
> > > > Note that this can be considered as a fix for the commit 9a4e9f3b2d73
> > > > ("mm: update get_user_pages_longterm to migrate pages allocated from
> > > > CMA region"). However, "Fixes" tag isn't added here since it is just
> > > > suboptimal but it doesn't cause any problem.
> > >
> > > But it is breaking the contract that the longterm pins never end up in a
> > > cma managed memory. So I think Fixes tag is really due. I am not sure
> > > about stable backport. If the patch was the trivial move of
> >
> > Previous implementation is correct since longterm pins never end up in a CMA
> > managed memory with that implementation. It's just a different and suboptimal
> > implementation to exclude the CMA area. This is why I don't add the "Fixes"
> > tag on the patch.
>
> But the current implementation calls memalloc_nocma_restore too early so
> __gup_longterm_locked will migrate pages possibly to CMA ranges as there
> is no GFP_MOVABLE restriction in place. Or am I missing something?

IIUC, calling memalloc_nocma_restore() too early doesn't cause the
actual problem.

Final check is done by check_and_migrate_cma_pages() which is outside of
scope API. If we find a CMA page in between the gup range here, the page is
migrated to the migration target page and this target page is allocated by
new_non_cma_page().

new_non_cma_page() try to allocate the page without __GFP_MOVABLE so
returned page will not be CMA area memory. Therefore, even if
memalloc_nocma_restore() is called early, there is no actual problem.

Thanks.
Michal Hocko July 17, 2020, 11:08 a.m. UTC | #5
On Fri 17-07-20 18:28:16, Joonsoo Kim wrote:
> 2020년 7월 17일 (금) 오후 5:26, Michal Hocko <mhocko@kernel.org>님이 작성:
> >
> > On Fri 17-07-20 16:46:38, Joonsoo Kim wrote:
> > > 2020년 7월 15일 (수) 오후 5:24, Michal Hocko <mhocko@kernel.org>님이 작성:
> > > >
> > > > On Wed 15-07-20 14:05:27, Joonsoo Kim wrote:
> > > > > From: Joonsoo Kim <iamjoonsoo.kim@lge.com>
> > > > >
> > > > > We have well defined scope API to exclude CMA region.
> > > > > Use it rather than manipulating gfp_mask manually. With this change,
> > > > > we can now use __GFP_MOVABLE for gfp_mask and the ZONE_MOVABLE is also
> > > > > searched by page allocator. For hugetlb, gfp_mask is redefined since
> > > > > it has a regular allocation mask filter for migration target.
> > > > >
> > > > > Note that this can be considered as a fix for the commit 9a4e9f3b2d73
> > > > > ("mm: update get_user_pages_longterm to migrate pages allocated from
> > > > > CMA region"). However, "Fixes" tag isn't added here since it is just
> > > > > suboptimal but it doesn't cause any problem.
> > > >
> > > > But it is breaking the contract that the longterm pins never end up in a
> > > > cma managed memory. So I think Fixes tag is really due. I am not sure
> > > > about stable backport. If the patch was the trivial move of
> > >
> > > Previous implementation is correct since longterm pins never end up in a CMA
> > > managed memory with that implementation. It's just a different and suboptimal
> > > implementation to exclude the CMA area. This is why I don't add the "Fixes"
> > > tag on the patch.
> >
> > But the current implementation calls memalloc_nocma_restore too early so
> > __gup_longterm_locked will migrate pages possibly to CMA ranges as there
> > is no GFP_MOVABLE restriction in place. Or am I missing something?
> 
> IIUC, calling memalloc_nocma_restore() too early doesn't cause the
> actual problem.
> 
> Final check is done by check_and_migrate_cma_pages() which is outside of
> scope API. If we find a CMA page in between the gup range here, the page is
> migrated to the migration target page and this target page is allocated by
> new_non_cma_page().
> 
> new_non_cma_page() try to allocate the page without __GFP_MOVABLE so
> returned page will not be CMA area memory. Therefore, even if
> memalloc_nocma_restore() is called early, there is no actual problem.

Right you are! I have misread gfp_t gfp_mask = GFP_USER | __GFP_NOWARN
and didn't realize that it doesn't include MOVABLE flag.

Sorry about the noise! The issue is only formal coding style so Fixes
tag could indeed cause more confusion than it would help.

Patch
diff mbox series

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 6b9508d..2660b04 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -708,6 +708,8 @@  static inline gfp_t htlb_modify_alloc_mask(struct hstate *h, gfp_t gfp_mask)
 	/* Some callers might want to enfoce node */
 	modified_mask |= (gfp_mask & __GFP_THISNODE);
 
+	modified_mask |= (gfp_mask & __GFP_NOWARN);
+
 	return modified_mask;
 }
 
diff --git a/mm/gup.c b/mm/gup.c
index 5daadae..bbd36a1 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1619,10 +1619,12 @@  static struct page *new_non_cma_page(struct page *page, unsigned long private)
 	 * Trying to allocate a page for migration. Ignore allocation
 	 * failure warnings. We don't force __GFP_THISNODE here because
 	 * this node here is the node where we have CMA reservation and
-	 * in some case these nodes will have really less non movable
+	 * in some case these nodes will have really less non CMA
 	 * allocation memory.
+	 *
+	 * Note that CMA region is prohibited by allocation scope.
 	 */
-	gfp_t gfp_mask = GFP_USER | __GFP_NOWARN;
+	gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN;
 
 	if (PageHighMem(page))
 		gfp_mask |= __GFP_HIGHMEM;
@@ -1630,6 +1632,8 @@  static struct page *new_non_cma_page(struct page *page, unsigned long private)
 #ifdef CONFIG_HUGETLB_PAGE
 	if (PageHuge(page)) {
 		struct hstate *h = page_hstate(page);
+
+		gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
 		/*
 		 * We don't want to dequeue from the pool because pool pages will
 		 * mostly be from the CMA region.
@@ -1644,11 +1648,6 @@  static struct page *new_non_cma_page(struct page *page, unsigned long private)
 		 */
 		gfp_t thp_gfpmask = GFP_TRANSHUGE | __GFP_NOWARN;
 
-		/*
-		 * Remove the movable mask so that we don't allocate from
-		 * CMA area again.
-		 */
-		thp_gfpmask &= ~__GFP_MOVABLE;
 		thp = __alloc_pages_node(nid, thp_gfpmask, HPAGE_PMD_ORDER);
 		if (!thp)
 			return NULL;
@@ -1794,7 +1793,6 @@  static long __gup_longterm_locked(struct task_struct *tsk,
 				     vmas_tmp, NULL, gup_flags);
 
 	if (gup_flags & FOLL_LONGTERM) {
-		memalloc_nocma_restore(flags);
 		if (rc < 0)
 			goto out;
 
@@ -1807,9 +1805,10 @@  static long __gup_longterm_locked(struct task_struct *tsk,
 
 		rc = check_and_migrate_cma_pages(tsk, mm, start, rc, pages,
 						 vmas_tmp, gup_flags);
+out:
+		memalloc_nocma_restore(flags);
 	}
 
-out:
 	if (vmas_tmp != vmas)
 		kfree(vmas_tmp);
 	return rc;