diff mbox series

[v3,3/6] mm: apply per-task gfp constraints in fast path

Message ID 20201211202140.396852-4-pasha.tatashin@soleen.com (mailing list archive)
State New, archived
Headers show
Series prohibit pinning pages in ZONE_MOVABLE | expand

Commit Message

Pasha Tatashin Dec. 11, 2020, 8:21 p.m. UTC
Function current_gfp_context() is currently called only after the fast
path. However, we will soon add more constraints that also limit zones
based on context. Move this call into the fast path, so that the correct
constraints are applied to all allocations.

Also update .reclaim_idx based on the value returned by
current_gfp_context(), because it will soon modify the allowed zones.

Note:
With this patch we will do one extra current->flags load during fast path,
but we already load current->flags in fast-path:

__alloc_pages_nodemask()
 prepare_alloc_pages()
  current_alloc_flags(gfp_mask, *alloc_flags);

Later, when we add the zone constraint logic to current_gfp_context(), we
will be able to remove the current->flags load from current_alloc_flags(),
and therefore return the fast path to the current performance level.

Suggested-by: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com>
---
 mm/page_alloc.c | 15 ++++++++-------
 mm/vmscan.c     | 10 ++++++----
 2 files changed, 14 insertions(+), 11 deletions(-)

Comments

Michal Hocko Dec. 14, 2020, 2:09 p.m. UTC | #1
On Fri 11-12-20 15:21:37, Pavel Tatashin wrote:
> Function current_gfp_context() is called after fast path. However, soon we
> will add more constraints which will also limit zones based on context.
> Move this call into fast path, and apply the correct constraints for all
> allocations.
> 
> Also update .reclaim_idx based on value returned by current_gfp_context()
> because it soon will modify the allowed zones.
> 
> Note:
> With this patch we will do one extra current->flags load during fast path,
> but we already load current->flags in fast-path:
> 
> __alloc_pages_nodemask()
>  prepare_alloc_pages()
>   current_alloc_flags(gfp_mask, *alloc_flags);
> 
> Later, when we add the zone constrain logic to current_gfp_context() we
> will be able to remove current->flags load from current_alloc_flags, and
> therefore return fast-path to the current performance level.
> 
> Suggested-by: Michal Hocko <mhocko@kernel.org>
> Signed-off-by: Pavel Tatashin <pasha.tatashin@soleen.com>
> ---
>  mm/page_alloc.c | 15 ++++++++-------
>  mm/vmscan.c     | 10 ++++++----
>  2 files changed, 14 insertions(+), 11 deletions(-)
> 
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index ec05396a597b..c2dea9ad0e98 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -4976,6 +4976,13 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
>  	}
>  
>  	gfp_mask &= gfp_allowed_mask;
> +	/*
> +	 * Apply scoped allocation constraints. This is mainly about GFP_NOFS
> +	 * resp. GFP_NOIO which has to be inherited for all allocation requests
> +	 * from a particular context which has been marked by
> +	 * memalloc_no{fs,io}_{save,restore}.
> +	 */
> +	gfp_mask = current_gfp_context(gfp_mask);
>  	alloc_mask = gfp_mask;
>  	if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
>  		return NULL;
> @@ -4991,13 +4998,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
>  	if (likely(page))
>  		goto out;
>  
> -	/*
> -	 * Apply scoped allocation constraints. This is mainly about GFP_NOFS
> -	 * resp. GFP_NOIO which has to be inherited for all allocation requests
> -	 * from a particular context which has been marked by
> -	 * memalloc_no{fs,io}_{save,restore}.
> -	 */
> -	alloc_mask = current_gfp_context(gfp_mask);
> +	alloc_mask = gfp_mask;
>  	ac.spread_dirty_pages = false;
>  
>  	/*

Ack to this.

But I do not really understand the reclaim changes below. All allocation
contexts should have a proper gfp mask, so why do we have to call
current_gfp_context() here? In fact, moving current_gfp_context() into the
allocator path should have made all these games unnecessary. The memcg
reclaim path might need some careful checking because the gfp mask is used
more creatively there, but the general reclaim paths should be ok.

> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 469016222cdb..d9546f5897f4 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -3234,11 +3234,12 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
>  unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
>  				gfp_t gfp_mask, nodemask_t *nodemask)
>  {
> +	gfp_t current_gfp_mask = current_gfp_context(gfp_mask);
>  	unsigned long nr_reclaimed;
>  	struct scan_control sc = {
>  		.nr_to_reclaim = SWAP_CLUSTER_MAX,
> -		.gfp_mask = current_gfp_context(gfp_mask),
> -		.reclaim_idx = gfp_zone(gfp_mask),
> +		.gfp_mask = current_gfp_mask,
> +		.reclaim_idx = gfp_zone(current_gfp_mask),
>  		.order = order,
>  		.nodemask = nodemask,
>  		.priority = DEF_PRIORITY,
> @@ -4158,17 +4159,18 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
>  {
>  	/* Minimum pages needed in order to stay on node */
>  	const unsigned long nr_pages = 1 << order;
> +	gfp_t current_gfp_mask = current_gfp_context(gfp_mask);
>  	struct task_struct *p = current;
>  	unsigned int noreclaim_flag;
>  	struct scan_control sc = {
>  		.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
> -		.gfp_mask = current_gfp_context(gfp_mask),
> +		.gfp_mask = current_gfp_mask,
>  		.order = order,
>  		.priority = NODE_RECLAIM_PRIORITY,
>  		.may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE),
>  		.may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
>  		.may_swap = 1,
> -		.reclaim_idx = gfp_zone(gfp_mask),
> +		.reclaim_idx = gfp_zone(current_gfp_mask),

Again, why do we need this when the gfp_mask provided by the page
allocator should already be a proper one?
>  	};
>  
>  	trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,
> -- 
> 2.25.1
Pasha Tatashin Dec. 15, 2020, 5:20 a.m. UTC | #2
> Ack to this.

Thank you.

>
> But I do not really understand this. All allocation contexts should have
> a proper gfp mask so why do we have to call current_gfp_context here?
> In fact moving the current_gfp_context in the allocator path should have
> made all this games unnecessary. Memcg reclaim path might need some
> careful check because gfp mask is used more creative there but the
> general reclaim paths should be ok.
>
> > diff --git a/mm/vmscan.c b/mm/vmscan.c
>
> Again, why do we need this when the gfp_mask
> >       };
> >
--

Hi Michal,

Aside from __alloc_pages_nodemask(), current_gfp_context() is
called from the following six functions:

try_to_free_pages()
try_to_free_mem_cgroup_pages()
__node_reclaim()
__need_fs_reclaim()
alloc_contig_range()
pcpu_alloc()

As I understand, the idea is that because the allocator now honors
gfp_context values for all paths, the call can be removed from some of
the above functions. I think you are correct. But, at least from a
quick glance, this is not obvious, and is not the case for all of the
above functions.

For example:

alloc_contig_range()
  __alloc_contig_migrate_range
   isolate_migratepages_range
     isolate_migratepages_block
        /*
         * Only allow to migrate anonymous pages in GFP_NOFS context
         * because those do not depend on fs locks.
         */
       if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
          goto isolate_fail;

If we remove current_gfp_context() from alloc_contig_range(),
cc->gfp_mask will not be updated with the proper __GFP_FS flag.
I have studied some other paths, and they are also convoluted.
Therefore, I am worried about performing this optimization in this
series.
Michal Hocko Dec. 15, 2020, 8:25 a.m. UTC | #3
On Tue 15-12-20 00:20:39, Pavel Tatashin wrote:
> > Ack to this.
> 
> Thank you.
> 
> >
> > But I do not really understand this. All allocation contexts should have
> > a proper gfp mask so why do we have to call current_gfp_context here?
> > In fact moving the current_gfp_context in the allocator path should have
> > made all this games unnecessary. Memcg reclaim path might need some
> > careful check because gfp mask is used more creative there but the
> > general reclaim paths should be ok.
> >
> > > diff --git a/mm/vmscan.c b/mm/vmscan.c
> >
> > Again, why do we need this when the gfp_mask
> > >       };
> > >
> --
> 
> Hi Michal,
> 
> Beside from __alloc_pages_nodemask(), the current_gfp_context() is
> called from the following six functions:
> 
> try_to_free_pages()
> try_to_free_mem_cgroup_pages()
> __node_reclaim()
> __need_fs_reclaim()
> alloc_contig_range()
> pcpu_alloc()
> 
> As I understand, the idea is that because the allocator now honors
> gfp_context values for all paths, the call can be removed from some of
> the above functions. I think you are correct. But, at least from a
> quick glance, this is not obvious, and is not the case for all of the
> above functions.
> 
> For example:
> 
> alloc_contig_range()
>   __alloc_contig_migrate_range
>    isolate_migratepages_range
>      isolate_migratepages_block
>         /*
>          * Only allow to migrate anonymous pages in GFP_NOFS context
>          * because those do not depend on fs locks.
>          */
>        if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
>           goto isolate_fail;
> 
> If we remove current_gfp_context() from alloc_contig_range(), the
> cc->gfp_mask will not be updated with proper __GFP_FS flag.

I do not think I was proposing to drop current_gfp_context() from
alloc_contig_range(). ACR needs some work to become properly aware of the
scoped gfp mask. This should be addressed, but I do not think the code
works properly now, so I wouldn't lose sleep over it in this series. At
least __alloc_contig_migrate_range() should follow the gfp mask given to
alloc_contig_range().

> I have studied some other paths, and they are also convoluted.
> Therefore, I am worried about performing this optimization in this
> series.

Dropping current_gfp_context() from the reclaim context should be done in
a separate patch. I didn't mean to push for this here. All I meant was
to simply not touch gfp/zone_idx in the reclaim path. The changelog
should call out that the page allocator always provides a proper gfp mask.
Pasha Tatashin Dec. 15, 2020, 5:35 p.m. UTC | #4
On Tue, Dec 15, 2020 at 3:25 AM Michal Hocko <mhocko@suse.com> wrote:
>
> On Tue 15-12-20 00:20:39, Pavel Tatashin wrote:
> > > Ack to this.
> >
> > Thank you.
> >
> > >
> > > But I do not really understand this. All allocation contexts should have
> > > a proper gfp mask so why do we have to call current_gfp_context here?
> > > In fact moving the current_gfp_context in the allocator path should have
> > > made all this games unnecessary. Memcg reclaim path might need some
> > > careful check because gfp mask is used more creative there but the
> > > general reclaim paths should be ok.
> > >
> > > > diff --git a/mm/vmscan.c b/mm/vmscan.c
> > >
> > > Again, why do we need this when the gfp_mask
> > > >       };
> > > >
> > --
> >
> > Hi Michal,
> >
> > Beside from __alloc_pages_nodemask(), the current_gfp_context() is
> > called from the following six functions:
> >
> > try_to_free_pages()
> > try_to_free_mem_cgroup_pages()
> > __node_reclaim()
> > __need_fs_reclaim()
> > alloc_contig_range()
> > pcpu_alloc()
> >
> > As I understand, the idea is that because the allocator now honors
> > gfp_context values for all paths, the call can be removed from some of
> > the above functions. I think you are correct. But, at least from a
> > quick glance, this is not obvious, and is not the case for all of the
> > above functions.
> >
> > For example:
> >
> > alloc_contig_range()
> >   __alloc_contig_migrate_range
> >    isolate_migratepages_range
> >      isolate_migratepages_block
> >         /*
> >          * Only allow to migrate anonymous pages in GFP_NOFS context
> >          * because those do not depend on fs locks.
> >          */
> >        if (!(cc->gfp_mask & __GFP_FS) && page_mapping(page))
> >           goto isolate_fail;
> >
> > If we remove current_gfp_context() from alloc_contig_range(), the
> > cc->gfp_mask will not be updated with proper __GFP_FS flag.
>
> I do not think I was proposing to drop current_gfp_context from
> alloc_contig_range. ACR needs some work to be properly scoped gfp mask
> aware. This should be addressed but I do not think think the code
> works properly now so I wouldn't lose sleep over it in this series. At
> least __alloc_contig_migrate_range should follow the gfp mask given to
> alloc_contig_range.
>
> > I have studied some other paths, and they are also convoluted.
> > Therefore, I am worried about performing this optimization in this
> > series.
>
> Dropping current_gfp_context from the reclaim context should be done in
> a separate patch. I didn't mean to push for this here. All I meant was
> to simply not touch gfp/zone_idx in the reclaim path. The changelog
> should call out that the page allocator always provides proper gfp mask.

I see what you mean. I will remove the reclaim changes and add a
note to the changelog.

Thank you,
Pasha

>
> --
> Michal Hocko
> SUSE Labs
diff mbox series

Patch

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ec05396a597b..c2dea9ad0e98 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4976,6 +4976,13 @@  __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
 	}
 
 	gfp_mask &= gfp_allowed_mask;
+	/*
+	 * Apply scoped allocation constraints. This is mainly about GFP_NOFS
+	 * resp. GFP_NOIO which has to be inherited for all allocation requests
+	 * from a particular context which has been marked by
+	 * memalloc_no{fs,io}_{save,restore}.
+	 */
+	gfp_mask = current_gfp_context(gfp_mask);
 	alloc_mask = gfp_mask;
 	if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags))
 		return NULL;
@@ -4991,13 +4998,7 @@  __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
 	if (likely(page))
 		goto out;
 
-	/*
-	 * Apply scoped allocation constraints. This is mainly about GFP_NOFS
-	 * resp. GFP_NOIO which has to be inherited for all allocation requests
-	 * from a particular context which has been marked by
-	 * memalloc_no{fs,io}_{save,restore}.
-	 */
-	alloc_mask = current_gfp_context(gfp_mask);
+	alloc_mask = gfp_mask;
 	ac.spread_dirty_pages = false;
 
 	/*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 469016222cdb..d9546f5897f4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3234,11 +3234,12 @@  static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 				gfp_t gfp_mask, nodemask_t *nodemask)
 {
+	gfp_t current_gfp_mask = current_gfp_context(gfp_mask);
 	unsigned long nr_reclaimed;
 	struct scan_control sc = {
 		.nr_to_reclaim = SWAP_CLUSTER_MAX,
-		.gfp_mask = current_gfp_context(gfp_mask),
-		.reclaim_idx = gfp_zone(gfp_mask),
+		.gfp_mask = current_gfp_mask,
+		.reclaim_idx = gfp_zone(current_gfp_mask),
 		.order = order,
 		.nodemask = nodemask,
 		.priority = DEF_PRIORITY,
@@ -4158,17 +4159,18 @@  static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in
 {
 	/* Minimum pages needed in order to stay on node */
 	const unsigned long nr_pages = 1 << order;
+	gfp_t current_gfp_mask = current_gfp_context(gfp_mask);
 	struct task_struct *p = current;
 	unsigned int noreclaim_flag;
 	struct scan_control sc = {
 		.nr_to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX),
-		.gfp_mask = current_gfp_context(gfp_mask),
+		.gfp_mask = current_gfp_mask,
 		.order = order,
 		.priority = NODE_RECLAIM_PRIORITY,
 		.may_writepage = !!(node_reclaim_mode & RECLAIM_WRITE),
 		.may_unmap = !!(node_reclaim_mode & RECLAIM_UNMAP),
 		.may_swap = 1,
-		.reclaim_idx = gfp_zone(gfp_mask),
+		.reclaim_idx = gfp_zone(current_gfp_mask),
 	};
 
 	trace_mm_vmscan_node_reclaim_begin(pgdat->node_id, order,